diff options
Diffstat (limited to 'net')
321 files changed, 10467 insertions, 6091 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3c1c8c14e929..a2ad15250575 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -155,9 +155,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) BUG_ON(!grp); /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing. + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). */ - if (real_dev->features & NETIF_F_HW_VLAN_FILTER) + if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER)) ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); grp->nr_vlans--; @@ -419,6 +420,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (is_vlan_dev(dev)) __vlan_device_event(dev, event); + if ((event == NETDEV_UP) && + (dev->features & NETIF_F_HW_VLAN_FILTER) && + dev->netdev_ops->ndo_vlan_rx_add_vid) { + pr_info("8021q: adding VLAN 0 to HW filter on device %s\n", + dev->name); + dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0); + } + grp = __vlan_find_group(dev); if (!grp) goto out; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 6abdcac1b2e8..8d9503ad01da 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -2,6 +2,7 @@ #define __BEN_VLAN_802_1Q_INC__ #include <linux/if_vlan.h> +#include <linux/u64_stats_sync.h> /** @@ -21,14 +22,16 @@ struct vlan_priority_tci_mapping { * struct vlan_rx_stats - VLAN percpu rx stats * @rx_packets: number of received packets * @rx_bytes: number of received bytes - * @multicast: number of received multicast packets + * @rx_multicast: number of received multicast packets + * @syncp: synchronization point for 64bit counters * @rx_errors: number of errors */ struct vlan_rx_stats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long multicast; - unsigned long rx_errors; + u64 rx_packets; + u64 rx_bytes; + u64 rx_multicast; + struct u64_stats_sync syncp; + unsigned long rx_errors; }; /** 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 50f58f5f1c34..01ddb0472f86 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -8,6 +8,9 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { + struct net_device *vlan_dev; + u16 vlan_id; + if (netpoll_rx(skb)) return NET_RX_DROP; @@ -16,9 +19,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + vlan_id = vlan_tci & VLAN_VID_MASK; + vlan_dev = vlan_group_get_device(grp, vlan_id); - if (!skb->dev) + if (vlan_dev) + skb->dev = vlan_dev; + else if (vlan_id) goto drop; return (polling ? netif_receive_skb(skb) : netif_rx(skb)); @@ -41,9 +47,9 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - rx_stats = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, - smp_processor_id()); + rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats); + u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; @@ -51,7 +57,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) case PACKET_BROADCAST: break; case PACKET_MULTICAST: - rx_stats->multicast++; + rx_stats->rx_multicast++; break; case PACKET_OTHERHOST: /* Our lower layer thinks this is not local, let's make sure. 
@@ -62,6 +68,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) skb->pkt_type = PACKET_HOST; break; } + u64_stats_update_end(&rx_stats->syncp); return 0; } @@ -82,15 +89,20 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb) { struct sk_buff *p; + struct net_device *vlan_dev; + u16 vlan_id; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + vlan_id = vlan_tci & VLAN_VID_MASK; + vlan_dev = vlan_group_get_device(grp, vlan_id); - if (!skb->dev) + if (vlan_dev) + skb->dev = vlan_dev; + else if (vlan_id) goto drop; for (p = napi->gro_list; p; p = p->next) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 529842677817..3d59c9bf8feb 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -142,6 +142,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, { struct vlan_hdr *vhdr; struct vlan_rx_stats *rx_stats; + struct net_device *vlan_dev; u16 vlan_id; u16 vlan_tci; @@ -157,53 +158,71 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, vlan_id = vlan_tci & VLAN_VID_MASK; rcu_read_lock(); - skb->dev = __find_vlan_dev(dev, vlan_id); - if (!skb->dev) { - pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n", - __func__, vlan_id, dev->name); - goto err_unlock; - } - - rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats, - smp_processor_id()); - rx_stats->rx_packets++; - rx_stats->rx_bytes += skb->len; - - skb_pull_rcsum(skb, VLAN_HLEN); - - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); - - pr_debug("%s: priority: %u for TCI: %hu\n", - __func__, skb->priority, vlan_tci); - - switch (skb->pkt_type) { - case PACKET_BROADCAST: /* Yeah, stats collect these together.. 
*/ - /* stats->broadcast ++; // no such counter :-( */ - break; + vlan_dev = __find_vlan_dev(dev, vlan_id); - case PACKET_MULTICAST: - rx_stats->multicast++; - break; + /* If the VLAN device is defined, we use it. + * If not, and the VID is 0, it is a 802.1p packet (not + * really a VLAN), so we will just netif_rx it later to the + * original interface, but with the skb->proto set to the + * wrapped proto: we do nothing here. + */ - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. - */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - default: - break; + if (!vlan_dev) { + if (vlan_id) { + pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n", + __func__, vlan_id, dev->name); + goto err_unlock; + } + rx_stats = NULL; + } else { + skb->dev = vlan_dev; + + rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats, + smp_processor_id()); + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->rx_packets++; + rx_stats->rx_bytes += skb->len; + + skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); + + pr_debug("%s: priority: %u for TCI: %hu\n", + __func__, skb->priority, vlan_tci); + + switch (skb->pkt_type) { + case PACKET_BROADCAST: + /* Yeah, stats collect these together.. */ + /* stats->broadcast ++; // no such counter :-( */ + break; + + case PACKET_MULTICAST: + rx_stats->rx_multicast++; + break; + + case PACKET_OTHERHOST: + /* Our lower layer thinks this is not local, let's make + * sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. 
+ */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + skb->dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + break; + default: + break; + } + u64_stats_update_end(&rx_stats->syncp); } + skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); - skb = vlan_check_reorder_header(skb); - if (!skb) { - rx_stats->rx_errors++; - goto err_unlock; + if (vlan_dev) { + skb = vlan_check_reorder_header(skb); + if (!skb) { + rx_stats->rx_errors++; + goto err_unlock; + } } netif_rx(skb); @@ -801,37 +820,65 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev) return dev_ethtool_get_flags(vlan->real_dev); } -static struct net_device_stats *vlan_dev_get_stats(struct net_device *dev) +static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - struct net_device_stats *stats = &dev->stats; - dev_txq_stats_fold(dev, stats); if (vlan_dev_info(dev)->vlan_rx_stats) { - struct vlan_rx_stats *p, rx = {0}; + struct vlan_rx_stats *p, accum = {0}; int i; for_each_possible_cpu(i) { + u64 rxpackets, rxbytes, rxmulticast; + unsigned int start; + p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i); - rx.rx_packets += p->rx_packets; - rx.rx_bytes += p->rx_bytes; - rx.rx_errors += p->rx_errors; - rx.multicast += p->multicast; + do { + start = u64_stats_fetch_begin_bh(&p->syncp); + rxpackets = p->rx_packets; + rxbytes = p->rx_bytes; + rxmulticast = p->rx_multicast; + } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + accum.rx_packets += rxpackets; + accum.rx_bytes += rxbytes; + accum.rx_multicast += rxmulticast; + /* rx_errors is an ulong, not protected by syncp */ + accum.rx_errors += p->rx_errors; } - stats->rx_packets = rx.rx_packets; - stats->rx_bytes = rx.rx_bytes; - stats->rx_errors = rx.rx_errors; - stats->multicast = rx.multicast; + stats->rx_packets = accum.rx_packets; + stats->rx_bytes = accum.rx_bytes; + stats->rx_errors = accum.rx_errors; + stats->multicast = accum.rx_multicast; } return stats; } +static int 
vlan_ethtool_set_tso(struct net_device *dev, u32 data) +{ + if (data) { + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + + /* Underlying device must support TSO for VLAN-tagged packets + * and must have TSO enabled now. + */ + if (!(real_dev->vlan_features & NETIF_F_TSO)) + return -EOPNOTSUPP; + if (!(real_dev->features & NETIF_F_TSO)) + return -EINVAL; + dev->features |= NETIF_F_TSO; + } else { + dev->features &= ~NETIF_F_TSO; + } + return 0; +} + static const struct ethtool_ops vlan_ethtool_ops = { .get_settings = vlan_ethtool_get_settings, .get_drvinfo = vlan_ethtool_get_drvinfo, .get_link = ethtool_op_get_link, .get_rx_csum = vlan_ethtool_get_rx_csum, .get_flags = vlan_ethtool_get_flags, + .set_tso = vlan_ethtool_set_tso, }; static const struct net_device_ops vlan_netdev_ops = { @@ -848,7 +895,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats = vlan_dev_get_stats, + .ndo_get_stats64 = vlan_dev_get_stats64, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, @@ -872,7 +919,7 @@ static const struct net_device_ops vlan_netdev_accel_ops = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats = vlan_dev_get_stats, + .ndo_get_stats64 = vlan_dev_get_stats64, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, @@ -897,7 +944,7 @@ static const struct net_device_ops vlan_netdev_ops_sq = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats = vlan_dev_get_stats, + .ndo_get_stats64 = vlan_dev_get_stats64, #if defined(CONFIG_FCOE) || 
defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, @@ -922,7 +969,7 @@ static const struct net_device_ops vlan_netdev_accel_ops_sq = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats = vlan_dev_get_stats, + .ndo_get_stats64 = vlan_dev_get_stats64, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index afead353e215..80e280f56686 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -278,25 +278,27 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) { struct net_device *vlandev = (struct net_device *) seq->private; const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); - const struct net_device_stats *stats; + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats; static const char fmt[] = "%30s %12lu\n"; + static const char fmt64[] = "%30s %12llu\n"; int i; if (!is_vlan_dev(vlandev)) return 0; - stats = dev_get_stats(vlandev); + stats = dev_get_stats(vlandev, &temp); seq_printf(seq, "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n", vlandev->name, dev_info->vlan_id, (int)(dev_info->flags & 1), vlandev->priv_flags); - seq_printf(seq, fmt, "total frames received", stats->rx_packets); - seq_printf(seq, fmt, "total bytes received", stats->rx_bytes); - seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast); + seq_printf(seq, fmt64, "total frames received", stats->rx_packets); + seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes); + seq_printf(seq, fmt64, "Broadcast/Multicast Rcvd", stats->multicast); seq_puts(seq, "\n"); - seq_printf(seq, fmt, "total frames transmitted", stats->tx_packets); - seq_printf(seq, fmt, "total bytes transmitted", stats->tx_bytes); + seq_printf(seq, fmt64, 
"total frames transmitted", stats->tx_packets); + seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes); seq_printf(seq, fmt, "total headroom inc", dev_info->cnt_inc_headroom_on_tx); seq_printf(seq, fmt, "total encap on xmit", diff --git a/net/9p/client.c b/net/9p/client.c index 37c8da07a80b..dc6f2f26d023 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -460,7 +460,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) return err; } - if (p9_is_proto_dotu(c)) + if (p9_is_proto_dotu(c) || + p9_is_proto_dotl(c)) err = -ecode; if (!err || !IS_ERR_VALUE(err)) @@ -1015,14 +1016,18 @@ int p9_client_open(struct p9_fid *fid, int mode) struct p9_qid qid; int iounit; - P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode); - err = 0; clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", + p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); + err = 0; if (fid->mode != -1) return -EINVAL; - req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); + if (p9_is_proto_dotl(clnt)) + req = p9_client_rpc(clnt, P9_TLOPEN, "dd", fid->fid, mode); + else + req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1034,10 +1039,9 @@ int p9_client_open(struct p9_fid *fid, int mode) goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, - (unsigned long long)qid.path, - qid.version, iounit); + P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", + p9_is_proto_dotl(clnt) ? 
"RLOPEN" : "ROPEN", qid.type, + (unsigned long long)qid.path, qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1049,6 +1053,50 @@ error: } EXPORT_SYMBOL(p9_client_open); +int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, + gid_t gid, struct p9_qid *qid) +{ + int err = 0; + struct p9_client *clnt; + struct p9_req_t *req; + int iounit; + + P9_DPRINTK(P9_DEBUG_9P, + ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", + ofid->fid, name, flags, mode, gid); + clnt = ofid->clnt; + + if (ofid->mode != -1) + return -EINVAL; + + req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags, + mode, gid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", + qid->type, + (unsigned long long)qid->path, + qid->version, iounit); + + ofid->mode = mode; + ofid->iounit = iounit; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_create_dotl); + int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension) { @@ -1094,6 +1142,59 @@ error: } EXPORT_SYMBOL(p9_client_fcreate); +int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, + struct p9_qid *qid) +{ + int err = 0; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", + dfid->fid, name, symtgt); + clnt = dfid->clnt; + + req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt, + gid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", + qid->type, (unsigned long long)qid->path, 
qid->version); + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_symlink); + +int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) +{ + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", + dfid->fid, oldfid->fid, newname); + clnt = dfid->clnt; + req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, + newname); + if (IS_ERR(req)) + return PTR_ERR(req); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n"); + p9_free_req(clnt, req); + return 0; +} +EXPORT_SYMBOL(p9_client_link); + int p9_client_clunk(struct p9_fid *fid) { int err; @@ -1139,9 +1240,8 @@ int p9_client_remove(struct p9_fid *fid) P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); p9_free_req(clnt, req); - p9_fid_destroy(fid); - error: + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_remove); @@ -1302,6 +1402,65 @@ error: } EXPORT_SYMBOL(p9_client_stat); +struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, + u64 request_mask) +{ + int err; + struct p9_client *clnt; + struct p9_stat_dotl *ret = kmalloc(sizeof(struct p9_stat_dotl), + GFP_KERNEL); + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", + fid->fid, request_mask); + + if (!ret) + return ERR_PTR(-ENOMEM); + + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, + "<<< RGETATTR st_result_mask=%lld\n" + "<<< qid=%x.%llx.%x\n" + "<<< st_mode=%8.8x st_nlink=%llu\n" + "<<< st_uid=%d st_gid=%d\n" + "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n" + "<<< st_atime_sec=%lld st_atime_nsec=%lld\n" + "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n" + "<<< 
st_ctime_sec=%lld st_ctime_nsec=%lld\n" + "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" + "<<< st_gen=%lld st_data_version=%lld", + ret->st_result_mask, ret->qid.type, ret->qid.path, + ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid, + ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize, + ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, + ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, + ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, + ret->st_gen, ret->st_data_version); + + p9_free_req(clnt, req); + return ret; + +error: + kfree(ret); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_getattr_dotl); + static int p9_client_statsize(struct p9_wstat *wst, int proto_version) { int ret; @@ -1366,6 +1525,36 @@ error: } EXPORT_SYMBOL(p9_client_wstat); +int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, + " valid=%x mode=%x uid=%d gid=%d size=%lld\n" + " atime_sec=%lld atime_nsec=%lld\n" + " mtime_sec=%lld mtime_nsec=%lld\n", + p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, + p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, + p9attr->mtime_sec, p9attr->mtime_nsec); + + req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr); + + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_setattr); + int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) { int err; @@ -1432,3 +1621,187 @@ error: } EXPORT_SYMBOL(p9_client_rename); +/* + * An xattrwalk without @attr_name gives the fid for the lisxattr namespace + */ +struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, + const char *attr_name, u64 *attr_size) +{ + int err; + struct p9_req_t *req; + struct 
p9_client *clnt; + struct p9_fid *attr_fid; + + err = 0; + clnt = file_fid->clnt; + attr_fid = p9_fid_create(clnt); + if (IS_ERR(attr_fid)) { + err = PTR_ERR(attr_fid); + attr_fid = NULL; + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, + ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", + file_fid->fid, attr_fid->fid, attr_name); + + req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds", + file_fid->fid, attr_fid->fid, attr_name); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto clunk_fid; + } + p9_free_req(clnt, req); + P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", + attr_fid->fid, *attr_size); + return attr_fid; +clunk_fid: + p9_client_clunk(attr_fid); + attr_fid = NULL; +error: + if (attr_fid && (attr_fid != file_fid)) + p9_fid_destroy(attr_fid); + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(p9_client_xattrwalk); + +int p9_client_xattrcreate(struct p9_fid *fid, const char *name, + u64 attr_size, int flags) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, + ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n", + fid->fid, name, (long long)attr_size, flags); + err = 0; + clnt = fid->clnt; + req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd", + fid->fid, name, attr_size, flags); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL_GPL(p9_client_xattrcreate); + +int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) +{ + int err, rsize, total; + struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + fid->fid, (long long unsigned) offset, count); + + err = 0; + clnt = fid->clnt; + total = 0; + + rsize = fid->iounit; + if 
(!rsize || rsize > clnt->msize-P9_READDIRHDRSZ) + rsize = clnt->msize - P9_READDIRHDRSZ; + + if (count < rsize) + rsize = count; + + req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + + if (data) + memmove(data, dataptr, count); + + p9_free_req(clnt, req); + return count; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_readdir); + +int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, + dev_t rdev, gid_t gid, struct p9_qid *qid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " + "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); + req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, + MAJOR(rdev), MINOR(rdev), gid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, + (unsigned long long)qid->path, qid->version); + +error: + p9_free_req(clnt, req); + return err; + +} +EXPORT_SYMBOL(p9_client_mknod_dotl); + +int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, + gid_t gid, struct p9_qid *qid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", + fid->fid, name, mode, gid); + req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, + gid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + 
p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, + (unsigned long long)qid->path, qid->version); + +error: + p9_free_req(clnt, req); + return err; + +} +EXPORT_SYMBOL(p9_client_mkdir_dotl); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 149f82160130..3acd3afb20c8 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -141,6 +141,7 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) D - data blob (int32_t size followed by void *, results are not freed) T - array of strings (int16_t count, followed by strings) R - array of qids (int16_t count, followed by qids) + A - stat for 9p2000.L (p9_stat_dotl) ? - if optional = 1, continue parsing */ @@ -340,6 +341,33 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } } break; + case 'A': { + struct p9_stat_dotl *stbuf = + va_arg(ap, struct p9_stat_dotl *); + + memset(stbuf, 0, sizeof(struct p9_stat_dotl)); + errcode = + p9pdu_readf(pdu, proto_version, + "qQdddqqqqqqqqqqqqqqq", + &stbuf->st_result_mask, + &stbuf->qid, + &stbuf->st_mode, + &stbuf->st_uid, &stbuf->st_gid, + &stbuf->st_nlink, + &stbuf->st_rdev, &stbuf->st_size, + &stbuf->st_blksize, &stbuf->st_blocks, + &stbuf->st_atime_sec, + &stbuf->st_atime_nsec, + &stbuf->st_mtime_sec, + &stbuf->st_mtime_nsec, + &stbuf->st_ctime_sec, + &stbuf->st_ctime_nsec, + &stbuf->st_btime_sec, + &stbuf->st_btime_nsec, + &stbuf->st_gen, + &stbuf->st_data_version); + } + break; case '?': if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) @@ -488,6 +516,23 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, } } break; + case 'I':{ + struct p9_iattr_dotl *p9attr = va_arg(ap, + struct p9_iattr_dotl *); + + errcode = p9pdu_writef(pdu, proto_version, + "ddddqqqqq", + p9attr->valid, + p9attr->mode, + p9attr->uid, + p9attr->gid, + p9attr->size, + p9attr->atime_sec, + p9attr->atime_nsec, + p9attr->mtime_sec, + p9attr->mtime_nsec); 
+ } + break; case '?': if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) @@ -580,3 +625,30 @@ void p9pdu_reset(struct p9_fcall *pdu) pdu->offset = 0; pdu->size = 0; } + +int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, + int proto_version) +{ + struct p9_fcall fake_pdu; + int ret; + char *nameptr; + + fake_pdu.size = len; + fake_pdu.capacity = len; + fake_pdu.sdata = buf; + fake_pdu.offset = 0; + + ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid, + &dirent->d_off, &dirent->d_type, &nameptr); + if (ret) { + P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); + p9pdu_dump(1, &fake_pdu); + goto out; + } + + strcpy(dirent->d_name, nameptr); + +out: + return fake_pdu.offset; +} +EXPORT_SYMBOL(p9dirent_read); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 98ce9bcb0e15..c85109d809ca 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -948,7 +948,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; - if (strlen(addr) > UNIX_PATH_MAX) { + if (strlen(addr) >= UNIX_PATH_MAX) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", addr); return -ENAMETOOLONG; diff --git a/net/Kconfig b/net/Kconfig index 0d68b40fc0e6..e330594d3709 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -32,7 +32,7 @@ config WANT_COMPAT_NETLINK_MESSAGES config COMPAT_NETLINK_MESSAGES def_bool y depends on COMPAT - depends on WIRELESS_EXT || WANT_COMPAT_NETLINK_MESSAGES + depends on WEXT_CORE || WANT_COMPAT_NETLINK_MESSAGES help This option makes it possible to send different netlink messages to tasks depending on whether the task is a compat task or not. To @@ -86,6 +86,16 @@ config NETWORK_SECMARK to nfmark, but designated for security purposes. If you are unsure how to answer this question, answer N. 
+config NETWORK_PHY_TIMESTAMPING + bool "Timestamping in PHY devices" + depends on EXPERIMENTAL + help + This allows timestamping of network packets by PHYs with + hardware timestamping capabilities. This option adds some + overhead in the transmit and receive paths. + + If you are unsure how to answer this question, answer N. + menuconfig NETFILTER bool "Network packet filtering framework (Netfilter)" ---help--- @@ -203,6 +213,7 @@ source "net/phonet/Kconfig" source "net/ieee802154/Kconfig" source "net/sched/Kconfig" source "net/dcb/Kconfig" +source "net/dns_resolver/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index cb7bdc1210cb..ea60fbce9b1b 100644 --- a/net/Makefile +++ b/net/Makefile @@ -50,7 +50,7 @@ endif obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_RDS) += rds/ -obj-y += wireless/ +obj-$(CONFIG_WIRELESS) += wireless/ obj-$(CONFIG_MAC80211) += mac80211/ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ @@ -61,9 +61,10 @@ obj-$(CONFIG_CAIF) += caif/ ifneq ($(CONFIG_DCB),) obj-y += dcb/ endif -obj-y += ieee802154/ +obj-$(CONFIG_IEEE802154) += ieee802154/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o endif obj-$(CONFIG_WIMAX) += wimax/ +obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 6719af6a59fa..651babdfab38 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -139,6 +139,43 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s) return NULL; } +static int atm_dev_event(struct notifier_block *this, unsigned long event, + void *arg) +{ + struct atm_dev *atm_dev = arg; + struct list_head *lh; + struct net_device *net_dev; + struct br2684_vcc *brvcc; + struct atm_vcc *atm_vcc; + unsigned long flags; + + pr_debug("event=%ld dev=%p\n", event, atm_dev); + + read_lock_irqsave(&devs_lock, flags); + list_for_each(lh, &br2684_devs) { + net_dev = list_entry_brdev(lh); + + list_for_each_entry(brvcc, 
&BRPRIV(net_dev)->brvccs, brvccs) { + atm_vcc = brvcc->atmvcc; + if (atm_vcc && brvcc->atmvcc->dev == atm_dev) { + + if (atm_vcc->dev->signal == ATM_PHY_SIG_LOST) + netif_carrier_off(net_dev); + else + netif_carrier_on(net_dev); + + } + } + } + read_unlock_irqrestore(&devs_lock, flags); + + return NOTIFY_DONE; +} + +static struct notifier_block atm_dev_notifier = { + .notifier_call = atm_dev_event, +}; + /* chained vcc->pop function. Check if we should wake the netif_queue */ static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb) { @@ -362,6 +399,12 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) unregister_netdev(net_dev); free_netdev(net_dev); } + read_lock_irq(&devs_lock); + if (list_empty(&br2684_devs)) { + /* last br2684 device */ + unregister_atmdevice_notifier(&atm_dev_notifier); + } + read_unlock_irq(&devs_lock); return; } @@ -530,6 +573,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) br2684_push(atmvcc, skb); } + + /* initialize netdev carrier state */ + if (atmvcc->dev->signal == ATM_PHY_SIG_LOST) + netif_carrier_off(net_dev); + else + netif_carrier_on(net_dev); + __module_get(THIS_MODULE); return 0; @@ -620,9 +670,16 @@ static int br2684_create(void __user *arg) } write_lock_irq(&devs_lock); + brdev->payload = payload; - brdev->number = list_empty(&br2684_devs) ? 
1 : - BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; + + if (list_empty(&br2684_devs)) { + /* 1st br2684 device */ + register_atmdevice_notifier(&atm_dev_notifier); + brdev->number = 1; + } else + brdev->number = BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; + list_add_tail(&brdev->br2684_devs, &br2684_devs); write_unlock_irq(&devs_lock); return 0; @@ -772,6 +829,11 @@ static void __exit br2684_exit(void) remove_proc_entry("br2684", atm_proc_root); #endif + + /* if not already empty */ + if (!list_empty(&br2684_devs)) + unregister_atmdevice_notifier(&atm_dev_notifier); + while (!list_empty(&br2684_devs)) { net_dev = list_entry_brdev(br2684_devs.next); brdev = BRPRIV(net_dev); diff --git a/net/atm/clip.c b/net/atm/clip.c index 313aba11316b..95fdd1185067 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -522,7 +522,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) error = ip_route_output_key(&init_net, &rt, &fl); if (error) return error; - neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1); + neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); ip_rt_put(rt); if (!neigh) return -ENOMEM; diff --git a/net/atm/common.c b/net/atm/common.c index b43feb1a3995..940404a73b3d 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -37,6 +37,8 @@ EXPORT_SYMBOL(vcc_hash); DEFINE_RWLOCK(vcc_sklist_lock); EXPORT_SYMBOL(vcc_sklist_lock); +static ATOMIC_NOTIFIER_HEAD(atm_dev_notify_chain); + static void __vcc_insert_socket(struct sock *sk) { struct atm_vcc *vcc = atm_sk(sk); @@ -212,6 +214,22 @@ void vcc_release_async(struct atm_vcc *vcc, int reply) } EXPORT_SYMBOL(vcc_release_async); +void atm_dev_signal_change(struct atm_dev *dev, char signal) +{ + pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n", + __func__, signal, dev, dev->number, dev->signal); + + /* atm driver sending invalid signal */ + WARN_ON(signal < ATM_PHY_SIG_LOST || signal > ATM_PHY_SIG_FOUND); + + if (dev->signal == signal) + return; /* no change */ + + dev->signal 
= signal; + + atomic_notifier_call_chain(&atm_dev_notify_chain, signal, dev); +} +EXPORT_SYMBOL(atm_dev_signal_change); void atm_dev_release_vccs(struct atm_dev *dev) { @@ -781,6 +799,18 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len); } +int register_atmdevice_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&atm_dev_notify_chain, nb); +} +EXPORT_SYMBOL_GPL(register_atmdevice_notifier); + +void unregister_atmdevice_notifier(struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&atm_dev_notify_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_atmdevice_notifier); + static int __init atm_init(void) { int error; diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index ee3b3049d385..ed371684c133 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -43,19 +43,6 @@ config BT_L2CAP Say Y here to compile L2CAP support into the kernel or say M to compile it as module (l2cap). -config BT_L2CAP_EXT_FEATURES - bool "L2CAP Extended Features support (EXPERIMENTAL)" - depends on BT_L2CAP && EXPERIMENTAL - help - This option enables the L2CAP Extended Features support. These - new features include the Enhanced Retransmission and Streaming - Modes, the Frame Check Sequence (FCS), and Segmentation and - Reassembly (SAR) for L2CAP packets. They are a required for the - new Alternate MAC/PHY and the Bluetooth Medical Profile. - - You should say N unless you know what you are doing. Note that - this is in an experimental state yet. 
- config BT_SCO tristate "SCO links support" depends on BT diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 0d9e506f5d5a..70672544db86 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -86,26 +86,26 @@ struct bnep_setup_conn_req { __u8 ctrl; __u8 uuid_size; __u8 service[0]; -} __attribute__((packed)); +} __packed; struct bnep_set_filter_req { __u8 type; __u8 ctrl; __be16 len; __u8 list[0]; -} __attribute__((packed)); +} __packed; struct bnep_control_rsp { __u8 type; __u8 ctrl; __be16 resp; -} __attribute__((packed)); +} __packed; struct bnep_ext_hdr { __u8 type; __u8 len; __u8 data[0]; -} __attribute__((packed)); +} __packed; /* BNEP ioctl defines */ #define BNEPCONNADD _IOW('B', 200, int) diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 0faad5ce6dc4..8c100c9dae28 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -104,6 +104,8 @@ static void bnep_net_set_mc_list(struct net_device *dev) break; memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); + + i++; } r->len = htons(skb->len - len); } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b10e3cdb08f8..0b1e460fe440 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1,6 +1,6 @@ /* BlueZ - Bluetooth protocol stack for Linux - Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. 
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -155,6 +155,27 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); } +/* Device _must_ be locked */ +void hci_sco_setup(struct hci_conn *conn, __u8 status) +{ + struct hci_conn *sco = conn->link; + + BT_DBG("%p", conn); + + if (!sco) + return; + + if (!status) { + if (lmp_esco_capable(conn->hdev)) + hci_setup_sync(sco, conn->handle); + else + hci_add_sco(sco, conn->handle); + } else { + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); + } +} + static void hci_conn_timeout(unsigned long arg) { struct hci_conn *conn = (void *) arg; @@ -358,6 +379,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 acl->sec_level = sec_level; acl->auth_type = auth_type; hci_acl_connect(acl); + } else { + if (acl->sec_level < sec_level) + acl->sec_level = sec_level; + if (acl->auth_type < auth_type) + acl->auth_type = auth_type; } if (type == ACL_LINK) @@ -380,10 +406,13 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 acl->power_save = 1; hci_conn_enter_active_mode(acl); - if (lmp_esco_capable(hdev)) - hci_setup_sync(sco, acl->handle); - else - hci_add_sco(sco, acl->handle); + if (test_bit(HCI_CONN_MODE_CHANGE_PEND, &acl->pend)) { + /* defer SCO setup until mode change completed */ + set_bit(HCI_CONN_SCO_SETUP_PEND, &acl->pend); + return sco; + } + + hci_sco_setup(acl, 0x00); } return sco; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2f768de87011..8303f1c9ef54 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -562,6 +562,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_dev_lock_bh(hdev); inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); + hci_blacklist_clear(hdev); hci_dev_unlock_bh(hdev); hci_notify(hdev, HCI_DEV_DOWN); @@ -913,7 +914,7 @@ int hci_register_dev(struct hci_dev *hdev) skb_queue_head_init(&hdev->cmd_q); 
skb_queue_head_init(&hdev->raw_q); - for (i = 0; i < 3; i++) + for (i = 0; i < NUM_REASSEMBLY; i++) hdev->reassembly[i] = NULL; init_waitqueue_head(&hdev->req_wait_q); @@ -923,6 +924,8 @@ int hci_register_dev(struct hci_dev *hdev) hci_conn_hash_init(hdev); + INIT_LIST_HEAD(&hdev->blacklist.list); + memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); atomic_set(&hdev->promisc, 0); @@ -970,7 +973,7 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_dev_do_close(hdev); - for (i = 0; i < 3; i++) + for (i = 0; i < NUM_REASSEMBLY; i++) kfree_skb(hdev->reassembly[i]); hci_notify(hdev, HCI_DEV_UNREG); @@ -1030,89 +1033,170 @@ int hci_recv_frame(struct sk_buff *skb) } EXPORT_SYMBOL(hci_recv_frame); -/* Receive packet type fragment */ -#define __reassembly(hdev, type) ((hdev)->reassembly[(type) - 2]) - -int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count) +static int hci_reassembly(struct hci_dev *hdev, int type, void *data, + int count, __u8 index, gfp_t gfp_mask) { - if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) + int len = 0; + int hlen = 0; + int remain = count; + struct sk_buff *skb; + struct bt_skb_cb *scb; + + if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) || + index >= NUM_REASSEMBLY) return -EILSEQ; + skb = hdev->reassembly[index]; + + if (!skb) { + switch (type) { + case HCI_ACLDATA_PKT: + len = HCI_MAX_FRAME_SIZE; + hlen = HCI_ACL_HDR_SIZE; + break; + case HCI_EVENT_PKT: + len = HCI_MAX_EVENT_SIZE; + hlen = HCI_EVENT_HDR_SIZE; + break; + case HCI_SCODATA_PKT: + len = HCI_MAX_SCO_SIZE; + hlen = HCI_SCO_HDR_SIZE; + break; + } + + skb = bt_skb_alloc(len, gfp_mask); + if (!skb) + return -ENOMEM; + + scb = (void *) skb->cb; + scb->expect = hlen; + scb->pkt_type = type; + + skb->dev = (void *) hdev; + hdev->reassembly[index] = skb; + } + while (count) { - struct sk_buff *skb = __reassembly(hdev, type); - struct { int expect; } *scb; - int len = 0; + scb = (void *) skb->cb; + len = min(scb->expect, (__u16)count); - if (!skb) { - /* 
Start of the frame */ + memcpy(skb_put(skb, len), data, len); - switch (type) { - case HCI_EVENT_PKT: - if (count >= HCI_EVENT_HDR_SIZE) { - struct hci_event_hdr *h = data; - len = HCI_EVENT_HDR_SIZE + h->plen; - } else - return -EILSEQ; - break; + count -= len; + data += len; + scb->expect -= len; + remain = count; - case HCI_ACLDATA_PKT: - if (count >= HCI_ACL_HDR_SIZE) { - struct hci_acl_hdr *h = data; - len = HCI_ACL_HDR_SIZE + __le16_to_cpu(h->dlen); - } else - return -EILSEQ; - break; + switch (type) { + case HCI_EVENT_PKT: + if (skb->len == HCI_EVENT_HDR_SIZE) { + struct hci_event_hdr *h = hci_event_hdr(skb); + scb->expect = h->plen; + + if (skb_tailroom(skb) < scb->expect) { + kfree_skb(skb); + hdev->reassembly[index] = NULL; + return -ENOMEM; + } + } + break; - case HCI_SCODATA_PKT: - if (count >= HCI_SCO_HDR_SIZE) { - struct hci_sco_hdr *h = data; - len = HCI_SCO_HDR_SIZE + h->dlen; - } else - return -EILSEQ; - break; + case HCI_ACLDATA_PKT: + if (skb->len == HCI_ACL_HDR_SIZE) { + struct hci_acl_hdr *h = hci_acl_hdr(skb); + scb->expect = __le16_to_cpu(h->dlen); + + if (skb_tailroom(skb) < scb->expect) { + kfree_skb(skb); + hdev->reassembly[index] = NULL; + return -ENOMEM; + } } + break; - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) { - BT_ERR("%s no memory for packet", hdev->name); - return -ENOMEM; + case HCI_SCODATA_PKT: + if (skb->len == HCI_SCO_HDR_SIZE) { + struct hci_sco_hdr *h = hci_sco_hdr(skb); + scb->expect = h->dlen; + + if (skb_tailroom(skb) < scb->expect) { + kfree_skb(skb); + hdev->reassembly[index] = NULL; + return -ENOMEM; + } } + break; + } + + if (scb->expect == 0) { + /* Complete frame */ - skb->dev = (void *) hdev; bt_cb(skb)->pkt_type = type; + hci_recv_frame(skb); - __reassembly(hdev, type) = skb; + hdev->reassembly[index] = NULL; + return remain; + } + } - scb = (void *) skb->cb; - scb->expect = len; - } else { - /* Continuation */ + return remain; +} - scb = (void *) skb->cb; - len = scb->expect; - } +int 
hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count) +{ + int rem = 0; - len = min(len, count); + if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) + return -EILSEQ; - memcpy(skb_put(skb, len), data, len); + while (count) { + rem = hci_reassembly(hdev, type, data, count, + type - 1, GFP_ATOMIC); + if (rem < 0) + return rem; - scb->expect -= len; + data += (count - rem); + count = rem; + }; - if (scb->expect == 0) { - /* Complete frame */ + return rem; +} +EXPORT_SYMBOL(hci_recv_fragment); - __reassembly(hdev, type) = NULL; +#define STREAM_REASSEMBLY 0 - bt_cb(skb)->pkt_type = type; - hci_recv_frame(skb); - } +int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count) +{ + int type; + int rem = 0; - count -= len; data += len; - } + while (count) { + struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY]; - return 0; + if (!skb) { + struct { char type; } *pkt; + + /* Start of the frame */ + pkt = data; + type = pkt->type; + + data++; + count--; + } else + type = bt_cb(skb)->pkt_type; + + rem = hci_reassembly(hdev, type, data, + count, STREAM_REASSEMBLY, GFP_ATOMIC); + if (rem < 0) + return rem; + + data += (count - rem); + count = rem; + }; + + return rem; } -EXPORT_SYMBOL(hci_recv_fragment); +EXPORT_SYMBOL(hci_recv_stream_fragment); /* ---- Interface to upper protocols ---- */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6c57fc71c7e2..bfef5bae0b3a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1,6 +1,6 @@ /* BlueZ - Bluetooth protocol stack for Linux - Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. 
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -584,7 +584,7 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) conn->out = 1; conn->link_mode |= HCI_LM_MASTER; } else - BT_ERR("No memmory for new connection"); + BT_ERR("No memory for new connection"); } } @@ -785,9 +785,13 @@ static void hci_cs_sniff_mode(struct hci_dev *hdev, __u8 status) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) + if (conn) { clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); + if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend)) + hci_sco_setup(conn, status); + } + hci_dev_unlock(hdev); } @@ -808,9 +812,13 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) + if (conn) { clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); + if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend)) + hci_sco_setup(conn, status); + } + hci_dev_unlock(hdev); } @@ -915,20 +923,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s } else conn->state = BT_CLOSED; - if (conn->type == ACL_LINK) { - struct hci_conn *sco = conn->link; - if (sco) { - if (!ev->status) { - if (lmp_esco_capable(hdev)) - hci_setup_sync(sco, conn->handle); - else - hci_add_sco(sco, conn->handle); - } else { - hci_proto_connect_cfm(sco, ev->status); - hci_conn_del(sco); - } - } - } + if (conn->type == ACL_LINK) + hci_sco_setup(conn, ev->status); if (ev->status) { hci_proto_connect_cfm(conn, ev->status); @@ -952,7 +948,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); - if (mask & HCI_LM_ACCEPT) { + if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) { /* Connection accepted */ struct inquiry_entry *ie; struct hci_conn *conn; @@ -965,7 +961,7 @@ static inline void 
hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { - BT_ERR("No memmory for new connection"); + BT_ERR("No memory for new connection"); hci_dev_unlock(hdev); return; } @@ -1049,6 +1045,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn) { if (!ev->status) conn->link_mode |= HCI_LM_AUTH; + else + conn->sec_level = BT_SECURITY_LOW; clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); @@ -1479,6 +1477,9 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb else conn->power_save = 0; } + + if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend)) + hci_sco_setup(conn, ev->status); } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 38f08f6b86f6..4f170a595934 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -165,6 +165,86 @@ static int hci_sock_release(struct socket *sock) return 0; } +struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct list_head *p; + struct bdaddr_list *blacklist = &hdev->blacklist; + + list_for_each(p, &blacklist->list) { + struct bdaddr_list *b; + + b = list_entry(p, struct bdaddr_list, list); + + if (bacmp(bdaddr, &b->bdaddr) == 0) + return b; + } + + return NULL; +} + +static int hci_blacklist_add(struct hci_dev *hdev, void __user *arg) +{ + bdaddr_t bdaddr; + struct bdaddr_list *entry; + + if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) + return -EFAULT; + + if (bacmp(&bdaddr, BDADDR_ANY) == 0) + return -EBADF; + + if (hci_blacklist_lookup(hdev, &bdaddr)) + return -EEXIST; + + entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + bacpy(&entry->bdaddr, &bdaddr); + + list_add(&entry->list, &hdev->blacklist.list); + + return 0; +} + +int hci_blacklist_clear(struct hci_dev *hdev) +{ + 
struct list_head *p, *n; + struct bdaddr_list *blacklist = &hdev->blacklist; + + list_for_each_safe(p, n, &blacklist->list) { + struct bdaddr_list *b; + + b = list_entry(p, struct bdaddr_list, list); + + list_del(p); + kfree(b); + } + + return 0; +} + +static int hci_blacklist_del(struct hci_dev *hdev, void __user *arg) +{ + bdaddr_t bdaddr; + struct bdaddr_list *entry; + + if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) + return -EFAULT; + + if (bacmp(&bdaddr, BDADDR_ANY) == 0) + return hci_blacklist_clear(hdev); + + entry = hci_blacklist_lookup(hdev, &bdaddr); + if (!entry) + return -ENOENT; + + list_del(&entry->list); + kfree(entry); + + return 0; +} + /* Ioctls that require bound socket */ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { @@ -194,6 +274,16 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign case HCIGETAUTHINFO: return hci_get_auth_info(hdev, (void __user *) arg); + case HCIBLOCKADDR: + if (!capable(CAP_NET_ADMIN)) + return -EACCES; + return hci_blacklist_add(hdev, (void __user *) arg); + + case HCIUNBLOCKADDR: + if (!capable(CAP_NET_ADMIN)) + return -EACCES; + return hci_blacklist_del(hdev, (void __user *) arg); + default: if (hdev->ioctl) return hdev->ioctl(hdev, cmd, arg); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 463ffa4fe042..ce44c47eeac1 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -436,6 +436,41 @@ static const struct file_operations inquiry_cache_fops = { .release = single_release, }; +static int blacklist_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *blacklist = &hdev->blacklist; + struct list_head *l; + + hci_dev_lock_bh(hdev); + + list_for_each(l, &blacklist->list) { + struct bdaddr_list *b; + bdaddr_t bdaddr; + + b = list_entry(l, struct bdaddr_list, list); + + baswap(&bdaddr, &b->bdaddr); + + seq_printf(f, "%s\n", batostr(&bdaddr)); + } + + 
hci_dev_unlock_bh(hdev); + + return 0; +} + +static int blacklist_open(struct inode *inode, struct file *file) +{ + return single_open(file, blacklist_show, inode->i_private); +} + +static const struct file_operations blacklist_fops = { + .open = blacklist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; int hci_register_sysfs(struct hci_dev *hdev) { struct device *dev = &hdev->dev; @@ -465,6 +500,9 @@ int hci_register_sysfs(struct hci_dev *hdev) debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, &inquiry_cache_fops); + debugfs_create_file("blacklist", 0444, hdev->debugfs, + hdev, &blacklist_fops); + return 0; } diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 1b682a5aa061..9ba1e8eee37c 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1,6 +1,8 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org> + Copyright (C) 2010 Google Inc. 
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -53,15 +55,9 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> -#define VERSION "2.14" +#define VERSION "2.15" -#ifdef CONFIG_BT_L2CAP_EXT_FEATURES -static int enable_ertm = 1; -#else -static int enable_ertm = 0; -#endif -static int max_transmit = L2CAP_DEFAULT_MAX_TX; -static int tx_window = L2CAP_DEFAULT_TX_WINDOW; +static int disable_ertm = 0; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; static u8 l2cap_fixed_chan[8] = { 0x02, }; @@ -80,9 +76,12 @@ static void __l2cap_sock_close(struct sock *sk, int reason); static void l2cap_sock_close(struct sock *sk); static void l2cap_sock_kill(struct sock *sk); +static int l2cap_build_conf_req(struct sock *sk, void *data); static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); +static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); + /* ---- L2CAP timers ---- */ static void l2cap_sock_timeout(unsigned long arg) { @@ -278,6 +277,24 @@ static void l2cap_chan_del(struct sock *sk, int err) parent->sk_data_ready(parent, 0); } else sk->sk_state_change(sk); + + skb_queue_purge(TX_QUEUE(sk)); + + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { + struct srej_list *l, *tmp; + + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); + del_timer(&l2cap_pi(sk)->ack_timer); + + skb_queue_purge(SREJ_QUEUE(sk)); + skb_queue_purge(BUSY_QUEUE(sk)); + + list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) { + list_del(&l->list); + kfree(l); + } + } } /* Service level security */ @@ -351,8 +368,12 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) struct sk_buff *skb; struct l2cap_hdr *lh; struct l2cap_conn *conn = pi->conn; + struct sock *sk = (struct sock *)pi; int count, hlen = L2CAP_HDR_SIZE + 2; + if (sk->sk_state != BT_CONNECTED) + return; + if (pi->fcs == L2CAP_FCS_CRC16) hlen += 2; @@ -401,6 +422,11 @@ static inline void 
l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) l2cap_send_sframe(pi, control); } +static inline int __l2cap_no_conn_pending(struct sock *sk) +{ + return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND); +} + static void l2cap_do_start(struct sock *sk) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; @@ -409,12 +435,13 @@ static void l2cap_do_start(struct sock *sk) if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) return; - if (l2cap_check_security(sk)) { + if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) { struct l2cap_conn_req req; req.scid = cpu_to_le16(l2cap_pi(sk)->scid); req.psm = l2cap_pi(sk)->psm; l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); @@ -434,24 +461,57 @@ static void l2cap_do_start(struct sock *sk) } } -static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk) +static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask) +{ + u32 local_feat_mask = l2cap_feat_mask; + if (!disable_ertm) + local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING; + + switch (mode) { + case L2CAP_MODE_ERTM: + return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask; + case L2CAP_MODE_STREAMING: + return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask; + default: + return 0x00; + } +} + +static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err) { struct l2cap_disconn_req req; + if (!conn) + return; + + skb_queue_purge(TX_QUEUE(sk)); + + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); + del_timer(&l2cap_pi(sk)->ack_timer); + } + req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid); req.scid = cpu_to_le16(l2cap_pi(sk)->scid); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_DISCONN_REQ, sizeof(req), &req); + + sk->sk_state = BT_DISCONN; + sk->sk_err = err; } /* ---- L2CAP connections 
---- */ static void l2cap_conn_start(struct l2cap_conn *conn) { struct l2cap_chan_list *l = &conn->chan_list; + struct sock_del_list del, *tmp1, *tmp2; struct sock *sk; BT_DBG("conn %p", conn); + INIT_LIST_HEAD(&del.list); + read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { @@ -464,18 +524,38 @@ static void l2cap_conn_start(struct l2cap_conn *conn) } if (sk->sk_state == BT_CONNECT) { - if (l2cap_check_security(sk)) { - struct l2cap_conn_req req; - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; + struct l2cap_conn_req req; - l2cap_pi(sk)->ident = l2cap_get_ident(conn); + if (!l2cap_check_security(sk) || + !__l2cap_no_conn_pending(sk)) { + bh_unlock_sock(sk); + continue; + } - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_REQ, sizeof(req), &req); + if (!l2cap_mode_supported(l2cap_pi(sk)->mode, + conn->feat_mask) + && l2cap_pi(sk)->conf_state & + L2CAP_CONF_STATE2_DEVICE) { + tmp1 = kzalloc(sizeof(struct sock_del_list), + GFP_ATOMIC); + tmp1->sk = sk; + list_add_tail(&tmp1->list, &del.list); + bh_unlock_sock(sk); + continue; } + + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } else if (sk->sk_state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; + char buf[128]; rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); @@ -498,12 +578,31 @@ static void l2cap_conn_start(struct l2cap_conn *conn) l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT || + rsp.result != L2CAP_CR_SUCCESS) { + bh_unlock_sock(sk); + continue; + } + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + 
l2cap_pi(sk)->num_conf_req++; } bh_unlock_sock(sk); } read_unlock(&l->lock); + + list_for_each_entry_safe(tmp1, tmp2, &del.list, list) { + bh_lock_sock(tmp1->sk); + __l2cap_sock_close(tmp1->sk, ECONNRESET); + bh_unlock_sock(tmp1->sk); + list_del(&tmp1->list); + kfree(tmp1); + } } static void l2cap_conn_ready(struct l2cap_conn *conn) @@ -732,9 +831,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason) sk->sk_type == SOCK_STREAM) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; - sk->sk_state = BT_DISCONN; l2cap_sock_set_timer(sk, sk->sk_sndtimeo); - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, reason); } else l2cap_chan_del(sk, reason); break; @@ -794,6 +892,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->imtu = l2cap_pi(parent)->imtu; pi->omtu = l2cap_pi(parent)->omtu; + pi->conf_state = l2cap_pi(parent)->conf_state; pi->mode = l2cap_pi(parent)->mode; pi->fcs = l2cap_pi(parent)->fcs; pi->max_tx = l2cap_pi(parent)->max_tx; @@ -804,13 +903,15 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) } else { pi->imtu = L2CAP_DEFAULT_MTU; pi->omtu = 0; - if (enable_ertm && sk->sk_type == SOCK_STREAM) + if (!disable_ertm && sk->sk_type == SOCK_STREAM) { pi->mode = L2CAP_MODE_ERTM; - else + pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; + } else { pi->mode = L2CAP_MODE_BASIC; - pi->max_tx = max_transmit; + } + pi->max_tx = L2CAP_DEFAULT_MAX_TX; pi->fcs = L2CAP_FCS_CRC16; - pi->tx_win = tx_window; + pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; pi->sec_level = BT_SECURITY_LOW; pi->role_switch = 0; pi->force_reliable = 0; @@ -1059,7 +1160,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: - if (enable_ertm) + if (!disable_ertm) break; /* fall through */ default: @@ -1076,6 +1177,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al case BT_CONNECTED: /* Already connected */ + err = -EISCONN; 
goto done; case BT_OPEN: @@ -1124,7 +1226,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: - if (enable_ertm) + if (!disable_ertm) break; /* fall through */ default: @@ -1277,9 +1379,11 @@ static void l2cap_monitor_timeout(unsigned long arg) { struct sock *sk = (void *) arg; + BT_DBG("sk %p", sk); + bh_lock_sock(sk); if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) { - l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk); + l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk, ECONNABORTED); bh_unlock_sock(sk); return; } @@ -1295,6 +1399,8 @@ static void l2cap_retrans_timeout(unsigned long arg) { struct sock *sk = (void *) arg; + BT_DBG("sk %p", sk); + bh_lock_sock(sk); l2cap_pi(sk)->retry_count = 1; __mod_monitor_timer(); @@ -1333,7 +1439,7 @@ static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) hci_send_acl(pi->conn->hcon, skb, 0); } -static int l2cap_streaming_send(struct sock *sk) +static void l2cap_streaming_send(struct sock *sk) { struct sk_buff *skb, *tx_skb; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1363,7 +1469,6 @@ static int l2cap_streaming_send(struct sock *sk) skb = skb_dequeue(TX_QUEUE(sk)); kfree_skb(skb); } - return 0; } static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) @@ -1387,15 +1492,22 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) if (pi->remote_max_tx && bt_cb(skb)->retries == pi->remote_max_tx) { - l2cap_send_disconn_req(pi->conn, sk); + l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED); return; } tx_skb = skb_clone(skb, GFP_ATOMIC); bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { + control |= L2CAP_CTRL_FINAL; + pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; + } + control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); + put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); if (pi->fcs == 
L2CAP_FCS_CRC16) { @@ -1413,15 +1525,14 @@ static int l2cap_ertm_send(struct sock *sk) u16 control, fcs; int nsent = 0; - if (pi->conn_state & L2CAP_CONN_WAIT_F) - return 0; + if (sk->sk_state != BT_CONNECTED) + return -ENOTCONN; - while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) && - !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) { + while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) { if (pi->remote_max_tx && bt_cb(skb)->retries == pi->remote_max_tx) { - l2cap_send_disconn_req(pi->conn, sk); + l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED); break; } @@ -1430,6 +1541,8 @@ static int l2cap_ertm_send(struct sock *sk) bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + control &= L2CAP_CTRL_SAR; + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { control |= L2CAP_CTRL_FINAL; pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; @@ -1470,16 +1583,11 @@ static int l2cap_retransmit_frames(struct sock *sk) struct l2cap_pinfo *pi = l2cap_pi(sk); int ret; - spin_lock_bh(&pi->send_lock); - if (!skb_queue_empty(TX_QUEUE(sk))) sk->sk_send_head = TX_QUEUE(sk)->next; pi->next_tx_seq = pi->expected_ack_seq; ret = l2cap_ertm_send(sk); - - spin_unlock_bh(&pi->send_lock); - return ret; } @@ -1487,7 +1595,6 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi) { struct sock *sk = (struct sock *)pi; u16 control = 0; - int nframes; control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; @@ -1498,11 +1605,7 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi) return; } - spin_lock_bh(&pi->send_lock); - nframes = l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); - - if (nframes > 0) + if (l2cap_ertm_send(sk) > 0) return; control |= L2CAP_SUPER_RCV_READY; @@ -1697,10 +1800,8 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz size += buflen; } skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk)); - spin_lock_bh(&pi->send_lock); if (sk->sk_send_head == NULL) sk->sk_send_head = sar_queue.next; - 
spin_unlock_bh(&pi->send_lock); return size; } @@ -1745,7 +1846,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms case L2CAP_MODE_BASIC: /* Check outgoing MTU */ if (len > pi->omtu) { - err = -EINVAL; + err = -EMSGSIZE; goto done; } @@ -1772,14 +1873,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms } __skb_queue_tail(TX_QUEUE(sk), skb); - if (pi->mode == L2CAP_MODE_ERTM) - spin_lock_bh(&pi->send_lock); - if (sk->sk_send_head == NULL) sk->sk_send_head = skb; - if (pi->mode == L2CAP_MODE_ERTM) - spin_unlock_bh(&pi->send_lock); } else { /* Segment SDU into multiples PDUs */ err = l2cap_sar_segment_sdu(sk, msg, len); @@ -1788,11 +1884,14 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms } if (pi->mode == L2CAP_MODE_STREAMING) { - err = l2cap_streaming_send(sk); + l2cap_streaming_send(sk); } else { - spin_lock_bh(&pi->send_lock); + if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && + pi->conn_state && L2CAP_CONN_WAIT_F) { + err = len; + break; + } err = l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); } if (err >= 0) @@ -1801,7 +1900,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms default: BT_DBG("bad state %1.1x", pi->mode); - err = -EINVAL; + err = -EBADFD; } done: @@ -1817,6 +1916,8 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { struct l2cap_conn_rsp rsp; + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + u8 buf[128]; sk->sk_state = BT_CONFIG; @@ -1827,6 +1928,16 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) { + release_sock(sk); + return 0; + } + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), 
L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; + release_sock(sk); return 0; } @@ -1863,13 +1974,19 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us break; } + if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) { + err = -EINVAL; + break; + } + l2cap_pi(sk)->mode = opts.mode; switch (l2cap_pi(sk)->mode) { case L2CAP_MODE_BASIC: + l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE; break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: - if (enable_ertm) + if (!disable_ertm) break; /* fall through */ default: @@ -2137,6 +2254,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } + + if (!err && sk->sk_err) + err = -sk->sk_err; + release_sock(sk); return err; } @@ -2357,25 +2478,10 @@ static inline void l2cap_ertm_init(struct sock *sk) __skb_queue_head_init(SREJ_QUEUE(sk)); __skb_queue_head_init(BUSY_QUEUE(sk)); - spin_lock_init(&l2cap_pi(sk)->send_lock); INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work); -} -static int l2cap_mode_supported(__u8 mode, __u32 feat_mask) -{ - u32 local_feat_mask = l2cap_feat_mask; - if (enable_ertm) - local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING; - - switch (mode) { - case L2CAP_MODE_ERTM: - return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask; - case L2CAP_MODE_STREAMING: - return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask; - default: - return 0x00; - } + sk->sk_backlog_rcv = l2cap_ertm_data_rcv; } static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) @@ -2406,10 +2512,10 @@ static int l2cap_build_conf_req(struct sock *sk, void *data) switch (pi->mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: - pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; - if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask)) - l2cap_send_disconn_req(pi->conn, sk); - break; + if (pi->conf_state & L2CAP_CONF_STATE2_DEVICE) + break; + + /* fall through */ default: 
pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask); break; @@ -2420,6 +2526,14 @@ done: case L2CAP_MODE_BASIC: if (pi->imtu != L2CAP_DEFAULT_MTU) l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); + + rfc.mode = L2CAP_MODE_BASIC; + rfc.txwin_size = 0; + rfc.max_transmit = 0; + rfc.retrans_timeout = 0; + rfc.monitor_timeout = 0; + rfc.max_pdu_size = 0; + break; case L2CAP_MODE_ERTM: @@ -2432,9 +2546,6 @@ done: if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); - if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) break; @@ -2455,9 +2566,6 @@ done: if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); - if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) break; @@ -2469,6 +2577,9 @@ done: break; } + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), + (unsigned long) &rfc); + /* FIXME: Need actual value of the flush timeout */ //if (flush_to != L2CAP_DEFAULT_FLUSH_TO) // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to); @@ -2533,18 +2644,21 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) } } - if (pi->num_conf_rsp || pi->num_conf_req) + if (pi->num_conf_rsp || pi->num_conf_req > 1) goto done; switch (pi->mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: - pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; - if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask)) + if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) { + pi->mode = l2cap_select_mode(rfc.mode, + pi->conn->feat_mask); + break; + } + + if (pi->mode != rfc.mode) return -ECONNREFUSED; - break; - default: - pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask); + break; } @@ -2667,7 +2781,6 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, rfc.mode != pi->mode) return 
-ECONNREFUSED; - pi->mode = rfc.mode; pi->fcs = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, @@ -2676,6 +2789,11 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, } } + if (pi->mode == L2CAP_MODE_BASIC && pi->mode != rfc.mode) + return -ECONNREFUSED; + + pi->mode = rfc.mode; + if (*result == L2CAP_CONF_SUCCESS) { switch (rfc.mode) { case L2CAP_MODE_ERTM: @@ -2770,7 +2888,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd struct l2cap_chan_list *list = &conn->chan_list; struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; - struct sock *sk, *parent; + struct sock *parent, *uninitialized_var(sk); int result, status = L2CAP_CS_NO_INFO; u16 dcid = 0, scid = __le16_to_cpu(req->scid); @@ -2879,6 +2997,15 @@ sendresp: L2CAP_INFO_REQ, sizeof(info), &info); } + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) && + result == L2CAP_CR_SUCCESS) { + u8 buf[128]; + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; + } + return 0; } @@ -2899,11 +3026,11 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd if (scid) { sk = l2cap_get_chan_by_scid(&conn->chan_list, scid); if (!sk) - return 0; + return -EFAULT; } else { sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident); if (!sk) - return 0; + return -EFAULT; } switch (result) { @@ -2911,10 +3038,13 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd sk->sk_state = BT_CONFIG; l2cap_pi(sk)->ident = 0; l2cap_pi(sk)->dcid = dcid; - l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND; + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) + break; + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, 
l2cap_build_conf_req(sk, req), req); l2cap_pi(sk)->num_conf_req++; @@ -2950,8 +3080,14 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (!sk) return -ENOENT; - if (sk->sk_state == BT_DISCONN) + if (sk->sk_state != BT_CONFIG) { + struct l2cap_cmd_rej rej; + + rej.reason = cpu_to_le16(0x0002); + l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, + sizeof(rej), &rej); goto unlock; + } /* Reject if config buffer is too small. */ len = cmd_len - sizeof(*req); @@ -2977,7 +3113,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr /* Complete config. */ len = l2cap_parse_conf_req(sk, rsp); if (len < 0) { - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, ECONNRESET); goto unlock; } @@ -3047,7 +3183,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr char req[64]; if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) { - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, ECONNRESET); goto done; } @@ -3056,7 +3192,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr len = l2cap_parse_conf_rsp(sk, rsp->data, len, req, &result); if (len < 0) { - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, ECONNRESET); goto done; } @@ -3069,10 +3205,9 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr } default: - sk->sk_state = BT_DISCONN; sk->sk_err = ECONNRESET; l2cap_sock_set_timer(sk, HZ * 5); - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, ECONNRESET); goto done; } @@ -3123,16 +3258,6 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd sk->sk_shutdown = SHUTDOWN_MASK; - skb_queue_purge(TX_QUEUE(sk)); - - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { - skb_queue_purge(SREJ_QUEUE(sk)); - skb_queue_purge(BUSY_QUEUE(sk)); - del_timer(&l2cap_pi(sk)->retrans_timer); - del_timer(&l2cap_pi(sk)->monitor_timer); - 
del_timer(&l2cap_pi(sk)->ack_timer); - } - l2cap_chan_del(sk, ECONNRESET); bh_unlock_sock(sk); @@ -3155,16 +3280,6 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd if (!sk) return 0; - skb_queue_purge(TX_QUEUE(sk)); - - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { - skb_queue_purge(SREJ_QUEUE(sk)); - skb_queue_purge(BUSY_QUEUE(sk)); - del_timer(&l2cap_pi(sk)->retrans_timer); - del_timer(&l2cap_pi(sk)->monitor_timer); - del_timer(&l2cap_pi(sk)->ack_timer); - } - l2cap_chan_del(sk, 0); bh_unlock_sock(sk); @@ -3187,7 +3302,7 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); - if (enable_ertm) + if (!disable_ertm) feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING | L2CAP_FEAT_FCS; put_unaligned_le32(feat_mask, rsp->data); @@ -3352,7 +3467,7 @@ static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb) our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size); if (our_fcs != rcv_fcs) - return -EINVAL; + return -EBADMSG; } return 0; } @@ -3363,25 +3478,19 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) u16 control = 0; pi->frames_sent = 0; - pi->conn_state |= L2CAP_CONN_SEND_FBIT; control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { - control |= L2CAP_SUPER_RCV_NOT_READY | L2CAP_CTRL_FINAL; + control |= L2CAP_SUPER_RCV_NOT_READY; l2cap_send_sframe(pi, control); pi->conn_state |= L2CAP_CONN_RNR_SENT; - pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; } - if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0) - __mod_retrans_timer(); - - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY) + l2cap_retransmit_frames(sk); - spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); if (!(pi->conn_state & 
L2CAP_CONN_LOCAL_BUSY) && pi->frames_sent == 0) { @@ -3393,6 +3502,8 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar) { struct sk_buff *next_skb; + struct l2cap_pinfo *pi = l2cap_pi(sk); + int tx_seq_offset, next_tx_seq_offset; bt_cb(skb)->tx_seq = tx_seq; bt_cb(skb)->sar = sar; @@ -3403,11 +3514,20 @@ static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_s return 0; } + tx_seq_offset = (tx_seq - pi->buffer_seq) % 64; + if (tx_seq_offset < 0) + tx_seq_offset += 64; + do { if (bt_cb(next_skb)->tx_seq == tx_seq) return -EINVAL; - if (bt_cb(next_skb)->tx_seq > tx_seq) { + next_tx_seq_offset = (bt_cb(next_skb)->tx_seq - + pi->buffer_seq) % 64; + if (next_tx_seq_offset < 0) + next_tx_seq_offset += 64; + + if (next_tx_seq_offset > tx_seq_offset) { __skb_queue_before(SREJ_QUEUE(sk), next_skb, skb); return 0; } @@ -3525,11 +3645,51 @@ drop: pi->sdu = NULL; disconnect: - l2cap_send_disconn_req(pi->conn, sk); + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); kfree_skb(skb); return 0; } +static int l2cap_try_push_rx_skb(struct sock *sk) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb; + u16 control; + int err; + + while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) { + control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; + err = l2cap_ertm_reassembly_sdu(sk, skb, control); + if (err < 0) { + skb_queue_head(BUSY_QUEUE(sk), skb); + return -EBUSY; + } + + pi->buffer_seq = (pi->buffer_seq + 1) % 64; + } + + if (!(pi->conn_state & L2CAP_CONN_RNR_SENT)) + goto done; + + control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL; + l2cap_send_sframe(pi, control); + l2cap_pi(sk)->retry_count = 1; + + del_timer(&pi->retrans_timer); + __mod_monitor_timer(); + + l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; + +done: + pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY; + pi->conn_state &= 
~L2CAP_CONN_RNR_SENT; + + BT_DBG("sk %p, Exit local busy", sk); + + return 0; +} + static void l2cap_busy_work(struct work_struct *work) { DECLARE_WAITQUEUE(wait, current); @@ -3538,7 +3698,6 @@ static void l2cap_busy_work(struct work_struct *work) struct sock *sk = (struct sock *)pi; int n_tries = 0, timeo = HZ/5, err; struct sk_buff *skb; - u16 control; lock_sock(sk); @@ -3548,8 +3707,8 @@ static void l2cap_busy_work(struct work_struct *work) if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) { err = -EBUSY; - l2cap_send_disconn_req(pi->conn, sk); - goto done; + l2cap_send_disconn_req(pi->conn, sk, EBUSY); + break; } if (!timeo) @@ -3557,7 +3716,7 @@ static void l2cap_busy_work(struct work_struct *work) if (signal_pending(current)) { err = sock_intr_errno(timeo); - goto done; + break; } release_sock(sk); @@ -3566,40 +3725,12 @@ static void l2cap_busy_work(struct work_struct *work) err = sock_error(sk); if (err) - goto done; - - while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) { - control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; - err = l2cap_ertm_reassembly_sdu(sk, skb, control); - if (err < 0) { - skb_queue_head(BUSY_QUEUE(sk), skb); - break; - } - - pi->buffer_seq = (pi->buffer_seq + 1) % 64; - } + break; - if (!skb) + if (l2cap_try_push_rx_skb(sk) == 0) break; } - if (!(pi->conn_state & L2CAP_CONN_RNR_SENT)) - goto done; - - control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL; - l2cap_send_sframe(pi, control); - l2cap_pi(sk)->retry_count = 1; - - del_timer(&pi->retrans_timer); - __mod_monitor_timer(); - - l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; - -done: - pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY; - pi->conn_state &= ~L2CAP_CONN_RNR_SENT; - set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); @@ -3614,7 +3745,9 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control) if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; 
__skb_queue_tail(BUSY_QUEUE(sk), skb); - return -EBUSY; + return l2cap_try_push_rx_skb(sk); + + } err = l2cap_ertm_reassembly_sdu(sk, skb, control); @@ -3624,6 +3757,8 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control) } /* Busy Condition */ + BT_DBG("sk %p, Enter local busy", sk); + pi->conn_state |= L2CAP_CONN_LOCAL_BUSY; bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; __skb_queue_tail(BUSY_QUEUE(sk), skb); @@ -3634,6 +3769,8 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control) pi->conn_state |= L2CAP_CONN_RNR_SENT; + del_timer(&pi->ack_timer); + queue_work(_busy_wq, &pi->busy_work); return err; @@ -3747,7 +3884,7 @@ static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq) l2cap_ertm_reassembly_sdu(sk, skb, control); l2cap_pi(sk)->buffer_seq_srej = (l2cap_pi(sk)->buffer_seq_srej + 1) % 64; - tx_seq++; + tx_seq = (tx_seq + 1) % 64; } } @@ -3783,10 +3920,11 @@ static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq) l2cap_send_sframe(pi, control); new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); - new->tx_seq = pi->expected_tx_seq++; + new->tx_seq = pi->expected_tx_seq; + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; list_add_tail(&new->list, SREJ_LIST(sk)); } - pi->expected_tx_seq++; + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; } static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) @@ -3795,11 +3933,12 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str u8 tx_seq = __get_txseq(rx_control); u8 req_seq = __get_reqseq(rx_control); u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; - u8 tx_seq_offset, expected_tx_seq_offset; + int tx_seq_offset, expected_tx_seq_offset; int num_to_ack = (pi->tx_win/6) + 1; int err = 0; - BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + BT_DBG("sk %p len %d tx_seq %d rx_control 0x%4.4x", sk, skb->len, tx_seq, + rx_control); if (L2CAP_CTRL_FINAL & 
rx_control && l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) { @@ -3821,7 +3960,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str /* invalid tx_seq */ if (tx_seq_offset >= pi->tx_win) { - l2cap_send_disconn_req(pi->conn, sk); + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); goto drop; } @@ -3844,6 +3983,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str pi->buffer_seq = pi->buffer_seq_srej; pi->conn_state &= ~L2CAP_CONN_SREJ_SENT; l2cap_send_ack(pi); + BT_DBG("sk %p, Exit SREJ_SENT", sk); } } else { struct srej_list *l; @@ -3872,6 +4012,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str pi->conn_state |= L2CAP_CONN_SREJ_SENT; + BT_DBG("sk %p, Enter SREJ", sk); + INIT_LIST_HEAD(SREJ_LIST(sk)); pi->buffer_seq_srej = pi->buffer_seq; @@ -3882,6 +4024,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str pi->conn_state |= L2CAP_CONN_SEND_PBIT; l2cap_send_srejframe(sk, tx_seq); + + del_timer(&pi->ack_timer); } return 0; @@ -3895,6 +4039,10 @@ expected: return 0; } + err = l2cap_push_rx_skb(sk, skb, rx_control); + if (err < 0) + return 0; + if (rx_control & L2CAP_CTRL_FINAL) { if (pi->conn_state & L2CAP_CONN_REJ_ACT) pi->conn_state &= ~L2CAP_CONN_REJ_ACT; @@ -3902,10 +4050,6 @@ expected: l2cap_retransmit_frames(sk); } - err = l2cap_push_rx_skb(sk, skb, rx_control); - if (err < 0) - return 0; - __mod_ack_timer(); pi->num_acked = (pi->num_acked + 1) % num_to_ack; @@ -3923,10 +4067,14 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) { struct l2cap_pinfo *pi = l2cap_pi(sk); + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, __get_reqseq(rx_control), + rx_control); + pi->expected_ack_seq = __get_reqseq(rx_control); l2cap_drop_acked_frames(sk); if (rx_control & L2CAP_CTRL_POLL) { + pi->conn_state |= L2CAP_CONN_SEND_FBIT; if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) 
&& (pi->unacked_frames > 0)) @@ -3955,9 +4103,7 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { l2cap_send_ack(pi); } else { - spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); } } } @@ -3967,6 +4113,8 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control) struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_reqseq(rx_control); + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; pi->expected_ack_seq = tx_seq; @@ -3989,16 +4137,18 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control) struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_reqseq(rx_control); + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; if (rx_control & L2CAP_CTRL_POLL) { pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); + + pi->conn_state |= L2CAP_CONN_SEND_FBIT; l2cap_retransmit_one_frame(sk, tx_seq); - spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); if (pi->conn_state & L2CAP_CONN_WAIT_F) { pi->srej_save_reqseq = tx_seq; @@ -4024,10 +4174,15 @@ static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control) struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_reqseq(rx_control); + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); + pi->conn_state |= L2CAP_CONN_REMOTE_BUSY; pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); + if (rx_control & L2CAP_CTRL_POLL) + pi->conn_state |= L2CAP_CONN_SEND_FBIT; + if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) { del_timer(&pi->retrans_timer); if (rx_control & L2CAP_CTRL_POLL) @@ -4075,12 +4230,83 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str return 0; } +static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct 
l2cap_pinfo *pi = l2cap_pi(sk); + u16 control; + u8 req_seq; + int len, next_tx_seq_offset, req_seq_offset; + + control = get_unaligned_le16(skb->data); + skb_pull(skb, 2); + len = skb->len; + + /* + * We can just drop the corrupted I-frame here. + * Receiver will miss it and start proper recovery + * procedures and ask retransmission. + */ + if (l2cap_check_fcs(pi, skb)) + goto drop; + + if (__is_sar_start(control) && __is_iframe(control)) + len -= 2; + + if (pi->fcs == L2CAP_FCS_CRC16) + len -= 2; + + if (len > pi->mps) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + req_seq = __get_reqseq(control); + req_seq_offset = (req_seq - pi->expected_ack_seq) % 64; + if (req_seq_offset < 0) + req_seq_offset += 64; + + next_tx_seq_offset = + (pi->next_tx_seq - pi->expected_ack_seq) % 64; + if (next_tx_seq_offset < 0) + next_tx_seq_offset += 64; + + /* check for invalid req-seq */ + if (req_seq_offset > next_tx_seq_offset) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + if (__is_iframe(control)) { + if (len < 0) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + l2cap_data_channel_iframe(sk, control, skb); + } else { + if (len != 0) { + BT_ERR("%d", len); + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + l2cap_data_channel_sframe(sk, control, skb); + } + + return 0; + +drop: + kfree_skb(skb); + return 0; +} + static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) { struct sock *sk; struct l2cap_pinfo *pi; - u16 control, len; - u8 tx_seq, req_seq, next_tx_seq_offset, req_seq_offset; + u16 control; + u8 tx_seq; + int len; sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); if (!sk) { @@ -4110,59 +4336,11 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk break; case L2CAP_MODE_ERTM: - control = get_unaligned_le16(skb->data); - skb_pull(skb, 2); - len = skb->len; - - if (__is_sar_start(control)) - len -= 2; - 
- if (pi->fcs == L2CAP_FCS_CRC16) - len -= 2; - - /* - * We can just drop the corrupted I-frame here. - * Receiver will miss it and start proper recovery - * procedures and ask retransmission. - */ - if (len > pi->mps) { - l2cap_send_disconn_req(pi->conn, sk); - goto drop; - } - - if (l2cap_check_fcs(pi, skb)) - goto drop; - - req_seq = __get_reqseq(control); - req_seq_offset = (req_seq - pi->expected_ack_seq) % 64; - if (req_seq_offset < 0) - req_seq_offset += 64; - - next_tx_seq_offset = - (pi->next_tx_seq - pi->expected_ack_seq) % 64; - if (next_tx_seq_offset < 0) - next_tx_seq_offset += 64; - - /* check for invalid req-seq */ - if (req_seq_offset > next_tx_seq_offset) { - l2cap_send_disconn_req(pi->conn, sk); - goto drop; - } - - if (__is_iframe(control)) { - if (len < 4) { - l2cap_send_disconn_req(pi->conn, sk); - goto drop; - } - - l2cap_data_channel_iframe(sk, control, skb); + if (!sock_owned_by_user(sk)) { + l2cap_ertm_data_rcv(sk, skb); } else { - if (len != 0) { - l2cap_send_disconn_req(pi->conn, sk); + if (sk_add_backlog(sk, skb)) goto drop; - } - - l2cap_data_channel_sframe(sk, control, skb); } goto done; @@ -4172,16 +4350,16 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk skb_pull(skb, 2); len = skb->len; + if (l2cap_check_fcs(pi, skb)) + goto drop; + if (__is_sar_start(control)) len -= 2; if (pi->fcs == L2CAP_FCS_CRC16) len -= 2; - if (len > pi->mps || len < 4 || __is_sframe(control)) - goto drop; - - if (l2cap_check_fcs(pi, skb)) + if (len > pi->mps || len < 0 || __is_sframe(control)) goto drop; tx_seq = __get_txseq(control); @@ -4281,7 +4459,7 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) struct hlist_node *node; if (type != ACL_LINK) - return 0; + return -EINVAL; BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); @@ -4314,7 +4492,7 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); 
if (hcon->type != ACL_LINK) - return 0; + return -EINVAL; if (!status) { conn = l2cap_conn_add(hcon, status); @@ -4343,7 +4521,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) BT_DBG("hcon %p reason %d", hcon, reason); if (hcon->type != ACL_LINK) - return 0; + return -EINVAL; l2cap_conn_del(hcon, bt_err(reason)); @@ -4404,6 +4582,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) req.psm = l2cap_pi(sk)->psm; l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); @@ -4671,14 +4850,8 @@ EXPORT_SYMBOL(l2cap_load); module_init(l2cap_init); module_exit(l2cap_exit); -module_param(enable_ertm, bool, 0644); -MODULE_PARM_DESC(enable_ertm, "Enable enhanced retransmission mode"); - -module_param(max_transmit, uint, 0644); -MODULE_PARM_DESC(max_transmit, "Max transmit value (default = 3)"); - -module_param(tx_window, uint, 0644); -MODULE_PARM_DESC(tx_window, "Transmission window size value (default = 63)"); +module_param(disable_ertm, bool, 0644); +MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 43fbf6b4b4bf..44a623275951 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1152,7 +1152,7 @@ error: return err; } -void rfcomm_cleanup_sockets(void) +void __exit rfcomm_cleanup_sockets(void) { debugfs_remove(rfcomm_sock_debugfs); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 309b6c261b25..026205c18b78 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -1153,7 +1153,7 @@ static const struct tty_operations rfcomm_ops = { .tiocmset = rfcomm_tty_tiocmset, }; -int rfcomm_init_ttys(void) +int __init rfcomm_init_ttys(void) { 
rfcomm_tty_driver = alloc_tty_driver(RFCOMM_TTY_PORTS); if (!rfcomm_tty_driver) @@ -1183,7 +1183,7 @@ int rfcomm_init_ttys(void) return 0; } -void rfcomm_cleanup_ttys(void) +void __exit rfcomm_cleanup_ttys(void) { tty_unregister_driver(rfcomm_tty_driver); put_tty_driver(rfcomm_tty_driver); diff --git a/net/bridge/br.c b/net/bridge/br.c index 76357b547752..c8436fa31344 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -63,7 +63,6 @@ static int __init br_init(void) goto err_out4; brioctl_set(br_ioctl_deviceless_stub); - br_handle_frame_hook = br_handle_frame; #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) br_fdb_test_addr_hook = br_fdb_test_addr; @@ -100,7 +99,6 @@ static void __exit br_deinit(void) br_fdb_test_addr_hook = NULL; #endif - br_handle_frame_hook = NULL; br_fdb_fini(); } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index eedf2c94820e..cf09fe591fc2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -22,7 +22,7 @@ #include <asm/uaccess.h> #include "br_private.h" -/* net device transmit always called with no BH (preempt_disabled) */ +/* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -38,17 +38,26 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) } #endif + u64_stats_update_begin(&brstats->syncp); brstats->tx_packets++; brstats->tx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); + rcu_read_lock(); if (is_multicast_ether_addr(dest)) { - if (br_multicast_rcv(br, NULL, skb)) + if (unlikely(netpoll_tx_running(dev))) { + br_flood_deliver(br, skb); + goto out; + } + if (br_multicast_rcv(br, NULL, skb)) { + kfree_skb(skb); goto out; + } mdst = br_mdb_get(br, skb); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) @@ -61,6 +70,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, 
struct net_device *dev) br_flood_deliver(br, skb); out: + rcu_read_unlock(); return NETDEV_TX_OK; } @@ -92,21 +102,25 @@ static int br_dev_stop(struct net_device *dev) return 0; } -static struct net_device_stats *br_get_stats(struct net_device *dev) +static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct net_bridge *br = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; - struct br_cpu_netstats sum = { 0 }; + struct br_cpu_netstats tmp, sum = { 0 }; unsigned int cpu; for_each_possible_cpu(cpu) { + unsigned int start; const struct br_cpu_netstats *bstats = per_cpu_ptr(br->stats, cpu); - - sum.tx_bytes += bstats->tx_bytes; - sum.tx_packets += bstats->tx_packets; - sum.rx_bytes += bstats->rx_bytes; - sum.rx_packets += bstats->rx_packets; + do { + start = u64_stats_fetch_begin(&bstats->syncp); + memcpy(&tmp, bstats, sizeof(tmp)); + } while (u64_stats_fetch_retry(&bstats->syncp, start)); + sum.tx_bytes += tmp.tx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.rx_packets += tmp.rx_packets; } stats->tx_bytes = sum.tx_bytes; @@ -127,7 +141,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) #ifdef CONFIG_BRIDGE_NETFILTER /* remember the MTU in the rtable for PMTU */ - br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu; + br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu; #endif return 0; @@ -199,73 +213,81 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) } #ifdef CONFIG_NET_POLL_CONTROLLER -static bool br_devices_support_netpoll(struct net_bridge *br) +static void br_poll_controller(struct net_device *br_dev) { - struct net_bridge_port *p; - bool ret = true; - int count = 0; - unsigned long flags; - - spin_lock_irqsave(&br->lock, flags); - list_for_each_entry(p, &br->port_list, list) { - count++; - if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) || - !p->dev->netdev_ops->ndo_poll_controller) - ret = false; - } - 
spin_unlock_irqrestore(&br->lock, flags); - return count != 0 && ret; } -static void br_poll_controller(struct net_device *br_dev) +static void br_netpoll_cleanup(struct net_device *dev) { - struct netpoll *np = br_dev->npinfo->netpoll; + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p, *n; - if (np->real_dev != br_dev) - netpoll_poll_dev(np->real_dev); + list_for_each_entry_safe(p, n, &br->port_list, list) { + br_netpoll_disable(p); + } } -void br_netpoll_cleanup(struct net_device *dev) +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p, *n; - const struct net_device_ops *ops; + int err = 0; - br->dev->npinfo = NULL; list_for_each_entry_safe(p, n, &br->port_list, list) { - if (p->dev) { - ops = p->dev->netdev_ops; - if (ops->ndo_netpoll_cleanup) - ops->ndo_netpoll_cleanup(p->dev); - else - p->dev->npinfo = NULL; - } + if (!p->dev) + continue; + + err = br_netpoll_enable(p); + if (err) + goto fail; } + +out: + return err; + +fail: + br_netpoll_cleanup(dev); + goto out; } -void br_netpoll_disable(struct net_bridge *br, - struct net_device *dev) +int br_netpoll_enable(struct net_bridge_port *p) { - if (br_devices_support_netpoll(br)) - br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; - if (dev->netdev_ops->ndo_netpoll_cleanup) - dev->netdev_ops->ndo_netpoll_cleanup(dev); - else - dev->npinfo = NULL; + struct netpoll *np; + int err = 0; + + np = kzalloc(sizeof(*p->np), GFP_KERNEL); + err = -ENOMEM; + if (!np) + goto out; + + np->dev = p->dev; + + err = __netpoll_setup(np); + if (err) { + kfree(np); + goto out; + } + + p->np = np; + +out: + return err; } -void br_netpoll_enable(struct net_bridge *br, - struct net_device *dev) +void br_netpoll_disable(struct net_bridge_port *p) { - if (br_devices_support_netpoll(br)) { - br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; - if (br->dev->npinfo) - dev->npinfo = br->dev->npinfo; - } else if (!(br->dev->priv_flags & 
IFF_DISABLE_NETPOLL)) { - br->dev->priv_flags |= IFF_DISABLE_NETPOLL; - br_info(br,"new device %s does not support netpoll (disabling)", - dev->name); - } + struct netpoll *np = p->np; + + if (!np) + return; + + p->np = NULL; + + /* Wait for transmitting packets to finish before freeing. */ + synchronize_rcu_bh(); + + __netpoll_cleanup(np); + kfree(np); } #endif @@ -288,12 +310,13 @@ static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, .ndo_start_xmit = br_dev_xmit, - .ndo_get_stats = br_get_stats, + .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, .ndo_set_multicast_list = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_setup = br_netpoll_setup, .ndo_netpoll_cleanup = br_netpoll_cleanup, .ndo_poll_controller = br_poll_controller, #endif diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 26637439965b..90512ccfd3e9 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -128,7 +128,7 @@ void br_fdb_cleanup(unsigned long _data) { struct net_bridge *br = (struct net_bridge *)_data; unsigned long delay = hold_time(br); - unsigned long next_timer = jiffies + br->forward_delay; + unsigned long next_timer = jiffies + br->ageing_time; int i; spin_lock_bh(&br->hash_lock); @@ -149,9 +149,7 @@ void br_fdb_cleanup(unsigned long _data) } spin_unlock_bh(&br->hash_lock); - /* Add HZ/4 to ensure we round the jiffies upwards to be after the next - * timer, otherwise we might round down and will have no-op run. 
*/ - mod_timer(&br->gc_timer, round_jiffies(next_timer + HZ/4)); + mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); } /* Completely flush all dynamic entries in forwarding database.*/ @@ -216,7 +214,7 @@ void br_fdb_delete_by_port(struct net_bridge *br, spin_unlock_bh(&br->hash_lock); } -/* No locking or refcounting, assumes caller has no preempt (rcu_read_lock) */ +/* No locking or refcounting, assumes caller has rcu_read_lock */ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, const unsigned char *addr) { @@ -242,11 +240,11 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) struct net_bridge_fdb_entry *fdb; int ret; - if (!dev->br_port) + if (!br_port_exists(dev)) return 0; rcu_read_lock(); - fdb = __br_fdb_get(dev->br_port->br, addr); + fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr); ret = fdb && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; rcu_read_unlock(); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index a98ef1393097..cbfe87f0f34a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -50,14 +50,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) kfree_skb(skb); else { skb_push(skb, ETH_HLEN); - -#ifdef CONFIG_NET_POLL_CONTROLLER - if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) { - netpoll_send_skb(skb->dev->npinfo->netpoll, skb); - skb->dev->priv_flags &= ~IFF_IN_NETPOLL; - } else -#endif - dev_queue_xmit(skb); + dev_queue_xmit(skb); } } @@ -73,23 +66,20 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { -#ifdef CONFIG_NET_POLL_CONTROLLER - struct net_bridge *br = to->br; - if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) { - struct netpoll *np; - to->dev->npinfo = skb->dev->npinfo; - np = skb->dev->npinfo->netpoll; - np->real_dev = np->dev = to->dev; - to->dev->priv_flags |= IFF_IN_NETPOLL; - } -#endif skb->dev = to->dev; + + if (unlikely(netpoll_tx_running(to->dev))) { + if 
(packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) + kfree_skb(skb); + else { + skb_push(skb, ETH_HLEN); + br_netpoll_send_skb(to, skb); + } + return; + } + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, br_forward_finish); -#ifdef CONFIG_NET_POLL_CONTROLLER - if (skb->dev->npinfo) - skb->dev->npinfo->netpoll->dev = br->dev; -#endif } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) @@ -140,10 +130,10 @@ static int deliver_clone(const struct net_bridge_port *prev, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { - struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; - dev->stats.tx_dropped++; return -ENOMEM; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 18b245e2c00e..c03d2c3ff03e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -147,14 +147,17 @@ static void del_nbp(struct net_bridge_port *p) list_del_rcu(&p->list); - rcu_assign_pointer(dev->br_port, NULL); + dev->priv_flags &= ~IFF_BRIDGE_PORT; + + netdev_rx_handler_unregister(dev); br_multicast_del_port(p); kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); - br_netpoll_disable(br, dev); + br_netpoll_disable(p); + call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -167,8 +170,6 @@ static void del_br(struct net_bridge *br, struct list_head *head) del_nbp(p); } - br_netpoll_cleanup(br->dev); - del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); @@ -400,7 +401,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return -ELOOP; /* Device is already being bridged */ - if (dev->br_port != NULL) + if (br_port_exists(dev)) return -EBUSY; /* No bridging devices that dislike that (e.g. 
wireless) */ @@ -428,7 +429,15 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err2; - rcu_assign_pointer(dev->br_port, p); + if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) + goto err3; + + err = netdev_rx_handler_register(dev, br_handle_frame, p); + if (err) + goto err3; + + dev->priv_flags |= IFF_BRIDGE_PORT; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -448,9 +457,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) kobject_uevent(&p->kobj, KOBJ_ADD); - br_netpoll_enable(br, dev); - return 0; +err3: + sysfs_remove_link(br->ifobj, p->dev->name); err2: br_fdb_delete_by_port(br, p, 1); err1: @@ -467,9 +476,13 @@ put_back: /* called with RTNL */ int br_del_if(struct net_bridge *br, struct net_device *dev) { - struct net_bridge_port *p = dev->br_port; + struct net_bridge_port *p; + + if (!br_port_exists(dev)) + return -EINVAL; - if (!p || p->br != br) + p = br_port_get(dev); + if (p->br != br) return -EINVAL; del_nbp(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index d36e700f7a26..826cd5221536 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -27,8 +27,10 @@ static int br_pass_frame_up(struct sk_buff *skb) struct net_bridge *br = netdev_priv(brdev); struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; brstats->rx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); indev = skb->dev; skb->dev = brdev; @@ -37,11 +39,11 @@ static int br_pass_frame_up(struct sk_buff *skb) netif_receive_skb); } -/* note: already called with rcu_read_lock (preempt_disabled) */ +/* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; - struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); + struct net_bridge_port *p = br_port_get_rcu(skb->dev); struct net_bridge *br; struct net_bridge_fdb_entry *dst; 
struct net_bridge_mdb_entry *mdst; @@ -108,13 +110,12 @@ drop: goto out; } -/* note: already called with rcu_read_lock (preempt_disabled) */ +/* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct sk_buff *skb) { - struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); + struct net_bridge_port *p = br_port_get_rcu(skb->dev); - if (p) - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_fdb_update(p->br, p, eth_hdr(skb)->h_source); return 0; /* process further */ } @@ -131,15 +132,18 @@ static inline int is_link_local(const unsigned char *dest) } /* - * Called via br_handle_frame_hook. * Return NULL if skb is handled - * note: already called with rcu_read_lock (preempt_disabled) + * note: already called with rcu_read_lock */ -struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) +struct sk_buff *br_handle_frame(struct sk_buff *skb) { + struct net_bridge_port *p; const unsigned char *dest = eth_hdr(skb)->h_dest; int (*rhook)(struct sk_buff *skb); + if (skb->pkt_type == PACKET_LOOPBACK) + return skb; + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; @@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (!skb) return NULL; + p = br_port_get_rcu(skb->dev); + if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9d21d98ae5fa..eb5b256ffc88 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -99,6 +99,15 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( return NULL; } +static struct net_bridge_mdb_entry *br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, struct br_ip *dst) +{ + if (!mdb) + return NULL; + + return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); +} + static struct net_bridge_mdb_entry *br_mdb_ip4_get( struct net_bridge_mdb_htable *mdb, __be32 
dst) { @@ -107,7 +116,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get( br_dst.u.ip4 = dst; br_dst.proto = htons(ETH_P_IP); - return __br_mdb_ip_get(mdb, &br_dst, __br_ip4_hash(mdb, dst)); + return br_mdb_ip_get(mdb, &br_dst); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -119,23 +128,17 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get( ipv6_addr_copy(&br_dst.u.ip6, dst); br_dst.proto = htons(ETH_P_IPV6); - return __br_mdb_ip_get(mdb, &br_dst, __br_ip6_hash(mdb, dst)); + return br_mdb_ip_get(mdb, &br_dst); } #endif -static struct net_bridge_mdb_entry *br_mdb_ip_get( - struct net_bridge_mdb_htable *mdb, struct br_ip *dst) -{ - return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); -} - struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { struct net_bridge_mdb_htable *mdb = br->mdb; struct br_ip ip; - if (!mdb || br->multicast_disabled) + if (br->multicast_disabled) return NULL; if (BR_INPUT_SKB_CB(skb)->igmp) @@ -1432,7 +1435,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct icmp6hdr *icmp6h; u8 nexthdr; unsigned len; - unsigned offset; + int offset; int err; if (!pskb_may_pull(skb, sizeof(*ip6h))) @@ -1725,13 +1728,9 @@ unlock: int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; - int err = -ENOENT; + int err = 0; spin_lock(&br->multicast_lock); - if (!netif_running(br->dev)) - goto unlock; - - err = 0; if (br->multicast_disabled == !val) goto unlock; @@ -1739,6 +1738,9 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) if (br->multicast_disabled) goto unlock; + if (!netif_running(br->dev)) + goto unlock; + if (br->mdb) { if (br->mdb->old) { err = -EEXIST; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 44420992f72f..2c911c0759c2 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -55,6 +55,9 @@ static int brnf_call_arptables __read_mostly = 1; static int brnf_filter_vlan_tagged 
__read_mostly = 0; static int brnf_filter_pppoe_tagged __read_mostly = 0; #else +#define brnf_call_iptables 1 +#define brnf_call_ip6tables 1 +#define brnf_call_arptables 1 #define brnf_filter_vlan_tagged 0 #define brnf_filter_pppoe_tagged 0 #endif @@ -117,26 +120,27 @@ void br_netfilter_rtable_init(struct net_bridge *br) { struct rtable *rt = &br->fake_rtable; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.dev = br->dev; - rt->u.dst.path = &rt->u.dst; - rt->u.dst.metrics[RTAX_MTU - 1] = 1500; - rt->u.dst.flags = DST_NOXFRM; - rt->u.dst.ops = &fake_dst_ops; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.dev = br->dev; + rt->dst.path = &rt->dst; + rt->dst.metrics[RTAX_MTU - 1] = 1500; + rt->dst.flags = DST_NOXFRM; + rt->dst.ops = &fake_dst_ops; } static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { - struct net_bridge_port *port = rcu_dereference(dev->br_port); - - return port ? &port->br->fake_rtable : NULL; + if (!br_port_exists(dev)) + return NULL; + return &br_port_get_rcu(dev)->br->fake_rtable; } static inline struct net_device *bridge_parent(const struct net_device *dev) { - struct net_bridge_port *port = rcu_dereference(dev->br_port); + if (!br_port_exists(dev)) + return NULL; - return port ? 
port->br->dev : NULL; + return br_port_get_rcu(dev)->br->dev; } static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) @@ -244,8 +248,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); @@ -396,8 +399,7 @@ bridged_dnat: kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); } skb->dev = nf_bridge->physindev; @@ -545,25 +547,30 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct net_bridge_port *p; + struct net_bridge *br; struct iphdr *iph; __u32 len = nf_bridge_encap_header_len(skb); if (unlikely(!pskb_may_pull(skb, len))) goto out; + p = br_port_get_rcu(in); + if (p == NULL) + goto out; + br = p->br; + if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { -#ifdef CONFIG_SYSCTL - if (!brnf_call_ip6tables) + if (!brnf_call_ip6tables && !br->nf_call_ip6tables) return NF_ACCEPT; -#endif + nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); } -#ifdef CONFIG_SYSCTL - if (!brnf_call_iptables) + + if (!brnf_call_iptables && !br->nf_call_iptables) return NF_ACCEPT; -#endif if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb)) @@ -591,6 +598,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, pskb_trim_rcsum(skb, len); + /* BUG: Should really parse the IP options here. 
*/ + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + nf_bridge_put(skb->nf_bridge); if (!nf_bridge_alloc(skb)) return NF_DROP; @@ -716,12 +726,17 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct net_bridge_port *p; + struct net_bridge *br; struct net_device **d = (struct net_device **)(skb->cb); -#ifdef CONFIG_SYSCTL - if (!brnf_call_arptables) + p = br_port_get_rcu(out); + if (p == NULL) + return NF_ACCEPT; + br = p->br; + + if (!brnf_call_arptables && !br->nf_call_arptables) return NF_ACCEPT; -#endif if (skb->protocol != htons(ETH_P_ARP)) { if (!IS_VLAN_ARP(skb)) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe0a79018ab2..4a6a378c84e3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -120,10 +120,11 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; for_each_netdev(net, dev) { /* not a bridge port */ - if (dev->br_port == NULL || idx < cb->args[0]) + if (!br_port_exists(dev) || idx < cb->args[0]) goto skip; - if (br_fill_ifinfo(skb, dev->br_port, NETLINK_CB(cb->skb).pid, + if (br_fill_ifinfo(skb, br_port_get(dev), + NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) < 0) break; @@ -168,9 +169,9 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev) return -ENODEV; - p = dev->br_port; - if (!p) + if (!br_port_exists(dev)) return -EINVAL; + p = br_port_get(dev); /* if kernel STP is running, don't allow changes */ if (p->br->stp_enabled == BR_KERNEL_STP) diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 717e1fd6133c..404d4e14c6a7 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -32,14 +32,15 @@ struct notifier_block br_device_notifier = { static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct net_bridge_port *p = 
dev->br_port; + struct net_bridge_port *p = br_port_get(dev); struct net_bridge *br; int err; /* not a port of a bridge */ - if (p == NULL) + if (!br_port_exists(dev)) return NOTIFY_DONE; + p = br_port_get(dev); br = p->br; switch (event) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 0f4a74bc6a9b..75c90edaf7db 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -15,6 +15,8 @@ #include <linux/netdevice.h> #include <linux/if_bridge.h> +#include <linux/netpoll.h> +#include <linux/u64_stats_sync.h> #include <net/route.h> #define BR_HASH_BITS 8 @@ -143,13 +145,23 @@ struct net_bridge_port #ifdef CONFIG_SYSFS char sysfs_name[IFNAMSIZ]; #endif + +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *np; +#endif }; +#define br_port_get_rcu(dev) \ + ((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data)) +#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data) +#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) + struct br_cpu_netstats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; }; struct net_bridge @@ -164,6 +176,9 @@ struct net_bridge unsigned long feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; + bool nf_call_iptables; + bool nf_call_ip6tables; + bool nf_call_arptables; #endif unsigned long flags; #define BR_SET_MAC_ADDR 0x00000001 @@ -273,16 +288,41 @@ extern void br_dev_setup(struct net_device *dev); extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); #ifdef CONFIG_NET_POLL_CONTROLLER -extern void br_netpoll_cleanup(struct net_device *dev); -extern void br_netpoll_enable(struct net_bridge *br, - struct net_device *dev); -extern void br_netpoll_disable(struct net_bridge *br, - struct net_device *dev); +static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) +{ 
+ return br->dev->npinfo; +} + +static inline void br_netpoll_send_skb(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + struct netpoll *np = p->np; + + if (np) + netpoll_send_skb(np, skb); +} + +extern int br_netpoll_enable(struct net_bridge_port *p); +extern void br_netpoll_disable(struct net_bridge_port *p); #else -#define br_netpoll_cleanup(br) -#define br_netpoll_enable(br, dev) -#define br_netpoll_disable(br, dev) +static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) +{ + return NULL; +} + +static inline void br_netpoll_send_skb(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} +static inline int br_netpoll_enable(struct net_bridge_port *p) +{ + return 0; +} + +static inline void br_netpoll_disable(struct net_bridge_port *p) +{ +} #endif /* br_fdb.c */ @@ -331,8 +371,7 @@ extern void br_features_recompute(struct net_bridge *br); /* br_input.c */ extern int br_handle_frame_finish(struct sk_buff *skb); -extern struct sk_buff *br_handle_frame(struct net_bridge_port *p, - struct sk_buff *skb); +extern struct sk_buff *br_handle_frame(struct sk_buff *skb); /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 217bd225a42f..35cf27087b56 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -131,18 +131,19 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) /* * Called from llc. 
* - * NO locks, but rcu_read_lock (preempt_disabled) + * NO locks, but rcu_read_lock */ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_device *dev) { const unsigned char *dest = eth_hdr(skb)->h_dest; - struct net_bridge_port *p = rcu_dereference(dev->br_port); + struct net_bridge_port *p; struct net_bridge *br; const unsigned char *buf; - if (!p) + if (!br_port_exists(dev)) goto err; + p = br_port_get_rcu(dev); if (!pskb_may_pull(skb, 4)) goto err; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 486b8f3861d2..5c1e5559ebba 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -611,6 +611,73 @@ static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR, show_multicast_startup_query_interval, store_multicast_startup_query_interval); #endif +#ifdef CONFIG_BRIDGE_NETFILTER +static ssize_t show_nf_call_iptables( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_iptables); +} + +static int set_nf_call_iptables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_iptables = val ? true : false; + return 0; +} + +static ssize_t store_nf_call_iptables( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_iptables); +} +static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR, + show_nf_call_iptables, store_nf_call_iptables); + +static ssize_t show_nf_call_ip6tables( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_ip6tables); +} + +static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_ip6tables = val ? 
true : false; + return 0; +} + +static ssize_t store_nf_call_ip6tables( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); +} +static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR, + show_nf_call_ip6tables, store_nf_call_ip6tables); + +static ssize_t show_nf_call_arptables( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_arptables); +} + +static int set_nf_call_arptables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_arptables = val ? true : false; + return 0; +} + +static ssize_t store_nf_call_arptables( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_arptables); +} +static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, + show_nf_call_arptables, store_nf_call_arptables); +#endif static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, @@ -645,6 +712,11 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_query_response_interval.attr, &dev_attr_multicast_startup_query_interval.attr, #endif +#ifdef CONFIG_BRIDGE_NETFILTER + &dev_attr_nf_call_iptables.attr, + &dev_attr_nf_call_ip6tables.attr, + &dev_attr_nf_call_arptables.attr, +#endif NULL }; diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 9e19166ba453..46624bb6d9be 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -24,8 +24,9 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) return EBT_DROP; if (par->hooknum != NF_BR_BROUTING) + /* rcu_read_lock()ed by nf_hook_slow */ memcpy(eth_hdr(skb)->h_dest, - par->in->br_port->br->dev->dev_addr, ETH_ALEN); + br_port_get_rcu(par->in)->br->dev->dev_addr, ETH_ALEN); else memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN); 
skb->pkt_type = PACKET_HOST; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index ae3c7cef1484..26377e96fa1c 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -177,8 +177,9 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (in) { strcpy(pm->physindev, in->name); /* If in isn't a bridge, then physindev==indev */ - if (in->br_port) - strcpy(pm->indev, in->br_port->br->dev->name); + if (br_port_exists(in)) + /* rcu_read_lock()ed by nf_hook_slow */ + strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name); else strcpy(pm->indev, in->name); } else @@ -187,7 +188,8 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (out) { /* If out exists, then out is a bridge port */ strcpy(pm->physoutdev, out->name); - strcpy(pm->outdev, out->br_port->br->dev->name); + /* rcu_read_lock()ed by nf_hook_slow */ + strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name); } else pm->outdev[0] = pm->physoutdev[0] = '\0'; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 59ca00e40dec..bcc102e3be4d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -140,11 +140,14 @@ ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h, return 1; if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) return 1; - if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check( - e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN)) + /* rcu_read_lock()ed by nf_hook_slow */ + if (in && br_port_exists(in) && + FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev), + EBT_ILOGICALIN)) return 1; - if ((!out || !out->br_port) ? 
0 : FWINV2(ebt_dev_check( - e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT)) + if (out && br_port_exists(out) && + FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev), + EBT_ILOGICALOUT)) return 1; if (e->bitmask & EBT_SOURCEMAC) { diff --git a/net/caif/Kconfig b/net/caif/Kconfig index ed651786f16b..529750da9624 100644 --- a/net/caif/Kconfig +++ b/net/caif/Kconfig @@ -21,19 +21,18 @@ menuconfig CAIF See Documentation/networking/caif for a further explanation on how to use and configure CAIF. -if CAIF - config CAIF_DEBUG bool "Enable Debug" + depends on CAIF default n --- help --- Enable the inclusion of debug code in the CAIF stack. Be aware that doing this will impact performance. If unsure say N. - config CAIF_NETDEV tristate "CAIF GPRS Network device" + depends on CAIF default CAIF ---help--- Say Y if you will be using a CAIF based GPRS network device. @@ -41,5 +40,3 @@ config CAIF_NETDEV If you select to build it as a built-in then the main CAIF device must also be a built-in. If unsure say Y. 
- -endif diff --git a/net/caif/Makefile b/net/caif/Makefile index 34852af2595e..f87481fb0e65 100644 --- a/net/caif/Makefile +++ b/net/caif/Makefile @@ -1,23 +1,13 @@ -ifeq ($(CONFIG_CAIF_DEBUG),1) -CAIF_DBG_FLAGS := -DDEBUG +ifeq ($(CONFIG_CAIF_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG endif -ccflags-y := $(CAIF_FLAGS) $(CAIF_DBG_FLAGS) - caif-objs := caif_dev.o \ cfcnfg.o cfmuxl.o cfctrl.o \ cffrml.o cfveil.o cfdbgl.o\ cfserl.o cfdgml.o \ cfrfml.o cfvidl.o cfutill.o \ cfsrvl.o cfpkt_skbuff.o caif_config_util.o -clean-dirs:= .tmp_versions - -clean-files:= \ - Module.symvers \ - modules.order \ - *.cmd \ - *.o \ - *~ obj-$(CONFIG_CAIF) += caif.o obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c index 6f36580366f0..76ae68303d3a 100644 --- a/net/caif/caif_config_util.c +++ b/net/caif/caif_config_util.c @@ -80,6 +80,11 @@ int connect_req_to_link_param(struct cfcnfg *cnfg, l->u.utility.paramlen); break; + case CAIFPROTO_DEBUG: + l->linktype = CFCTRL_SRV_DBG; + l->endpoint = s->sockaddr.u.dbg.service; + l->chtype = s->sockaddr.u.dbg.type; + break; default: return -EINVAL; } diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index e2b86f1f5a47..0b586e9d1378 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -255,7 +255,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, pref = CFPHYPREF_HIGH_BW; break; } - + dev_hold(dev); cfcnfg_add_phy_layer(get_caif_conf(), phy_type, dev, @@ -285,6 +285,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, caifd->layer.up->ctrlcmd(caifd->layer.up, _CAIF_CTRLCMD_PHYIF_DOWN_IND, caifd->layer.id); + might_sleep(); res = wait_event_interruptible_timeout(caifd->event, atomic_read(&caifd->in_use) == 0, TIMEOUT); @@ -300,6 +301,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, "Unregistering an active CAIF device: %s\n", __func__, dev->name); cfcnfg_del_phy_layer(get_caif_conf(), 
&caifd->layer); + dev_put(dev); atomic_set(&caifd->state, what); break; @@ -326,7 +328,8 @@ struct cfcnfg *get_caif_conf(void) EXPORT_SYMBOL(get_caif_conf); int caif_connect_client(struct caif_connect_request *conn_req, - struct cflayer *client_layer) + struct cflayer *client_layer, int *ifindex, + int *headroom, int *tailroom) { struct cfctrl_link_param param; int ret; @@ -334,8 +337,9 @@ int caif_connect_client(struct caif_connect_request *conn_req, if (ret) return ret; /* Hook up the adaptation layer. */ - return cfcnfg_add_adaptation_layer(get_caif_conf(), - &param, client_layer); + return cfcnfg_add_adaptation_layer(get_caif_conf(), &param, + client_layer, ifindex, + headroom, tailroom); } EXPORT_SYMBOL(caif_connect_client); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 3d0e09584fae..8ce904786116 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -28,8 +28,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(AF_CAIF); -#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10) -#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100) +#define CAIF_DEF_SNDBUF (4096*10) +#define CAIF_DEF_RCVBUF (4096*100) /* * CAIF state is re-using the TCP socket states.
@@ -76,6 +76,7 @@ struct caifsock { struct caif_connect_request conn_req; struct mutex readlock; struct dentry *debugfs_socket_dir; + int headroom, tailroom, maxframe; }; static int rx_flow_is_on(struct caifsock *cf_sk) @@ -594,27 +595,32 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, goto err; noblock = msg->msg_flags & MSG_DONTWAIT; - buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM; - - ret = -EMSGSIZE; - if (buffer_size > CAIF_MAX_PAYLOAD_SIZE) - goto err; - timeo = sock_sndtimeo(sk, noblock); timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk), 1, timeo, &ret); + if (ret) + goto err; ret = -EPIPE; if (cf_sk->sk.sk_state != CAIF_CONNECTED || sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN)) goto err; + /* Error if trying to write more than maximum frame size. */ + ret = -EMSGSIZE; + if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM) + goto err; + + buffer_size = len + cf_sk->headroom + cf_sk->tailroom; + ret = -ENOMEM; skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret); - if (!skb) + + if (!skb || skb_tailroom(skb) < buffer_size) goto err; - skb_reserve(skb, CAIF_NEEDED_HEADROOM); + + skb_reserve(skb, cf_sk->headroom); ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); @@ -645,7 +651,6 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, long timeo; err = -EOPNOTSUPP; - if (unlikely(msg->msg_flags&MSG_OOB)) goto out_err; @@ -662,8 +667,8 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, size = len-sent; - if (size > CAIF_MAX_PAYLOAD_SIZE) - size = CAIF_MAX_PAYLOAD_SIZE; + if (size > cf_sk->maxframe) + size = cf_sk->maxframe; /* If size is more than half of sndbuf, chop up message */ if (size > ((sk->sk_sndbuf >> 1) - 64)) @@ -673,14 +678,14 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, size = SKB_MAX_ALLOC; skb = sock_alloc_send_skb(sk, - size + CAIF_NEEDED_HEADROOM - + 
CAIF_NEEDED_TAILROOM, + size + cf_sk->headroom + + cf_sk->tailroom, msg->msg_flags&MSG_DONTWAIT, &err); if (skb == NULL) goto out_err; - skb_reserve(skb, CAIF_NEEDED_HEADROOM); + skb_reserve(skb, cf_sk->headroom); /* * If you pass two values to the sock_alloc_send_skb * it tries to grab the large buffer with GFP_NOFS @@ -821,17 +826,15 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); long timeo; int err; + int ifindex, headroom, tailroom; + struct net_device *dev; + lock_sock(sk); err = -EAFNOSUPPORT; if (uaddr->sa_family != AF_CAIF) goto out; - err = -ESOCKTNOSUPPORT; - if (unlikely(!(sk->sk_type == SOCK_STREAM && - cf_sk->sk.sk_protocol == CAIFPROTO_AT) && - sk->sk_type != SOCK_SEQPACKET)) - goto out; switch (sock->state) { case SS_UNCONNECTED: /* Normal case, a fresh connect */ @@ -874,8 +877,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, sk_stream_kill_queues(&cf_sk->sk); err = -EINVAL; - if (addr_len != sizeof(struct sockaddr_caif) || - !uaddr) + if (addr_len != sizeof(struct sockaddr_caif)) goto out; memcpy(&cf_sk->conn_req.sockaddr, uaddr, @@ -888,12 +890,23 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, dbfs_atomic_inc(&cnt.num_connect_req); cf_sk->layer.receive = caif_sktrecv_cb; err = caif_connect_client(&cf_sk->conn_req, - &cf_sk->layer); + &cf_sk->layer, &ifindex, &headroom, &tailroom); if (err < 0) { cf_sk->sk.sk_socket->state = SS_UNCONNECTED; cf_sk->sk.sk_state = CAIF_DISCONNECTED; goto out; } + dev = dev_get_by_index(sock_net(sk), ifindex); + cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom); + cf_sk->tailroom = tailroom; + cf_sk->maxframe = dev->mtu - (headroom + tailroom); + dev_put(dev); + if (cf_sk->maxframe < 1) { + pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n", + __func__, dev->mtu); + err = -ENODEV; + goto out; + } err = -EINPROGRESS; wait_connect: diff --git a/net/caif/cfcnfg.c 
b/net/caif/cfcnfg.c index df43f264d9fb..1c29189b344d 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/stddef.h> #include <linux/slab.h> +#include <linux/netdevice.h> #include <net/caif/caif_layer.h> #include <net/caif/cfpkt.h> #include <net/caif/cfcnfg.h> @@ -22,6 +23,7 @@ #define PHY_NAME_LEN 20 #define container_obj(layr) container_of(layr, struct cfcnfg, layer) +#define RFM_FRAGMENT_SIZE 4030 /* Information about CAIF physical interfaces held by Config Module in order * to manage physical interfaces @@ -41,6 +43,15 @@ struct cfcnfg_phyinfo { /* Information about the physical device */ struct dev_info dev_info; + + /* Interface index */ + int ifindex; + + /* Use Start of frame extension */ + bool use_stx; + + /* Use Start of frame checksum */ + bool use_fcs; }; struct cfcnfg { @@ -248,9 +259,20 @@ static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id) { } +int protohead[CFCTRL_SRV_MASK] = { + [CFCTRL_SRV_VEI] = 4, + [CFCTRL_SRV_DATAGRAM] = 7, + [CFCTRL_SRV_UTIL] = 4, + [CFCTRL_SRV_RFM] = 3, + [CFCTRL_SRV_DBG] = 3, +}; + int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, struct cfctrl_link_param *param, - struct cflayer *adap_layer) + struct cflayer *adap_layer, + int *ifindex, + int *proto_head, + int *proto_tail) { struct cflayer *frml; if (adap_layer == NULL) { @@ -276,6 +298,14 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, param->phyid); caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id == param->phyid); + + *ifindex = cnfg->phy_layers[param->phyid].ifindex; + *proto_head = + protohead[param->linktype]+ + (cnfg->phy_layers[param->phyid].use_stx ? 
1 : 0); + + *proto_tail = 2; + /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */ cfctrl_enum_req(cnfg->ctrl, param->phyid); return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer); @@ -297,6 +327,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, struct cfcnfg *cnfg = container_obj(layer); struct cflayer *servicel = NULL; struct cfcnfg_phyinfo *phyinfo; + struct net_device *netdev; + if (adapt_layer == NULL) { pr_debug("CAIF: %s(): link setup response " "but no client exist, send linkdown back\n", @@ -308,19 +340,15 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, caif_assert(cnfg != NULL); caif_assert(phyid != 0); phyinfo = &cnfg->phy_layers[phyid]; - caif_assert(phyinfo != NULL); caif_assert(phyinfo->id == phyid); caif_assert(phyinfo->phy_layer != NULL); caif_assert(phyinfo->phy_layer->id == phyid); - if (phyinfo != NULL && - phyinfo->phy_ref_count++ == 0 && - phyinfo->phy_layer != NULL && + phyinfo->phy_ref_count++; + if (phyinfo->phy_ref_count == 1 && phyinfo->phy_layer->modemcmd != NULL) { - caif_assert(phyinfo->phy_layer->id == phyid); phyinfo->phy_layer->modemcmd(phyinfo->phy_layer, _CAIF_MODEMCMD_PHYIF_USEFULL); - } adapt_layer->id = channel_id; @@ -332,7 +360,9 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, servicel = cfdgml_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_RFM: - servicel = cfrfml_create(channel_id, &phyinfo->dev_info); + netdev = phyinfo->dev_info.dev; + servicel = cfrfml_create(channel_id, &phyinfo->dev_info, + netdev->mtu); break; case CFCTRL_SRV_UTIL: servicel = cfutill_create(channel_id, &phyinfo->dev_info); @@ -363,8 +393,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, void cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, - void *dev, struct cflayer *phy_layer, u16 *phyid, - enum cfcnfg_phy_preference pref, + struct net_device *dev, struct 
cflayer *phy_layer, + u16 *phyid, enum cfcnfg_phy_preference pref, bool fcs, bool stx) { struct cflayer *frml; @@ -418,6 +448,10 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, cnfg->phy_layers[*phyid].dev_info.dev = dev; cnfg->phy_layers[*phyid].phy_layer = phy_layer; cnfg->phy_layers[*phyid].phy_ref_count = 0; + cnfg->phy_layers[*phyid].ifindex = dev->ifindex; + cnfg->phy_layers[*phyid].use_stx = stx; + cnfg->phy_layers[*phyid].use_fcs = fcs; + phy_layer->type = phy_type; frml = cffrml_create(*phyid, fcs); if (!frml) { diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index fcfda98a5e6d..563145fdc4c3 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -19,7 +19,7 @@ #ifdef CAIF_NO_LOOP static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt){ - return CAIF_FAILURE; + return -1; } #else static int handle_loop(struct cfctrl *ctrl, @@ -43,7 +43,7 @@ struct cflayer *cfctrl_create(void) memset(&dev_info, 0, sizeof(dev_info)); dev_info.id = 0xff; memset(this, 0, sizeof(*this)); - cfsrvl_init(&this->serv, 0, &dev_info); + cfsrvl_init(&this->serv, 0, &dev_info, false); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); this->serv.layer.receive = cfctrl_recv; @@ -395,7 +395,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) cmd = cmdrsp & CFCTRL_CMD_MASK; if (cmd != CFCTRL_CMD_LINK_ERR && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) { - if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) + if (handle_loop(cfctrl, cmd, pkt) != 0) cmdrsp |= CFCTRL_ERR_BIT; } @@ -647,6 +647,6 @@ found: default: break; } - return CAIF_SUCCESS; + return 0; } #endif diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index ab6b6dc34cf8..676648cac8dd 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -22,7 +22,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info) } caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(dbg, 0, sizeof(struct cfsrvl)); - cfsrvl_init(dbg, channel_id, 
dev_info); + cfsrvl_init(dbg, channel_id, dev_info, false); dbg->layer.receive = cfdbgl_receive; dbg->layer.transmit = cfdbgl_transmit; snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id); diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index 53194840ecb6..ed9d53aff280 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -17,6 +17,7 @@ #define DGM_FLOW_OFF 0x81 #define DGM_FLOW_ON 0x80 #define DGM_CTRL_PKT_SIZE 1 +#define DGM_MTU 1500 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt); @@ -30,7 +31,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info) } caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(dgm, 0, sizeof(struct cfsrvl)); - cfsrvl_init(dgm, channel_id, dev_info); + cfsrvl_init(dgm, channel_id, dev_info, true); dgm->layer.receive = cfdgml_receive; dgm->layer.transmit = cfdgml_transmit; snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id); @@ -89,6 +90,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) if (!cfsrvl_ready(service, &ret)) return ret; + /* STE Modem cannot handle more than 1500 bytes datagrams */ + if (cfpkt_getlen(pkt) > DGM_MTU) + return -EMSGSIZE; + cfpkt_add_head(pkt, &zero, 4); /* Add info for MUX-layer to route the packet out. 
*/ diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index a6fdf899741a..01f238ff2346 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -9,8 +9,8 @@ #include <linux/hardirq.h> #include <net/caif/cfpkt.h> -#define PKT_PREFIX CAIF_NEEDED_HEADROOM -#define PKT_POSTFIX CAIF_NEEDED_TAILROOM +#define PKT_PREFIX 16 +#define PKT_POSTFIX 2 #define PKT_LEN_WHEN_EXTENDING 128 #define PKT_ERROR(pkt, errmsg) do { \ cfpkt_priv(pkt)->erronous = true; \ @@ -338,7 +338,6 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, u16 dstlen; u16 createlen; if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) { - cfpkt_destroy(addpkt); return dstpkt; } if (expectlen > addlen) diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index fd27b172fb5d..eb1602022ac0 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -7,102 +7,304 @@ #include <linux/stddef.h> #include <linux/spinlock.h> #include <linux/slab.h> +#include <linux/unaligned/le_byteshift.h> #include <net/caif/caif_layer.h> #include <net/caif/cfsrvl.h> #include <net/caif/cfpkt.h> -#define container_obj(layr) container_of(layr, struct cfsrvl, layer) - +#define container_obj(layr) container_of(layr, struct cfrfml, serv.layer) #define RFM_SEGMENTATION_BIT 0x01 -#define RFM_PAYLOAD 0x00 -#define RFM_CMD_BIT 0x80 -#define RFM_FLOW_OFF 0x81 -#define RFM_FLOW_ON 0x80 -#define RFM_SET_PIN 0x82 -#define RFM_CTRL_PKT_SIZE 1 +#define RFM_HEAD_SIZE 7 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt); -static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl); -struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info) +struct cfrfml { + struct cfsrvl serv; + struct cfpkt *incomplete_frm; + int fragment_size; + u8 seghead[6]; + u16 pdu_size; + /* Protects serialized processing of packets */ + spinlock_t sync; +}; + +static void cfrfml_release(struct kref *kref) +{ + struct cfsrvl *srvl = 
container_of(kref, struct cfsrvl, ref); + struct cfrfml *rfml = container_obj(&srvl->layer); + + if (rfml->incomplete_frm) + cfpkt_destroy(rfml->incomplete_frm); + + kfree(srvl); +} + +struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, + int mtu_size) { - struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!rfm) { + int tmp; + struct cfrfml *this = + kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); + + if (!this) { pr_warning("CAIF: %s(): Out of memory\n", __func__); return NULL; } - caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(rfm, 0, sizeof(struct cfsrvl)); - cfsrvl_init(rfm, channel_id, dev_info); - rfm->layer.modemcmd = cfservl_modemcmd; - rfm->layer.receive = cfrfml_receive; - rfm->layer.transmit = cfrfml_transmit; - snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id); - return &rfm->layer; + + cfsrvl_init(&this->serv, channel_id, dev_info, false); + this->serv.release = cfrfml_release; + this->serv.layer.receive = cfrfml_receive; + this->serv.layer.transmit = cfrfml_transmit; + + /* Round down to closest multiple of 16 */ + tmp = (mtu_size - RFM_HEAD_SIZE - 6) / 16; + tmp *= 16; + + this->fragment_size = tmp; + spin_lock_init(&this->sync); + snprintf(this->serv.layer.name, CAIF_LAYER_NAME_SZ, + "rfm%d", channel_id); + + return &this->serv.layer; } -static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) +static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead, + struct cfpkt *pkt, int *err) { - return -EPROTO; + struct cfpkt *tmppkt; + *err = -EPROTO; + /* n-th but not last segment */ + + if (cfpkt_extr_head(pkt, seghead, 6) < 0) + return NULL; + + /* Verify correct header */ + if (memcmp(seghead, rfml->seghead, 6) != 0) + return NULL; + + tmppkt = cfpkt_append(rfml->incomplete_frm, pkt, + rfml->pdu_size + RFM_HEAD_SIZE); + + /* If cfpkt_append failes input pkts are not freed */ + *err = -ENOMEM; + if (tmppkt == NULL) + return NULL; + + *err = 0; + return tmppkt; } 
static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt) { u8 tmp; bool segmented; - int ret; + int err; + u8 seghead[6]; + struct cfrfml *rfml; + struct cfpkt *tmppkt = NULL; + caif_assert(layr->up != NULL); caif_assert(layr->receive != NULL); + rfml = container_obj(layr); + spin_lock(&rfml->sync); + + err = -EPROTO; + if (cfpkt_extr_head(pkt, &tmp, 1) < 0) + goto out; + segmented = tmp & RFM_SEGMENTATION_BIT; + + if (segmented) { + if (rfml->incomplete_frm == NULL) { + /* Initial Segment */ + if (cfpkt_peek_head(pkt, rfml->seghead, 6) < 0) + goto out; + + rfml->pdu_size = get_unaligned_le16(rfml->seghead+4); + + if (cfpkt_erroneous(pkt)) + goto out; + rfml->incomplete_frm = pkt; + pkt = NULL; + } else { + + tmppkt = rfm_append(rfml, seghead, pkt, &err); + if (tmppkt == NULL) + goto out; + + if (cfpkt_erroneous(tmppkt)) + goto out; + + rfml->incomplete_frm = tmppkt; + + + if (cfpkt_erroneous(tmppkt)) + goto out; + } + err = 0; + goto out; + } + + if (rfml->incomplete_frm) { + + /* Last Segment */ + tmppkt = rfm_append(rfml, seghead, pkt, &err); + if (tmppkt == NULL) + goto out; + + if (cfpkt_erroneous(tmppkt)) + goto out; + + rfml->incomplete_frm = NULL; + pkt = tmppkt; + tmppkt = NULL; + + /* Verify that length is correct */ + err = EPROTO; + if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1) + goto out; + } + + err = rfml->serv.layer.up->receive(rfml->serv.layer.up, pkt); + +out: + + if (err != 0) { + if (tmppkt) + cfpkt_destroy(tmppkt); + if (pkt) + cfpkt_destroy(pkt); + if (rfml->incomplete_frm) + cfpkt_destroy(rfml->incomplete_frm); + rfml->incomplete_frm = NULL; + + pr_info("CAIF: %s(): " + "Connection error %d triggered on RFM link\n", + __func__, err); + + /* Trigger connection error upon failure.*/ + layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, + rfml->serv.dev_info.id); + } + spin_unlock(&rfml->sync); + return err; +} + + +static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt) +{ + 
caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size); + + /* Add info for MUX-layer to route the packet out. */ + cfpkt_info(pkt)->channel_id = rfml->serv.layer.id; /* - * RFM is taking care of segmentation and stripping of - * segmentation bit. + * To optimize alignment, we add up the size of CAIF header before + * payload. */ - if (cfpkt_extr_head(pkt, &tmp, 1) < 0) { - pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); - cfpkt_destroy(pkt); - return -EPROTO; - } - segmented = tmp & RFM_SEGMENTATION_BIT; - caif_assert(!segmented); + cfpkt_info(pkt)->hdr_len = RFM_HEAD_SIZE; + cfpkt_info(pkt)->dev_info = &rfml->serv.dev_info; - ret = layr->up->receive(layr->up, pkt); - return ret; + return rfml->serv.layer.dn->transmit(rfml->serv.layer.dn, pkt); } static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt) { - u8 tmp = 0; - int ret; - struct cfsrvl *service = container_obj(layr); + int err; + u8 seg; + u8 head[6]; + struct cfpkt *rearpkt = NULL; + struct cfpkt *frontpkt = pkt; + struct cfrfml *rfml = container_obj(layr); caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); - if (!cfsrvl_ready(service, &ret)) - return ret; + if (!cfsrvl_ready(&rfml->serv, &err)) + return err; + + err = -EPROTO; + if (cfpkt_getlen(pkt) <= RFM_HEAD_SIZE-1) + goto out; + + err = 0; + if (cfpkt_getlen(pkt) > rfml->fragment_size + RFM_HEAD_SIZE) + err = cfpkt_peek_head(pkt, head, 6); + + if (err < 0) + goto out; + + while (cfpkt_getlen(frontpkt) > rfml->fragment_size + RFM_HEAD_SIZE) { + + seg = 1; + err = -EPROTO; + + if (cfpkt_add_head(frontpkt, &seg, 1) < 0) + goto out; + /* + * On OOM error cfpkt_split returns NULL. + * + * NOTE: Segmented pdu is not correctly aligned. + * This has negative performance impact. 
+ */ + + rearpkt = cfpkt_split(frontpkt, rfml->fragment_size); + if (rearpkt == NULL) + goto out; + + err = cfrfml_transmit_segment(rfml, frontpkt); + + if (err != 0) + goto out; + frontpkt = rearpkt; + rearpkt = NULL; + + err = -ENOMEM; + if (frontpkt == NULL) + goto out; + err = -EPROTO; + if (cfpkt_add_head(frontpkt, head, 6) < 0) + goto out; - if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { - pr_err("CAIF: %s():Packet too large - size=%d\n", - __func__, cfpkt_getlen(pkt)); - return -EOVERFLOW; } - if (cfpkt_add_head(pkt, &tmp, 1) < 0) { - pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); - return -EPROTO; + + seg = 0; + err = -EPROTO; + + if (cfpkt_add_head(frontpkt, &seg, 1) < 0) + goto out; + + err = cfrfml_transmit_segment(rfml, frontpkt); + + frontpkt = NULL; +out: + + if (err != 0) { + pr_info("CAIF: %s(): " + "Connection error %d triggered on RFM link\n", + __func__, err); + /* Trigger connection error upon failure.*/ + + layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, + rfml->serv.dev_info.id); + + if (rearpkt) + cfpkt_destroy(rearpkt); + + if (frontpkt && frontpkt != pkt) { + + cfpkt_destroy(frontpkt); + /* + * Socket layer will free the original packet, + * but this packet may already be sent and + * freed. So we have to return 0 in this case + * to avoid socket layer to re-free this packet. + * The return of shutdown indication will + * cause connection to be invalidated anyhow. + */ + err = 0; + } } - /* Add info for MUX-layer to route the packet out. */ - cfpkt_info(pkt)->channel_id = service->layer.id; - /* - * To optimize alignment, we add up the size of CAIF header before - * payload. 
- */ - cfpkt_info(pkt)->hdr_len = 1; - cfpkt_info(pkt)->dev_info = &service->dev_info; - ret = layr->dn->transmit(layr->dn, pkt); - if (ret < 0) - cfpkt_extr_head(pkt, &tmp, 1); - return ret; + return err; } diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 965c5baace40..a11fbd68a13d 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -14,7 +14,8 @@ #define container_obj(layr) ((struct cfserl *) layr) #define CFSERL_STX 0x02 -#define CAIF_MINIUM_PACKET_SIZE 4 +#define SERIAL_MINIUM_PACKET_SIZE 4 +#define SERIAL_MAX_FRAMESIZE 4096 struct cfserl { struct cflayer layer; struct cfpkt *incomplete_frm; @@ -119,8 +120,8 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) /* * Frame error handling */ - if (expectlen < CAIF_MINIUM_PACKET_SIZE - || expectlen > CAIF_MAX_FRAMESIZE) { + if (expectlen < SERIAL_MINIUM_PACKET_SIZE + || expectlen > SERIAL_MAX_FRAMESIZE) { if (!layr->usestx) { if (pkt != NULL) cfpkt_destroy(pkt); diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 6e5b7079a684..f40939a91211 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -24,8 +24,10 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid) { struct cfsrvl *service = container_obj(layr); + caif_assert(layr->up != NULL); caif_assert(layr->up->ctrlcmd != NULL); + switch (ctrl) { case CAIF_CTRLCMD_INIT_RSP: service->open = true; @@ -89,9 +91,14 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) { struct cfsrvl *service = container_obj(layr); + caif_assert(layr != NULL); caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); + + if (!service->supports_flowctrl) + return 0; + switch (ctrl) { case CAIF_MODEMCMD_FLOW_ON_REQ: { @@ -152,9 +159,17 @@ void cfservl_destroy(struct cflayer *layer) kfree(layer); } +void cfsrvl_release(struct kref *kref) +{ + struct cfsrvl *service = container_of(kref, struct cfsrvl, ref); + 
kfree(service); +} + void cfsrvl_init(struct cfsrvl *service, - u8 channel_id, - struct dev_info *dev_info) + u8 channel_id, + struct dev_info *dev_info, + bool supports_flowctrl + ) { caif_assert(offsetof(struct cfsrvl, layer) == 0); service->open = false; @@ -164,14 +179,11 @@ void cfsrvl_init(struct cfsrvl *service, service->layer.ctrlcmd = cfservl_ctrlcmd; service->layer.modemcmd = cfservl_modemcmd; service->dev_info = *dev_info; + service->supports_flowctrl = supports_flowctrl; + service->release = cfsrvl_release; kref_init(&service->ref); } -void cfsrvl_release(struct kref *kref) -{ - struct cfsrvl *service = container_of(kref, struct cfsrvl, ref); - kfree(service); -} bool cfsrvl_ready(struct cfsrvl *service, int *err) { diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 5fd2c9ea8b42..02795aff57a4 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -31,7 +31,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info) } caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(util, 0, sizeof(struct cfsrvl)); - cfsrvl_init(util, channel_id, dev_info); + cfsrvl_init(util, channel_id, dev_info, true); util->layer.receive = cfutill_receive; util->layer.transmit = cfutill_transmit; snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1"); @@ -90,12 +90,6 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt) if (!cfsrvl_ready(service, &ret)) return ret; - if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { - pr_err("CAIF: %s(): packet too large size=%d\n", - __func__, cfpkt_getlen(pkt)); - return -EOVERFLOW; - } - cfpkt_add_head(pkt, &zero, 1); /* Add info for MUX-layer to route the packet out. 
*/ info = cfpkt_info(pkt); diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index e04f7d964e83..77cc09faac9a 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -30,7 +30,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info) } caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(vei, 0, sizeof(struct cfsrvl)); - cfsrvl_init(vei, channel_id, dev_info); + cfsrvl_init(vei, channel_id, dev_info, true); vei->layer.receive = cfvei_receive; vei->layer.transmit = cfvei_transmit; snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id); @@ -84,11 +84,6 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt) return ret; caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); - if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { - pr_warning("CAIF: %s(): Packet too large - size=%d\n", - __func__, cfpkt_getlen(pkt)); - return -EOVERFLOW; - } if (cfpkt_add_head(pkt, &tmp, 1) < 0) { pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index 89ad4ea239f1..ada6ee2d48f5 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -27,7 +27,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info) caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(vid, 0, sizeof(struct cfsrvl)); - cfsrvl_init(vid, channel_id, dev_info); + cfsrvl_init(vid, channel_id, dev_info, false); vid->layer.receive = cfvidl_receive; vid->layer.transmit = cfvidl_transmit; snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1"); diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 610966abe2dc..4293e190ec53 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -23,7 +23,7 @@ #include <net/caif/caif_dev.h> /* GPRS PDP connection has MTU to 1500 */ -#define SIZE_MTU 1500 +#define GPRS_PDP_MTU 1500 /* 5 sec. 
connect timeout */ #define CONNECT_TIMEOUT (5 * HZ) #define CAIF_NET_DEFAULT_QUEUE_LEN 500 @@ -232,6 +232,8 @@ static int chnl_net_open(struct net_device *dev) { struct chnl_net *priv = NULL; int result = -1; + int llifindex, headroom, tailroom, mtu; + struct net_device *lldev; ASSERT_RTNL(); priv = netdev_priv(dev); if (!priv) { @@ -241,41 +243,88 @@ static int chnl_net_open(struct net_device *dev) if (priv->state != CAIF_CONNECTING) { priv->state = CAIF_CONNECTING; - result = caif_connect_client(&priv->conn_req, &priv->chnl); + result = caif_connect_client(&priv->conn_req, &priv->chnl, + &llifindex, &headroom, &tailroom); if (result != 0) { - priv->state = CAIF_DISCONNECTED; pr_debug("CAIF: %s(): err: " "Unable to register and open device," " Err:%d\n", __func__, result); - return result; + goto error; + } + + lldev = dev_get_by_index(dev_net(dev), llifindex); + + if (lldev == NULL) { + pr_debug("CAIF: %s(): no interface?\n", __func__); + result = -ENODEV; + goto error; + } + + dev->needed_tailroom = tailroom + lldev->needed_tailroom; + dev->hard_header_len = headroom + lldev->hard_header_len + + lldev->needed_tailroom; + + /* + * MTU, head-room etc is not know before we have a + * CAIF link layer device available. MTU calculation may + * override initial RTNL configuration. + * MTU is minimum of current mtu, link layer mtu pluss + * CAIF head and tail, and PDP GPRS contexts max MTU. 
+ */ + mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom)); + mtu = min_t(int, GPRS_PDP_MTU, mtu); + dev_set_mtu(dev, mtu); + dev_put(lldev); + + if (mtu < 100) { + pr_warning("CAIF: %s(): " + "CAIF Interface MTU too small (%d)\n", + __func__, mtu); + result = -ENODEV; + goto error; } } + rtnl_unlock(); /* Release RTNL lock during connect wait */ + result = wait_event_interruptible_timeout(priv->netmgmt_wq, priv->state != CAIF_CONNECTING, CONNECT_TIMEOUT); + rtnl_lock(); + if (result == -ERESTARTSYS) { pr_debug("CAIF: %s(): wait_event_interruptible" " woken by a signal\n", __func__); - return -ERESTARTSYS; + result = -ERESTARTSYS; + goto error; } + if (result == 0) { pr_debug("CAIF: %s(): connect timeout\n", __func__); caif_disconnect_client(&priv->chnl); priv->state = CAIF_DISCONNECTED; pr_debug("CAIF: %s(): state disconnected\n", __func__); - return -ETIMEDOUT; + result = -ETIMEDOUT; + goto error; } if (priv->state != CAIF_CONNECTED) { pr_debug("CAIF: %s(): connect failed\n", __func__); - return -ECONNREFUSED; + result = -ECONNREFUSED; + goto error; } pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); return 0; + +error: + caif_disconnect_client(&priv->chnl); + priv->state = CAIF_DISCONNECTED; + pr_debug("CAIF: %s(): state disconnected\n", __func__); + return result; + } static int chnl_net_stop(struct net_device *dev) @@ -321,9 +370,7 @@ static void ipcaif_net_setup(struct net_device *dev) dev->destructor = free_netdev; dev->flags |= IFF_NOARP; dev->flags |= IFF_POINTOPOINT; - dev->needed_headroom = CAIF_NEEDED_HEADROOM; - dev->needed_tailroom = CAIF_NEEDED_TAILROOM; - dev->mtu = SIZE_MTU; + dev->mtu = GPRS_PDP_MTU; dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN; priv = netdev_priv(dev); diff --git a/net/can/raw.c b/net/can/raw.c index da99cf153b33..a10e3338f084 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -436,14 +436,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (count > 1) { /* filter does not fit 
into dfilter => alloc space */ - filter = kmalloc(optlen, GFP_KERNEL); - if (!filter) - return -ENOMEM; - - if (copy_from_user(filter, optval, optlen)) { - kfree(filter); - return -EFAULT; - } + filter = memdup_user(optval, optlen); + if (IS_ERR(filter)) + return PTR_ERR(filter); } else if (count == 1) { if (copy_from_user(&sfilter, optval, sizeof(sfilter))) return -EFAULT; @@ -655,6 +650,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = sock_tx_timestamp(msg, sk, skb_tx(skb)); if (err < 0) goto free_skb; + + /* to be able to check the received tx sock reference in raw_rcv() */ + skb_tx(skb)->prevent_sk_orphan = 1; + skb->dev = dev; skb->sk = sk; diff --git a/net/compat.c b/net/compat.c index ec24d9edb025..63d260e81472 100644 --- a/net/compat.c +++ b/net/compat.c @@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, int tot_len; if (kern_msg->msg_namelen) { - if (mode==VERIFY_READ) { + if (mode == VERIFY_READ) { int err = move_addr_to_kernel(kern_msg->msg_name, kern_msg->msg_namelen, kern_address); @@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, static int do_set_sock_timeout(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - struct compat_timeval __user *up = (struct compat_timeval __user *) optval; + struct compat_timeval __user *up = (struct compat_timeval __user *)optval; struct timeval ktime; mm_segment_t old_fs; int err; @@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level, return -EFAULT; old_fs = get_fs(); set_fs(KERNEL_DS); - err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime)); + err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime)); set_fs(old_fs); return err; @@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen) { int err; - struct socket *sock; + struct 
socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_setsockopt(sock,level,optname); + if (sock) { + err = security_socket_setsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname, int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct compat_timeval __user *ctv = - (struct compat_timeval __user*) userstamp; + (struct compat_timeval __user *) userstamp; int err = -ENOENT; struct timeval tv; @@ -477,7 +476,7 @@ EXPORT_SYMBOL(compat_sock_get_timestamp); int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) { struct compat_timespec __user *ctv = - (struct compat_timespec __user*) userstamp; + (struct compat_timespec __user *) userstamp; int err = -ENOENT; struct timespec ts; @@ -502,12 +501,10 @@ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_getsockopt(sock, level, - optname); + if (sock) { + err = security_socket_getsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -531,7 +528,7 @@ struct compat_group_req { __u32 gr_interface; struct __kernel_sockaddr_storage gr_group __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; struct compat_group_source_req { __u32 gsr_interface; @@ -539,7 +536,7 @@ struct compat_group_source_req { __attribute__ ((aligned(4))); struct __kernel_sockaddr_storage gsr_source __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; struct compat_group_filter { __u32 gf_interface; @@ -549,7 +546,7 @@ struct compat_group_filter { __u32 gf_numsrc; struct __kernel_sockaddr_storage gf_slist[1] __attribute__ ((aligned(4))); -} 
__attribute__ ((packed)); +} __packed; #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ sizeof(struct __kernel_sockaddr_storage)) @@ -557,7 +554,7 @@ struct compat_group_filter { int compat_mc_setsockopt(struct sock *sock, int level, int optname, char __user *optval, unsigned int optlen, - int (*setsockopt)(struct sock *,int,int,char __user *,unsigned int)) + int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int)) { char __user *koptval = optval; int koptlen = optlen; @@ -640,12 +637,11 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, } return setsockopt(sock, level, optname, koptval, koptlen); } - EXPORT_SYMBOL(compat_mc_setsockopt); int compat_mc_getsockopt(struct sock *sock, int level, int optname, char __user *optval, int __user *optlen, - int (*getsockopt)(struct sock *,int,int,char __user *,int __user *)) + int (*getsockopt)(struct sock *, int, int, char __user *, int __user *)) { struct compat_group_filter __user *gf32 = (void *)optval; struct group_filter __user *kgf; @@ -681,7 +677,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, __put_user(interface, &kgf->gf_interface) || __put_user(fmode, &kgf->gf_fmode) || __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group))) + copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group))) return -EFAULT; err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); @@ -714,21 +710,22 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, copylen = numsrc * sizeof(gf32->gf_slist[0]); if (copylen > klen) copylen = klen; - if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) + if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) return -EFAULT; } return err; } - EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 
- AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5)}; +static unsigned char nas[20] = { + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5) +}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -827,7 +824,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: - ret = sys_shutdown(a0,a1); + ret = sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: ret = compat_sys_setsockopt(a0, a1, a[2], diff --git a/net/core/Makefile b/net/core/Makefile index 51c3eec850ef..8a04dd22cf77 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -18,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o - +obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o diff --git a/net/core/datagram.c b/net/core/datagram.c index f5b6f43a4c2e..251997a95483 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -219,6 +219,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, err); } +EXPORT_SYMBOL(skb_recv_datagram); void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { @@ -288,7 +289,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) return err; } - EXPORT_SYMBOL(skb_kill_datagram); /** @@ -373,6 +373,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, fault: return -EFAULT; } +EXPORT_SYMBOL(skb_copy_datagram_iovec); /** * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. 
@@ -716,6 +717,7 @@ csum_error: fault: return -EFAULT; } +EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec); /** * datagram_poll - generic datagram poll @@ -770,8 +772,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, return mask; } - EXPORT_SYMBOL(datagram_poll); -EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec); -EXPORT_SYMBOL(skb_copy_datagram_iovec); -EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 2b3bf53bc687..e1c1cdcc2bb0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -101,8 +101,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> -#include <linux/if_bridge.h> -#include <linux/if_macvlan.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -803,35 +801,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * dev_get_by_flags - find any device with given flags + * dev_get_by_flags_rcu - find any device with given flags * @net: the applicable net namespace * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. The device returned has - * had a reference added and the pointer is safe until the user calls - * dev_put to indicate they have finished with it. + * is not found or a pointer to the device. Must be called inside + * rcu_read_lock(), and result refcount is unchanged. 
*/ -struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ret = NULL; - rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { - dev_hold(dev); ret = dev; break; } } - rcu_read_unlock(); return ret; } -EXPORT_SYMBOL(dev_get_by_flags); +EXPORT_SYMBOL(dev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device @@ -1488,6 +1482,7 @@ static inline void net_timestamp_check(struct sk_buff *skb) int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { skb_orphan(skb); + nf_reset(skb); if (!(dev->flags & IFF_UP) || (skb->len > (dev->mtu + dev->hard_header_len))) { @@ -1541,7 +1536,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) if (net_ratelimit()) printk(KERN_CRIT "protocol %04x is " "buggy, dev %s\n", - skb2->protocol, dev->name); + ntohs(skb2->protocol), + dev->name); skb_reset_network_header(skb2); } @@ -1553,6 +1549,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } +/* + * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues + * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 
+ */ +void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) +{ + unsigned int real_num = dev->real_num_tx_queues; + + if (unlikely(txq > dev->num_tx_queues)) + ; + else if (txq > real_num) + dev->real_num_tx_queues = txq; + else if (txq < real_num) { + dev->real_num_tx_queues = txq; + qdisc_reset_all_tx_gt(dev, txq); + } +} +EXPORT_SYMBOL(netif_set_real_num_tx_queues); static inline void __netif_reschedule(struct Qdisc *q) { @@ -1893,8 +1907,32 @@ static int dev_gso_segment(struct sk_buff *skb) */ static inline void skb_orphan_try(struct sk_buff *skb) { - if (!skb_tx(skb)->flags) + struct sock *sk = skb->sk; + + if (sk && !skb_tx(skb)->flags) { + /* skb_tx_hash() wont be able to get sk. + * We copy sk_hash into skb->rxhash + */ + if (!skb->rxhash) + skb->rxhash = sk->sk_hash; skb_orphan(skb); + } +} + +/* + * Returns true if either: + * 1. skb has frag_list and the device doesn't support FRAGLIST, or + * 2. skb is fragmented and the device does not support SG, or if + * at least one of fragments is in highmem and device does not + * support DMA from it. + */ +static inline int skb_needs_linearize(struct sk_buff *skb, + struct net_device *dev) +{ + return skb_is_nonlinear(skb) && + ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || + illegal_highdma(dev, skb)))); } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -1921,6 +1959,22 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto out_kfree_skb; if (skb->next) goto gso; + } else { + if (skb_needs_linearize(skb, dev) && + __skb_linearize(skb)) + goto out_kfree_skb; + + /* If packet is not checksummed and device does not + * support checksumming for this protocol, complete + * checksumming here. 
+ */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + skb_set_transport_header(skb, skb->csum_start - + skb_headroom(skb)); + if (!dev_can_checksum(dev, skb) && + skb_checksum_help(skb)) + goto out_kfree_skb; + } } rc = ops->ndo_start_xmit(skb, dev); @@ -1980,8 +2034,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else - hash = (__force u16) skb->protocol; - + hash = (__force u16) skb->protocol ^ skb->rxhash; hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); @@ -2004,12 +2057,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { - u16 queue_index; + int queue_index; struct sock *sk = skb->sk; - if (sk_tx_queue_recorded(sk)) { - queue_index = sk_tx_queue_get(sk); - } else { + queue_index = sk_tx_queue_get(sk); + if (queue_index < 0) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) { @@ -2038,14 +2090,24 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); + bool contended = qdisc_is_running(q); int rc; + /* + * Heuristic to force contended enqueues to serialize on a + * separate lock before trying to get qdisc main lock. + * This permits __QDISC_STATE_RUNNING owner to get the lock more often + * and dequeue packets faster. 
+ */ + if (unlikely(contended)) + spin_lock(&q->busylock); + spin_lock(root_lock); if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { kfree_skb(skb); rc = NET_XMIT_DROP; } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && - !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { + qdisc_run_begin(q)) { /* * This is a work-conserving queue; there are no old skbs * waiting to be sent out; and the qdisc is not running - @@ -2054,37 +2116,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); __qdisc_update_bstats(q, skb->len); - if (sch_direct_xmit(skb, q, dev, txq, root_lock)) + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } __qdisc_run(q); - else - clear_bit(__QDISC_STATE_RUNNING, &q->state); + } else + qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); rc = qdisc_enqueue_root(skb, q); - qdisc_run(q); + if (qdisc_run_begin(q)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } + __qdisc_run(q); + } } spin_unlock(root_lock); - + if (unlikely(contended)) + spin_unlock(&q->busylock); return rc; } -/* - * Returns true if either: - * 1. skb has frag_list and the device doesn't support FRAGLIST, or - * 2. skb is fragmented and the device does not support SG, or if - * at least one of fragments is in highmem and device does not - * support DMA from it. 
- */ -static inline int skb_needs_linearize(struct sk_buff *skb, - struct net_device *dev) -{ - return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || - illegal_highdma(dev, skb))); -} - /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -2117,25 +2175,6 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; - /* GSO will handle the following emulations directly. */ - if (netif_needs_gso(dev, skb)) - goto gso; - - /* Convert a paged skb to linear, if required */ - if (skb_needs_linearize(skb, dev) && __skb_linearize(skb)) - goto out_kfree_skb; - - /* If packet is not checksummed and device does not support - * checksumming for this protocol, complete checksumming here. - */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb_set_transport_header(skb, skb->csum_start - - skb_headroom(skb)); - if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) - goto out_kfree_skb; - } - -gso: /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ @@ -2194,7 +2233,6 @@ gso: rc = -ENETDOWN; rcu_read_unlock_bh(); -out_kfree_skb: kfree_skb(skb); return rc; out: @@ -2579,70 +2617,14 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } -#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) - -#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) +#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ + (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) /* This hook is defined here for ATM LANE */ int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) __read_mostly; EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif -/* - * If bridge module is loaded call bridging hook. - * returns NULL if packet was consumed. 
- */ -struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, - struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(br_handle_frame_hook); - -static inline struct sk_buff *handle_bridge(struct sk_buff *skb, - struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev) -{ - struct net_bridge_port *port; - - if (skb->pkt_type == PACKET_LOOPBACK || - (port = rcu_dereference(skb->dev->br_port)) == NULL) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - - return br_handle_frame_hook(port, skb); -} -#else -#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) -#endif - -#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) -struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p, - struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); - -static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, - struct net_device *orig_dev) -{ - struct macvlan_port *port; - - port = rcu_dereference(skb->dev->macvlan_port); - if (!port) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - return macvlan_handle_frame_hook(port, skb); -} -#else -#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) -#endif - #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? 
otherwise some useless instructions @@ -2660,10 +2642,10 @@ static int ing_filter(struct sk_buff *skb) int result = TC_ACT_OK; struct Qdisc *q; - if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING - "Redir loop detected Dropping packet (%d->%d)\n", - skb->skb_iif, dev->ifindex); + if (unlikely(MAX_RED_LOOP < ttl++)) { + if (net_ratelimit()) + pr_warning( "Redir loop detected Dropping packet (%d->%d)\n", + skb->skb_iif, dev->ifindex); return TC_ACT_SHOT; } @@ -2693,9 +2675,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; - } else { - /* Huh? Why does turning on AF_PACKET affect this? */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); } switch (ing_filter(skb)) { @@ -2738,6 +2717,51 @@ void netif_nit_deliver(struct sk_buff *skb) rcu_read_unlock(); } +/** + * netdev_rx_handler_register - register receive handler + * @dev: device to register a handler for + * @rx_handler: receive handler to register + * @rx_handler_data: data pointer that is used by rx handler + * + * Register a receive hander for a device. This handler will then be + * called from __netif_receive_skb. A negative errno code is returned + * on a failure. + * + * The caller must hold the rtnl_mutex. + */ +int netdev_rx_handler_register(struct net_device *dev, + rx_handler_func_t *rx_handler, + void *rx_handler_data) +{ + ASSERT_RTNL(); + + if (dev->rx_handler) + return -EBUSY; + + rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); + rcu_assign_pointer(dev->rx_handler, rx_handler); + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_rx_handler_register); + +/** + * netdev_rx_handler_unregister - unregister receive handler + * @dev: device to unregister a handler from + * + * Unregister a receive hander from a device. + * + * The caller must hold the rtnl_mutex. 
+ */ +void netdev_rx_handler_unregister(struct net_device *dev) +{ + + ASSERT_RTNL(); + rcu_assign_pointer(dev->rx_handler, NULL); + rcu_assign_pointer(dev->rx_handler_data, NULL); +} +EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); + static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, struct net_device *master) { @@ -2759,7 +2783,8 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) if (master->priv_flags & IFF_MASTER_ARPMON) dev->last_rx = jiffies; - if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) { + if ((master->priv_flags & IFF_MASTER_ALB) && + (master->priv_flags & IFF_BRIDGE_PORT)) { /* Do address unmangle. The local destination address * will be always the one master has. Provides the right * functionality in a bridge. @@ -2790,6 +2815,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop); static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + rx_handler_func_t *rx_handler; struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; @@ -2831,8 +2857,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb->dev = master; } - __get_cpu_var(softnet_data).processed++; - + __this_cpu_inc(softnet_data.processed); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; @@ -2864,12 +2889,17 @@ static int __netif_receive_skb(struct sk_buff *skb) ncls: #endif - skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; - skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; + /* Handle special case of bridge or macvlan */ + rx_handler = rcu_dereference(skb->dev->rx_handler); + if (rx_handler) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + skb = rx_handler(skb); + if (!skb) + goto out; + } /* * Make sure frames received on VLAN interfaces stacked on @@ -2930,6 +2960,9 @@ int netif_receive_skb(struct sk_buff *skb) 
if (netdev_tstamp_prequeue) net_timestamp_check(skb); + if (skb_defer_rx_timestamp(skb)) + return NET_RX_SUCCESS; + #ifdef CONFIG_RPS { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -3694,10 +3727,11 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - const struct net_device_stats *stats = dev_get_stats(dev); + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); - seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " - "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, @@ -5246,20 +5280,22 @@ void netdev_run_todo(void) /** * dev_txq_stats_fold - fold tx_queues stats * @dev: device to get statistics from - * @stats: struct net_device_stats to hold results + * @stats: struct rtnl_link_stats64 to hold results */ void dev_txq_stats_fold(const struct net_device *dev, - struct net_device_stats *stats) + struct rtnl_link_stats64 *stats) { - unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; + u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0; unsigned int i; struct netdev_queue *txq; for (i = 0; i < dev->num_tx_queues; i++) { txq = netdev_get_tx_queue(dev, i); + spin_lock_bh(&txq->_xmit_lock); tx_bytes += txq->tx_bytes; tx_packets += txq->tx_packets; tx_dropped += txq->tx_dropped; + spin_unlock_bh(&txq->_xmit_lock); } if (tx_bytes || tx_packets || tx_dropped) { stats->tx_bytes = tx_bytes; @@ -5269,23 +5305,53 @@ void dev_txq_stats_fold(const struct net_device *dev, } EXPORT_SYMBOL(dev_txq_stats_fold); +/* Convert net_device_stats to rtnl_link_stats64. They have the same + * fields in the same order, with only the type differing. 
+ */ +static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, + const struct net_device_stats *netdev_stats) +{ +#if BITS_PER_LONG == 64 + BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); + memcpy(stats64, netdev_stats, sizeof(*stats64)); +#else + size_t i, n = sizeof(*stats64) / sizeof(u64); + const unsigned long *src = (const unsigned long *)netdev_stats; + u64 *dst = (u64 *)stats64; + + BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != + sizeof(*stats64) / sizeof(u64)); + for (i = 0; i < n; i++) + dst[i] = src[i]; +#endif +} + /** * dev_get_stats - get network device statistics * @dev: device to get statistics from + * @storage: place to store stats * - * Get network statistics from device. The device driver may provide - * its own method by setting dev->netdev_ops->get_stats; otherwise - * the internal statistics structure is used. + * Get network statistics from device. Return @storage. + * The device driver may provide its own method by setting + * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; + * otherwise the internal statistics structure is used. 
*/ -const struct net_device_stats *dev_get_stats(struct net_device *dev) +struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *storage) { const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_get_stats) - return ops->ndo_get_stats(dev); - - dev_txq_stats_fold(dev, &dev->stats); - return &dev->stats; + if (ops->ndo_get_stats64) { + memset(storage, 0, sizeof(*storage)); + return ops->ndo_get_stats64(dev, storage); + } + if (ops->ndo_get_stats) { + netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); + return storage; + } + netdev_stats_to_stats64(storage, &dev->stats); + dev_txq_stats_fold(dev, storage); + return storage; } EXPORT_SYMBOL(dev_get_stats); @@ -5790,6 +5856,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len) return buffer; } +static int __netdev_printk(const char *level, const struct net_device *dev, + struct va_format *vaf) +{ + int r; + + if (dev && dev->dev.parent) + r = dev_printk(level, dev->dev.parent, "%s: %pV", + netdev_name(dev), vaf); + else if (dev) + r = printk("%s%s: %pV", level, netdev_name(dev), vaf); + else + r = printk("%s(NULL net_device): %pV", level, vaf); + + return r; +} + +int netdev_printk(const char *level, const struct net_device *dev, + const char *format, ...) +{ + struct va_format vaf; + va_list args; + int r; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + + r = __netdev_printk(level, dev, &vaf); + va_end(args); + + return r; +} +EXPORT_SYMBOL(netdev_printk); + +#define define_netdev_printk_level(func, level) \ +int func(const struct net_device *dev, const char *fmt, ...) 
\ +{ \ + int r; \ + struct va_format vaf; \ + va_list args; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + r = __netdev_printk(level, dev, &vaf); \ + va_end(args); \ + \ + return r; \ +} \ +EXPORT_SYMBOL(func); + +define_netdev_printk_level(netdev_emerg, KERN_EMERG); +define_netdev_printk_level(netdev_alert, KERN_ALERT); +define_netdev_printk_level(netdev_crit, KERN_CRIT); +define_netdev_printk_level(netdev_err, KERN_ERR); +define_netdev_printk_level(netdev_warn, KERN_WARNING); +define_netdev_printk_level(netdev_notice, KERN_NOTICE); +define_netdev_printk_level(netdev_info, KERN_INFO); + static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index ad41529fb60f..36e603c78ce9 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -223,6 +223,11 @@ static int set_all_monitor_traces(int state) spin_lock(&trace_state_lock); + if (state == trace_state) { + rc = -EAGAIN; + goto out_unlock; + } + switch (state) { case TRACE_ON: rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); @@ -251,11 +256,12 @@ static int set_all_monitor_traces(int state) if (!rc) trace_state = state; + else + rc = -EINPROGRESS; +out_unlock: spin_unlock(&trace_state_lock); - if (rc) - return -EINPROGRESS; return rc; } @@ -341,9 +347,9 @@ static struct notifier_block dropmon_net_notifier = { static int __init init_net_drop_monitor(void) { - int cpu; - int rc, i, ret; struct per_cpu_dm_data *data; + int cpu, rc; + printk(KERN_INFO "Initalizing network drop monitor service\n"); if (sizeof(void *) > 8) { @@ -351,21 +357,12 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - if (genl_register_family(&net_drop_monitor_family) < 0) { + rc = genl_register_family_with_ops(&net_drop_monitor_family, + dropmon_ops, + ARRAY_SIZE(dropmon_ops)); + if (rc) { printk(KERN_ERR "Could not create drop monitor netlink family\n"); - return -EFAULT; - } - - rc = 
-EFAULT; - - for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) { - ret = genl_register_ops(&net_drop_monitor_family, - &dropmon_ops[i]); - if (ret) { - printk(KERN_CRIT "Failed to register operation %d\n", - dropmon_ops[i].cmd); - goto out_unreg; - } + return rc; } rc = register_netdevice_notifier(&dropmon_net_notifier); diff --git a/net/core/dst.c b/net/core/dst.c index 9920722cc82b..6c41b1fac3db 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -197,7 +197,6 @@ static void ___dst_free(struct dst_entry *dst) dst->input = dst->output = dst_discard; dst->obsolete = 2; } -EXPORT_SYMBOL(__dst_free); void __dst_free(struct dst_entry *dst) { @@ -213,6 +212,7 @@ void __dst_free(struct dst_entry *dst) } spin_unlock_bh(&dst_garbage.lock); } +EXPORT_SYMBOL(__dst_free); struct dst_entry *dst_destroy(struct dst_entry * dst) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a0f4964033d2..7a85367b3c2f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -144,31 +144,13 @@ u32 ethtool_op_get_flags(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_flags); -int ethtool_op_set_flags(struct net_device *dev, u32 data) +int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) { - const struct ethtool_ops *ops = dev->ethtool_ops; - unsigned long features = dev->features; - - if (data & ETH_FLAG_LRO) - features |= NETIF_F_LRO; - else - features &= ~NETIF_F_LRO; - - if (data & ETH_FLAG_NTUPLE) { - if (!ops->set_rx_ntuple) - return -EOPNOTSUPP; - features |= NETIF_F_NTUPLE; - } else { - /* safe to clear regardless */ - features &= ~NETIF_F_NTUPLE; - } - - if (data & ETH_FLAG_RXHASH) - features |= NETIF_F_RXHASH; - else - features &= ~NETIF_F_RXHASH; + if (data & ~supported) + return -EINVAL; - dev->features = features; + dev->features = ((dev->features & ~flags_dup_features) | + (data & flags_dup_features)); return 0; } EXPORT_SYMBOL(ethtool_op_set_flags); @@ -318,23 +300,33 @@ out: } static noinline_for_stack int ethtool_set_rxnfc(struct net_device 
*dev, - void __user *useraddr) + u32 cmd, void __user *useraddr) { - struct ethtool_rxnfc cmd; + struct ethtool_rxnfc info; + size_t info_size = sizeof(info); if (!dev->ethtool_ops->set_rxnfc) return -EOPNOTSUPP; - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + /* struct ethtool_rxnfc was originally defined for + * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data + * members. User-space might still be using that + * definition. */ + if (cmd == ETHTOOL_SRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; - return dev->ethtool_ops->set_rxnfc(dev, &cmd); + return dev->ethtool_ops->set_rxnfc(dev, &info); } static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, - void __user *useraddr) + u32 cmd, void __user *useraddr) { struct ethtool_rxnfc info; + size_t info_size = sizeof(info); const struct ethtool_ops *ops = dev->ethtool_ops; int ret; void *rule_buf = NULL; @@ -342,13 +334,22 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (!ops->get_rxnfc) return -EOPNOTSUPP; - if (copy_from_user(&info, useraddr, sizeof(info))) + /* struct ethtool_rxnfc was originally defined for + * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data + * members. User-space might still be using that + * definition. 
*/ + if (cmd == ETHTOOL_GRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { - rule_buf = kmalloc(info.rule_cnt * sizeof(u32), - GFP_USER); + if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) + rule_buf = kmalloc(info.rule_cnt * sizeof(u32), + GFP_USER); if (!rule_buf) return -ENOMEM; } @@ -359,7 +360,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, goto err_out; ret = -EFAULT; - if (copy_to_user(useraddr, &info, sizeof(info))) + if (copy_to_user(useraddr, &info, info_size)) goto err_out; if (rule_buf) { @@ -376,6 +377,80 @@ err_out: return ret; } +static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_rxfh_indir *indir; + u32 table_size; + size_t full_size; + int ret; + + if (!dev->ethtool_ops->get_rxfh_indir) + return -EOPNOTSUPP; + + if (copy_from_user(&table_size, + useraddr + offsetof(struct ethtool_rxfh_indir, size), + sizeof(table_size))) + return -EFAULT; + + if (table_size > + (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) + return -ENOMEM; + full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; + indir = kmalloc(full_size, GFP_USER); + if (!indir) + return -ENOMEM; + + indir->cmd = ETHTOOL_GRXFHINDIR; + indir->size = table_size; + ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); + if (ret) + goto out; + + if (copy_to_user(useraddr, indir, full_size)) + ret = -EFAULT; + +out: + kfree(indir); + return ret; +} + +static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_rxfh_indir *indir; + u32 table_size; + size_t full_size; + int ret; + + if (!dev->ethtool_ops->set_rxfh_indir) + return -EOPNOTSUPP; + + if (copy_from_user(&table_size, + useraddr + offsetof(struct ethtool_rxfh_indir, size), + 
sizeof(table_size))) + return -EFAULT; + + if (table_size > + (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) + return -ENOMEM; + full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; + indir = kmalloc(full_size, GFP_USER); + if (!indir) + return -ENOMEM; + + if (copy_from_user(indir, useraddr, full_size)) { + ret = -EFAULT; + goto out; + } + + ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); + +out: + kfree(indir); + return ret; +} + static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, struct ethtool_rx_ntuple_flow_spec *spec, struct ethtool_rx_ntuple_flow_spec_container *fsc) @@ -1516,12 +1591,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: - rc = ethtool_get_rxnfc(dev, useraddr); + rc = ethtool_get_rxnfc(dev, ethcmd, useraddr); break; case ETHTOOL_SRXFH: case ETHTOOL_SRXCLSRLDEL: case ETHTOOL_SRXCLSRLINS: - rc = ethtool_set_rxnfc(dev, useraddr); + rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; case ETHTOOL_GGRO: rc = ethtool_get_gro(dev, useraddr); @@ -1544,6 +1619,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GSSET_INFO: rc = ethtool_get_sset_info(dev, useraddr); break; + case ETHTOOL_GRXFHINDIR: + rc = ethtool_get_rxfh_indir(dev, useraddr); + break; + case ETHTOOL_SRXFHINDIR: + rc = ethtool_set_rxfh_indir(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/filter.c b/net/core/filter.c index da69fb728d32..52b051f82a01 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int fentry = &filter[pc]; switch (fentry->code) { - case BPF_ALU|BPF_ADD|BPF_X: + case BPF_S_ALU_ADD_X: A += X; continue; - case BPF_ALU|BPF_ADD|BPF_K: + case BPF_S_ALU_ADD_K: A += fentry->k; continue; - case BPF_ALU|BPF_SUB|BPF_X: + case BPF_S_ALU_SUB_X: A -= X; continue; - case BPF_ALU|BPF_SUB|BPF_K: 
+ case BPF_S_ALU_SUB_K: A -= fentry->k; continue; - case BPF_ALU|BPF_MUL|BPF_X: + case BPF_S_ALU_MUL_X: A *= X; continue; - case BPF_ALU|BPF_MUL|BPF_K: + case BPF_S_ALU_MUL_K: A *= fentry->k; continue; - case BPF_ALU|BPF_DIV|BPF_X: + case BPF_S_ALU_DIV_X: if (X == 0) return 0; A /= X; continue; - case BPF_ALU|BPF_DIV|BPF_K: + case BPF_S_ALU_DIV_K: A /= fentry->k; continue; - case BPF_ALU|BPF_AND|BPF_X: + case BPF_S_ALU_AND_X: A &= X; continue; - case BPF_ALU|BPF_AND|BPF_K: + case BPF_S_ALU_AND_K: A &= fentry->k; continue; - case BPF_ALU|BPF_OR|BPF_X: + case BPF_S_ALU_OR_X: A |= X; continue; - case BPF_ALU|BPF_OR|BPF_K: + case BPF_S_ALU_OR_K: A |= fentry->k; continue; - case BPF_ALU|BPF_LSH|BPF_X: + case BPF_S_ALU_LSH_X: A <<= X; continue; - case BPF_ALU|BPF_LSH|BPF_K: + case BPF_S_ALU_LSH_K: A <<= fentry->k; continue; - case BPF_ALU|BPF_RSH|BPF_X: + case BPF_S_ALU_RSH_X: A >>= X; continue; - case BPF_ALU|BPF_RSH|BPF_K: + case BPF_S_ALU_RSH_K: A >>= fentry->k; continue; - case BPF_ALU|BPF_NEG: + case BPF_S_ALU_NEG: A = -A; continue; - case BPF_JMP|BPF_JA: + case BPF_S_JMP_JA: pc += fentry->k; continue; - case BPF_JMP|BPF_JGT|BPF_K: + case BPF_S_JMP_JGT_K: pc += (A > fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JGE|BPF_K: + case BPF_S_JMP_JGE_K: pc += (A >= fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JEQ|BPF_K: + case BPF_S_JMP_JEQ_K: pc += (A == fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JSET|BPF_K: + case BPF_S_JMP_JSET_K: pc += (A & fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JGT|BPF_X: + case BPF_S_JMP_JGT_X: pc += (A > X) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JGE|BPF_X: + case BPF_S_JMP_JGE_X: pc += (A >= X) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JEQ|BPF_X: + case BPF_S_JMP_JEQ_X: pc += (A == X) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JSET|BPF_X: + case BPF_S_JMP_JSET_X: pc += (A & X) ? 
fentry->jt : fentry->jf; continue; - case BPF_LD|BPF_W|BPF_ABS: + case BPF_S_LD_W_ABS: k = fentry->k; load_w: ptr = load_pointer(skb, k, 4, &tmp); @@ -217,7 +217,7 @@ load_w: continue; } break; - case BPF_LD|BPF_H|BPF_ABS: + case BPF_S_LD_H_ABS: k = fentry->k; load_h: ptr = load_pointer(skb, k, 2, &tmp); @@ -226,7 +226,7 @@ load_h: continue; } break; - case BPF_LD|BPF_B|BPF_ABS: + case BPF_S_LD_B_ABS: k = fentry->k; load_b: ptr = load_pointer(skb, k, 1, &tmp); @@ -235,54 +235,54 @@ load_b: continue; } break; - case BPF_LD|BPF_W|BPF_LEN: + case BPF_S_LD_W_LEN: A = skb->len; continue; - case BPF_LDX|BPF_W|BPF_LEN: + case BPF_S_LDX_W_LEN: X = skb->len; continue; - case BPF_LD|BPF_W|BPF_IND: + case BPF_S_LD_W_IND: k = X + fentry->k; goto load_w; - case BPF_LD|BPF_H|BPF_IND: + case BPF_S_LD_H_IND: k = X + fentry->k; goto load_h; - case BPF_LD|BPF_B|BPF_IND: + case BPF_S_LD_B_IND: k = X + fentry->k; goto load_b; - case BPF_LDX|BPF_B|BPF_MSH: + case BPF_S_LDX_B_MSH: ptr = load_pointer(skb, fentry->k, 1, &tmp); if (ptr != NULL) { X = (*(u8 *)ptr & 0xf) << 2; continue; } return 0; - case BPF_LD|BPF_IMM: + case BPF_S_LD_IMM: A = fentry->k; continue; - case BPF_LDX|BPF_IMM: + case BPF_S_LDX_IMM: X = fentry->k; continue; - case BPF_LD|BPF_MEM: + case BPF_S_LD_MEM: A = mem[fentry->k]; continue; - case BPF_LDX|BPF_MEM: + case BPF_S_LDX_MEM: X = mem[fentry->k]; continue; - case BPF_MISC|BPF_TAX: + case BPF_S_MISC_TAX: X = A; continue; - case BPF_MISC|BPF_TXA: + case BPF_S_MISC_TXA: A = X; continue; - case BPF_RET|BPF_K: + case BPF_S_RET_K: return fentry->k; - case BPF_RET|BPF_A: + case BPF_S_RET_A: return A; - case BPF_ST: + case BPF_S_ST: mem[fentry->k] = A; continue; - case BPF_STX: + case BPF_S_STX: mem[fentry->k] = X; continue; default: @@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen) /* Only allow valid instructions */ switch (ftest->code) { case BPF_ALU|BPF_ADD|BPF_K: + ftest->code = BPF_S_ALU_ADD_K; + break; case BPF_ALU|BPF_ADD|BPF_X: + 
ftest->code = BPF_S_ALU_ADD_X; + break; case BPF_ALU|BPF_SUB|BPF_K: + ftest->code = BPF_S_ALU_SUB_K; + break; case BPF_ALU|BPF_SUB|BPF_X: + ftest->code = BPF_S_ALU_SUB_X; + break; case BPF_ALU|BPF_MUL|BPF_K: + ftest->code = BPF_S_ALU_MUL_K; + break; case BPF_ALU|BPF_MUL|BPF_X: + ftest->code = BPF_S_ALU_MUL_X; + break; case BPF_ALU|BPF_DIV|BPF_X: + ftest->code = BPF_S_ALU_DIV_X; + break; case BPF_ALU|BPF_AND|BPF_K: + ftest->code = BPF_S_ALU_AND_K; + break; case BPF_ALU|BPF_AND|BPF_X: + ftest->code = BPF_S_ALU_AND_X; + break; case BPF_ALU|BPF_OR|BPF_K: + ftest->code = BPF_S_ALU_OR_K; + break; case BPF_ALU|BPF_OR|BPF_X: + ftest->code = BPF_S_ALU_OR_X; + break; case BPF_ALU|BPF_LSH|BPF_K: + ftest->code = BPF_S_ALU_LSH_K; + break; case BPF_ALU|BPF_LSH|BPF_X: + ftest->code = BPF_S_ALU_LSH_X; + break; case BPF_ALU|BPF_RSH|BPF_K: + ftest->code = BPF_S_ALU_RSH_K; + break; case BPF_ALU|BPF_RSH|BPF_X: + ftest->code = BPF_S_ALU_RSH_X; + break; case BPF_ALU|BPF_NEG: + ftest->code = BPF_S_ALU_NEG; + break; case BPF_LD|BPF_W|BPF_ABS: + ftest->code = BPF_S_LD_W_ABS; + break; case BPF_LD|BPF_H|BPF_ABS: + ftest->code = BPF_S_LD_H_ABS; + break; case BPF_LD|BPF_B|BPF_ABS: + ftest->code = BPF_S_LD_B_ABS; + break; case BPF_LD|BPF_W|BPF_LEN: + ftest->code = BPF_S_LD_W_LEN; + break; case BPF_LD|BPF_W|BPF_IND: + ftest->code = BPF_S_LD_W_IND; + break; case BPF_LD|BPF_H|BPF_IND: + ftest->code = BPF_S_LD_H_IND; + break; case BPF_LD|BPF_B|BPF_IND: + ftest->code = BPF_S_LD_B_IND; + break; case BPF_LD|BPF_IMM: + ftest->code = BPF_S_LD_IMM; + break; case BPF_LDX|BPF_W|BPF_LEN: + ftest->code = BPF_S_LDX_W_LEN; + break; case BPF_LDX|BPF_B|BPF_MSH: + ftest->code = BPF_S_LDX_B_MSH; + break; case BPF_LDX|BPF_IMM: + ftest->code = BPF_S_LDX_IMM; + break; case BPF_MISC|BPF_TAX: + ftest->code = BPF_S_MISC_TAX; + break; case BPF_MISC|BPF_TXA: + ftest->code = BPF_S_MISC_TXA; + break; case BPF_RET|BPF_K: + ftest->code = BPF_S_RET_K; + break; case BPF_RET|BPF_A: + ftest->code = BPF_S_RET_A; break; /* Some 
instructions need special checks */ - case BPF_ALU|BPF_DIV|BPF_K: /* check for division by zero */ + case BPF_ALU|BPF_DIV|BPF_K: if (ftest->k == 0) return -EINVAL; + ftest->code = BPF_S_ALU_DIV_K; break; + /* check for invalid memory addresses */ case BPF_LD|BPF_MEM: + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + ftest->code = BPF_S_LD_MEM; + break; case BPF_LDX|BPF_MEM: + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + ftest->code = BPF_S_LDX_MEM; + break; case BPF_ST: + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + ftest->code = BPF_S_ST; + break; case BPF_STX: - /* check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; + ftest->code = BPF_S_STX; break; case BPF_JMP|BPF_JA: @@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen) */ if (ftest->k >= (unsigned)(flen-pc-1)) return -EINVAL; + ftest->code = BPF_S_JMP_JA; break; case BPF_JMP|BPF_JEQ|BPF_K: + ftest->code = BPF_S_JMP_JEQ_K; + break; case BPF_JMP|BPF_JEQ|BPF_X: + ftest->code = BPF_S_JMP_JEQ_X; + break; case BPF_JMP|BPF_JGE|BPF_K: + ftest->code = BPF_S_JMP_JGE_K; + break; case BPF_JMP|BPF_JGE|BPF_X: + ftest->code = BPF_S_JMP_JGE_X; + break; case BPF_JMP|BPF_JGT|BPF_K: + ftest->code = BPF_S_JMP_JGT_K; + break; case BPF_JMP|BPF_JGT|BPF_X: + ftest->code = BPF_S_JMP_JGT_X; + break; case BPF_JMP|BPF_JSET|BPF_K: + ftest->code = BPF_S_JMP_JSET_K; + break; case BPF_JMP|BPF_JSET|BPF_X: + ftest->code = BPF_S_JMP_JSET_X; + break; + + default: + return -EINVAL; + } + /* for conditionals both must be safe */ + switch (ftest->code) { + case BPF_S_JMP_JEQ_K: + case BPF_S_JMP_JEQ_X: + case BPF_S_JMP_JGE_K: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JSET_X: + case BPF_S_JMP_JSET_K: if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; - break; + } + } + /* last instruction must be a RET code */ + switch (filter[flen - 1].code) { + case BPF_S_RET_K: + case BPF_S_RET_A: + return 0; + break; 
default: return -EINVAL; } - } - - return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL; } EXPORT_SYMBOL(sk_chk_filter); diff --git a/net/core/flow.c b/net/core/flow.c index 161900674009..f67dcbfe54ef 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -62,6 +62,7 @@ struct flow_cache { }; atomic_t flow_cache_genid = ATOMIC_INIT(0); +EXPORT_SYMBOL(flow_cache_genid); static struct flow_cache flow_cache_global; static struct kmem_cache *flow_cachep; @@ -222,7 +223,7 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, unsigned int hash; local_bh_disable(); - fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); + fcp = this_cpu_ptr(fc->percpu); fle = NULL; flo = NULL; @@ -291,6 +292,7 @@ ret_object: local_bh_enable(); return flo; } +EXPORT_SYMBOL(flow_cache_lookup); static void flow_cache_flush_tasklet(unsigned long data) { @@ -302,7 +304,7 @@ static void flow_cache_flush_tasklet(unsigned long data) LIST_HEAD(gc_list); int i, deleted = 0; - fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); + fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { hlist_for_each_entry_safe(fle, entry, tmp, &fcp->hash_table[i], u.hlist) { @@ -424,6 +426,3 @@ static int __init flow_cache_init_global(void) } module_init(flow_cache_init_global); - -EXPORT_SYMBOL(flow_cache_genid); -EXPORT_SYMBOL(flow_cache_lookup); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 785e5276a300..9fbe7f7429b0 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -263,6 +263,7 @@ static void __gen_kill_estimator(struct rcu_head *head) * * Removes the rate estimator specified by &bstats and &rate_est. 
* + * Note : Caller should respect an RCU grace period before freeing stats_lock */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 393b1d8618e2..0452eb27a272 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -73,6 +73,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, return 0; } +EXPORT_SYMBOL(gnet_stats_start_copy_compat); /** * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode @@ -93,6 +94,7 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, { return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d); } +EXPORT_SYMBOL(gnet_stats_start_copy); /** * gnet_stats_copy_basic - copy basic statistics into statistic TLV @@ -123,6 +125,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) } return 0; } +EXPORT_SYMBOL(gnet_stats_copy_basic); /** * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV @@ -154,6 +157,7 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, return 0; } +EXPORT_SYMBOL(gnet_stats_copy_rate_est); /** * gnet_stats_copy_queue - copy queue statistics into statistics TLV @@ -181,6 +185,7 @@ gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q) return 0; } +EXPORT_SYMBOL(gnet_stats_copy_queue); /** * gnet_stats_copy_app - copy application specific statistics into statistics TLV @@ -208,6 +213,7 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) return 0; } +EXPORT_SYMBOL(gnet_stats_copy_app); /** * gnet_stats_finish_copy - finish dumping procedure @@ -241,12 +247,4 @@ gnet_stats_finish_copy(struct gnet_dump *d) spin_unlock_bh(d->lock); return 0; } - - -EXPORT_SYMBOL(gnet_stats_start_copy); -EXPORT_SYMBOL(gnet_stats_start_copy_compat); -EXPORT_SYMBOL(gnet_stats_copy_basic); -EXPORT_SYMBOL(gnet_stats_copy_rate_est); -EXPORT_SYMBOL(gnet_stats_copy_queue); 
-EXPORT_SYMBOL(gnet_stats_copy_app); EXPORT_SYMBOL(gnet_stats_finish_copy); diff --git a/net/core/iovec.c b/net/core/iovec.c index 1e7f4e91a935..1cd98df412df 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -95,6 +95,7 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) return 0; } +EXPORT_SYMBOL(memcpy_toiovec); /* * Copy kernel to iovec. Returns -EFAULT on error. @@ -120,6 +121,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, return 0; } +EXPORT_SYMBOL(memcpy_toiovecend); /* * Copy iovec to kernel. Returns -EFAULT on error. @@ -144,6 +146,7 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) return 0; } +EXPORT_SYMBOL(memcpy_fromiovec); /* * Copy iovec from kernel. Returns -EFAULT on error. @@ -172,6 +175,7 @@ int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, return 0; } +EXPORT_SYMBOL(memcpy_fromiovecend); /* * And now for the all-in-one: copy and checksum from a user iovec @@ -256,9 +260,4 @@ out_fault: err = -EFAULT; goto out; } - EXPORT_SYMBOL(csum_partial_copy_fromiovecend); -EXPORT_SYMBOL(memcpy_fromiovec); -EXPORT_SYMBOL(memcpy_fromiovecend); -EXPORT_SYMBOL(memcpy_toiovec); -EXPORT_SYMBOL(memcpy_toiovecend); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index bdbce2f5875b..01a1101b5936 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -243,5 +243,4 @@ void linkwatch_fire_event(struct net_device *dev) linkwatch_schedule_work(urgent); } - EXPORT_SYMBOL(linkwatch_fire_event); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6ba1c0eece03..a4e0a7482c2b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -949,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) - = neigh->dev->header_ops->cache_update; + = NULL; + + if (neigh->dev->header_ops) + update = neigh->dev->header_ops->cache_update; if 
(update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 99e7052d7323..af4dfbadf2a0 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -29,6 +29,7 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; +static const char fmt_u64[] = "%llu\n"; static inline int dev_isalive(const struct net_device *dev) { @@ -94,6 +95,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW(dev_id, fmt_hex); +NETDEVICE_SHOW(addr_assign_type, fmt_dec); NETDEVICE_SHOW(addr_len, fmt_dec); NETDEVICE_SHOW(iflink, fmt_dec); NETDEVICE_SHOW(ifindex, fmt_dec); @@ -294,6 +296,7 @@ static ssize_t show_ifalias(struct device *dev, } static struct device_attribute net_class_attributes[] = { + __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), @@ -324,14 +327,15 @@ static ssize_t netstat_show(const struct device *d, struct net_device *dev = to_net_dev(d); ssize_t ret = -EINVAL; - WARN_ON(offset > sizeof(struct net_device_stats) || - offset % sizeof(unsigned long) != 0); + WARN_ON(offset > sizeof(struct rtnl_link_stats64) || + offset % sizeof(u64) != 0); read_lock(&dev_base_lock); if (dev_isalive(dev)) { - const struct net_device_stats *stats = dev_get_stats(dev); - ret = sprintf(buf, fmt_ulong, - *(unsigned long *)(((u8 *) stats) + offset)); + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + + ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset)); } read_unlock(&dev_base_lock); return ret; @@ -343,7 +347,7 @@ static ssize_t show_##name(struct device *d, \ struct device_attribute *attr, char *buf) \ { \ return netstat_show(d, attr, buf, \ - 
offsetof(struct net_device_stats, name)); \ + offsetof(struct rtnl_link_stats64, name)); \ } \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) @@ -922,13 +926,12 @@ int netdev_class_create_file(struct class_attribute *class_attr) { return class_create_file(&net_class, class_attr); } +EXPORT_SYMBOL(netdev_class_create_file); void netdev_class_remove_file(struct class_attribute *class_attr) { class_remove_file(&net_class, class_attr); } - -EXPORT_SYMBOL(netdev_class_create_file); EXPORT_SYMBOL(netdev_class_remove_file); int netdev_kobject_init(void) diff --git a/net/core/netevent.c b/net/core/netevent.c index 95f81de87502..865f0ceb81fb 100644 --- a/net/core/netevent.c +++ b/net/core/netevent.c @@ -35,6 +35,7 @@ int register_netevent_notifier(struct notifier_block *nb) err = atomic_notifier_chain_register(&netevent_notif_chain, nb); return err; } +EXPORT_SYMBOL_GPL(register_netevent_notifier); /** * netevent_unregister_notifier - unregister a netevent notifier block @@ -50,6 +51,7 @@ int unregister_netevent_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); } +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); /** * call_netevent_notifiers - call all netevent notifier blocks @@ -64,7 +66,4 @@ int call_netevent_notifiers(unsigned long val, void *v) { return atomic_notifier_call_chain(&netevent_notif_chain, val, v); } - -EXPORT_SYMBOL_GPL(register_netevent_notifier); -EXPORT_SYMBOL_GPL(unregister_netevent_notifier); EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 94825b109551..537e01afd81b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -199,11 +199,13 @@ void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } +EXPORT_SYMBOL(netpoll_poll_dev); void netpoll_poll(struct netpoll *np) { netpoll_poll_dev(np->dev); } +EXPORT_SYMBOL(netpoll_poll); static void refill_skbs(void) { @@ -292,6 +294,7 @@ void netpoll_send_skb(struct netpoll 
*np, struct sk_buff *skb) unsigned long tries; struct net_device *dev = np->dev; const struct net_device_ops *ops = dev->netdev_ops; + /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo = np->dev->npinfo; if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { @@ -343,6 +346,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) schedule_delayed_work(&npinfo->tx_work,0); } } +EXPORT_SYMBOL(netpoll_send_skb); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { @@ -404,6 +408,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) netpoll_send_skb(np, skb); } +EXPORT_SYMBOL(netpoll_send_udp); static void arp_reply(struct sk_buff *skb) { @@ -630,6 +635,7 @@ void netpoll_print_options(struct netpoll *np) printk(KERN_INFO "%s: remote ethernet address %pM\n", np->name, np->remote_mac); } +EXPORT_SYMBOL(netpoll_print_options); int netpoll_parse_options(struct netpoll *np, char *opt) { @@ -722,30 +728,29 @@ int netpoll_parse_options(struct netpoll *np, char *opt) np->name, cur); return -1; } +EXPORT_SYMBOL(netpoll_parse_options); -int netpoll_setup(struct netpoll *np) +int __netpoll_setup(struct netpoll *np) { - struct net_device *ndev = NULL; - struct in_device *in_dev; + struct net_device *ndev = np->dev; struct netpoll_info *npinfo; - struct netpoll *npe, *tmp; + const struct net_device_ops *ops; unsigned long flags; int err; - if (np->dev_name) - ndev = dev_get_by_name(&init_net, np->dev_name); - if (!ndev) { - printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", + if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || + !ndev->netdev_ops->ndo_poll_controller) { + printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); - return -ENODEV; + err = -ENOTSUPP; + goto out; } - np->dev = ndev; if (!ndev->npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; - goto put; + goto out; } npinfo->rx_flags = 0; @@ -757,6 +762,13 @@ int 
netpoll_setup(struct netpoll *np) INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); atomic_set(&npinfo->refcnt, 1); + + ops = np->dev->netdev_ops; + if (ops->ndo_netpoll_setup) { + err = ops->ndo_netpoll_setup(ndev, npinfo); + if (err) + goto free_npinfo; + } } else { npinfo = ndev->npinfo; atomic_inc(&npinfo->refcnt); @@ -764,12 +776,37 @@ int netpoll_setup(struct netpoll *np) npinfo->netpoll = np; - if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || - !ndev->netdev_ops->ndo_poll_controller) { - printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", + if (np->rx_hook) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_flags |= NETPOLL_RX_ENABLED; + list_add_tail(&np->rx, &npinfo->rx_np); + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + + /* last thing to do is link it to the net device structure */ + rcu_assign_pointer(ndev->npinfo, npinfo); + + return 0; + +free_npinfo: + kfree(npinfo); +out: + return err; +} +EXPORT_SYMBOL_GPL(__netpoll_setup); + +int netpoll_setup(struct netpoll *np) +{ + struct net_device *ndev = NULL; + struct in_device *in_dev; + int err; + + if (np->dev_name) + ndev = dev_get_by_name(&init_net, np->dev_name); + if (!ndev) { + printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); - err = -ENOTSUPP; - goto release; + return -ENODEV; } if (!netif_running(ndev)) { @@ -785,7 +822,7 @@ int netpoll_setup(struct netpoll *np) if (err) { printk(KERN_ERR "%s: failed to open %s\n", np->name, ndev->name); - goto release; + goto put; } atleast = jiffies + HZ/10; @@ -822,7 +859,7 @@ int netpoll_setup(struct netpoll *np) printk(KERN_ERR "%s: no IP address for %s, aborting\n", np->name, np->dev_name); err = -EDESTADDRREQ; - goto release; + goto put; } np->local_ip = in_dev->ifa_list->ifa_local; @@ -830,38 +867,25 @@ int netpoll_setup(struct netpoll *np) printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip); } - if (np->rx_hook) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_flags |= 
NETPOLL_RX_ENABLED; - list_add_tail(&np->rx, &npinfo->rx_np); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } + np->dev = ndev; /* fill up the skb queue */ refill_skbs(); - /* last thing to do is link it to the net device structure */ - ndev->npinfo = npinfo; + rtnl_lock(); + err = __netpoll_setup(np); + rtnl_unlock(); - /* avoid racing with NAPI reading npinfo */ - synchronize_rcu(); + if (err) + goto put; return 0; - release: - if (!ndev->npinfo) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) { - npe->dev = NULL; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - - kfree(npinfo); - } put: dev_put(ndev); return err; } +EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { @@ -870,49 +894,65 @@ static int __init netpoll_init(void) } core_initcall(netpoll_init); -void netpoll_cleanup(struct netpoll *np) +void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; unsigned long flags; - if (np->dev) { - npinfo = np->dev->npinfo; - if (npinfo) { - if (!list_empty(&npinfo->rx_np)) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_del(&np->rx); - if (list_empty(&npinfo->rx_np)) - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } + npinfo = np->dev->npinfo; + if (!npinfo) + return; - if (atomic_dec_and_test(&npinfo->refcnt)) { - const struct net_device_ops *ops; - skb_queue_purge(&npinfo->arp_tx); - skb_queue_purge(&npinfo->txq); - cancel_rearming_delayed_work(&npinfo->tx_work); - - /* clean after last, unfinished work */ - __skb_queue_purge(&npinfo->txq); - kfree(npinfo); - ops = np->dev->netdev_ops; - if (ops->ndo_netpoll_cleanup) - ops->ndo_netpoll_cleanup(np->dev); - else - np->dev->npinfo = NULL; - } - } + if (!list_empty(&npinfo->rx_np)) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_del(&np->rx); + if (list_empty(&npinfo->rx_np)) + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + 
spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + + if (atomic_dec_and_test(&npinfo->refcnt)) { + const struct net_device_ops *ops; + + ops = np->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(np->dev); + + rcu_assign_pointer(np->dev->npinfo, NULL); + + /* avoid racing with NAPI reading npinfo */ + synchronize_rcu_bh(); + + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + cancel_rearming_delayed_work(&npinfo->tx_work); - dev_put(np->dev); + /* clean after last, unfinished work */ + __skb_queue_purge(&npinfo->txq); + kfree(npinfo); } +} +EXPORT_SYMBOL_GPL(__netpoll_cleanup); + +void netpoll_cleanup(struct netpoll *np) +{ + if (!np->dev) + return; + rtnl_lock(); + __netpoll_cleanup(np); + rtnl_unlock(); + + dev_put(np->dev); np->dev = NULL; } +EXPORT_SYMBOL(netpoll_cleanup); int netpoll_trap(void) { return atomic_read(&trapped); } +EXPORT_SYMBOL(netpoll_trap); void netpoll_set_trap(int trap) { @@ -921,14 +961,4 @@ void netpoll_set_trap(int trap) else atomic_dec(&trapped); } - -EXPORT_SYMBOL(netpoll_send_skb); EXPORT_SYMBOL(netpoll_set_trap); -EXPORT_SYMBOL(netpoll_trap); -EXPORT_SYMBOL(netpoll_print_options); -EXPORT_SYMBOL(netpoll_parse_options); -EXPORT_SYMBOL(netpoll_setup); -EXPORT_SYMBOL(netpoll_cleanup); -EXPORT_SYMBOL(netpoll_send_udp); -EXPORT_SYMBOL(netpoll_poll_dev); -EXPORT_SYMBOL(netpoll_poll); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1dacd7ba8dbb..10a1ea72010d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -115,6 +115,9 @@ * command by Adit Ranadive <adit.262@gmail.com> * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sys.h> #include <linux/types.h> #include <linux/module.h> @@ -169,11 +172,13 @@ #include <asm/dma.h> #include <asm/div64.h> /* do_div */ -#define VERSION "2.73" +#define VERSION "2.74" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) +#define func_enter() 
pr_debug("entering %s\n", __func__); + /* Device flag bits */ #define F_IPSRC_RND (1<<0) /* IP-Src Random */ #define F_IPDST_RND (1<<1) /* IP-Dst Random */ @@ -424,7 +429,8 @@ static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2) } static const char version[] = - "pktgen " VERSION ": Packet Generator for packet performance testing.\n"; + "Packet Generator for packet performance testing. " + "Version: " VERSION "\n"; static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); @@ -495,7 +501,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, pktgen_reset_all_threads(); else - printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); + pr_warning("Unknown command: %s\n", data); err = count; @@ -840,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file, const char __user * user_buffer, size_t count, loff_t * offset) { - struct seq_file *seq = (struct seq_file *)file->private_data; + struct seq_file *seq = file->private_data; struct pktgen_dev *pkt_dev = seq->private; int i = 0, max, len; char name[16], valstr[32]; @@ -852,14 +858,14 @@ static ssize_t pktgen_if_write(struct file *file, pg_result = &(pkt_dev->result[0]); if (count < 1) { - printk(KERN_WARNING "pktgen: wrong command format\n"); + pr_warning("wrong command format\n"); return -EINVAL; } max = count - i; tmp = count_trail_chars(&user_buffer[i], max); if (tmp < 0) { - printk(KERN_WARNING "pktgen: illegal format\n"); + pr_warning("illegal format\n"); return tmp; } i += tmp; @@ -980,6 +986,36 @@ static ssize_t pktgen_if_write(struct file *file, (unsigned long long) pkt_dev->delay); return count; } + if (!strcmp(name, "rate")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + if (!value) + return len; + pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value; + if (debug) + pr_info("Delay set at: %llu ns\n", pkt_dev->delay); 
+ + sprintf(pg_result, "OK: rate=%lu", value); + return count; + } + if (!strcmp(name, "ratep")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + if (!value) + return len; + pkt_dev->delay = NSEC_PER_SEC/value; + if (debug) + pr_info("Delay set at: %llu ns\n", pkt_dev->delay); + + sprintf(pg_result, "OK: rate=%lu", value); + return count; + } if (!strcmp(name, "udp_src_min")) { len = num_arg(&user_buffer[i], 10, &value); if (len < 0) @@ -1398,18 +1434,12 @@ static ssize_t pktgen_if_write(struct file *file, i += len; for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) { - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; - } + int value; + + value = hex_to_bin(*v); + if (value >= 0) + *m = *m * 16 + value; + if (*v == ':') { m++; *m = 0; @@ -1440,18 +1470,12 @@ static ssize_t pktgen_if_write(struct file *file, i += len; for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; - } + int value; + + value = hex_to_bin(*v); + if (value >= 0) + *m = *m * 16 + value; + if (*v == ':') { m++; *m = 0; @@ -1740,7 +1764,7 @@ static ssize_t pktgen_thread_write(struct file *file, const char __user * user_buffer, size_t count, loff_t * offset) { - struct seq_file *seq = (struct seq_file *)file->private_data; + struct seq_file *seq = file->private_data; struct pktgen_thread *t = seq->private; int i = 0, max, len, ret; char name[40]; @@ -1781,7 +1805,7 @@ static ssize_t pktgen_thread_write(struct file *file, name, (unsigned long)count); if (!t) { - printk(KERN_ERR "pktgen: ERROR: No thread\n"); + pr_err("ERROR: No thread\n"); ret = -EINVAL; goto out; } @@ -1874,7 +1898,7 @@ static void 
pktgen_mark_device(const char *ifname) int i = 0; mutex_lock(&pktgen_thread_lock); - pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname); + pr_debug("%s: marking %s for removal\n", __func__, ifname); while (1) { @@ -1883,15 +1907,14 @@ static void pktgen_mark_device(const char *ifname) break; /* success */ mutex_unlock(&pktgen_thread_lock); - pr_debug("pktgen: pktgen_mark_device waiting for %s " - "to disappear....\n", ifname); + pr_debug("%s: waiting for %s to disappear....\n", + __func__, ifname); schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); mutex_lock(&pktgen_thread_lock); if (++i >= max_tries) { - printk(KERN_ERR "pktgen_mark_device: timed out after " - "waiting %d msec for device %s to be removed\n", - msec_per_try * i, ifname); + pr_err("%s: timed out after waiting %d msec for device %s to be removed\n", + __func__, msec_per_try * i, ifname); break; } @@ -1918,8 +1941,8 @@ static void pktgen_change_name(struct net_device *dev) &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) - printk(KERN_ERR "pktgen: can't move proc " - " entry for '%s'\n", dev->name); + pr_err("can't move proc entry for '%s'\n", + dev->name); break; } } @@ -1983,15 +2006,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) odev = pktgen_dev_get_by_name(pkt_dev, ifname); if (!odev) { - printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); + pr_err("no such netdevice: \"%s\"\n", ifname); return -ENODEV; } if (odev->type != ARPHRD_ETHER) { - printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname); + pr_err("not an ethernet device: \"%s\"\n", ifname); err = -EINVAL; } else if (!netif_running(odev)) { - printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname); + pr_err("device is down: \"%s\"\n", ifname); err = -ENETDOWN; } else { pkt_dev->odev = odev; @@ -2010,8 +2033,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) int ntxq; if (!pkt_dev->odev) { - printk(KERN_ERR "pktgen: ERROR: 
pkt_dev->odev == NULL in " - "setup_inject.\n"); + pr_err("ERROR: pkt_dev->odev == NULL in setup_inject\n"); sprintf(pkt_dev->result, "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); return; @@ -2021,19 +2043,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) ntxq = pkt_dev->odev->real_num_tx_queues; if (ntxq <= pkt_dev->queue_map_min) { - printk(KERN_WARNING "pktgen: WARNING: Requested " - "queue_map_min (zero-based) (%d) exceeds valid range " - "[0 - %d] for (%d) queues on %s, resetting\n", - pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, - pkt_dev->odevname); + pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, + pkt_dev->odevname); pkt_dev->queue_map_min = ntxq - 1; } if (pkt_dev->queue_map_max >= ntxq) { - printk(KERN_WARNING "pktgen: WARNING: Requested " - "queue_map_max (zero-based) (%d) exceeds valid range " - "[0 - %d] for (%d) queues on %s, resetting\n", - pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, - pkt_dev->odevname); + pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, + pkt_dev->odevname); pkt_dev->queue_map_max = ntxq - 1; } @@ -2093,8 +2111,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) } rcu_read_unlock(); if (err) - printk(KERN_ERR "pktgen: ERROR: IPv6 link " - "address not availble.\n"); + pr_err("ERROR: IPv6 link address not available\n"); } #endif } else { @@ -2142,15 +2159,15 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_set_expires(&t.timer, spin_until); - remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer)); + remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); if (remaining <= 0) { pkt_dev->next_tx = ktime_add_ns(spin_until, 
pkt_dev->delay); return; } start_time = ktime_now(); - if (remaining < 100) - udelay(remaining); /* really small just spin */ + if (remaining < 100000) + ndelay(remaining); /* really small just spin */ else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); @@ -2528,8 +2545,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); if (ret < 0) { - printk(KERN_ERR "Error expanding " - "ipsec packet %d\n", ret); + pr_err("Error expanding ipsec packet %d\n", + ret); goto err; } } @@ -2538,8 +2555,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, skb_pull(skb, ETH_HLEN); ret = pktgen_output_ipsec(skb, pkt_dev); if (ret) { - printk(KERN_ERR "Error creating ipsec " - "packet %d\n", ret); + pr_err("Error creating ipsec packet %d\n", ret); goto err; } /* restore ll */ @@ -3015,8 +3031,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, if (datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); if (net_ratelimit()) - printk(KERN_INFO "pktgen: increased datalen to %d\n", - datalen); + pr_info("increased datalen to %d\n", datalen); } udph->source = htons(pkt_dev->cur_udp_src); @@ -3143,7 +3158,7 @@ static void pktgen_run(struct pktgen_thread *t) struct pktgen_dev *pkt_dev; int started = 0; - pr_debug("pktgen: entering pktgen_run. 
%p\n", t); + func_enter(); if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) { @@ -3176,7 +3191,7 @@ static void pktgen_stop_all_threads_ifs(void) { struct pktgen_thread *t; - pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n"); + func_enter(); mutex_lock(&pktgen_thread_lock); @@ -3241,7 +3256,7 @@ static void pktgen_run_all_threads(void) { struct pktgen_thread *t; - pr_debug("pktgen: entering pktgen_run_all_threads.\n"); + func_enter(); mutex_lock(&pktgen_thread_lock); @@ -3260,7 +3275,7 @@ static void pktgen_reset_all_threads(void) { struct pktgen_thread *t; - pr_debug("pktgen: entering pktgen_reset_all_threads.\n"); + func_enter(); mutex_lock(&pktgen_thread_lock); @@ -3310,8 +3325,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1; if (!pkt_dev->running) { - printk(KERN_WARNING "pktgen: interface: %s is already " - "stopped\n", pkt_dev->odevname); + pr_warning("interface: %s is already stopped\n", + pkt_dev->odevname); return -EINVAL; } @@ -3347,7 +3362,7 @@ static void pktgen_stop(struct pktgen_thread *t) { struct pktgen_dev *pkt_dev; - pr_debug("pktgen: entering pktgen_stop\n"); + func_enter(); if_lock(t); @@ -3367,7 +3382,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) struct list_head *q, *n; struct pktgen_dev *cur; - pr_debug("pktgen: entering pktgen_rem_one_if\n"); + func_enter(); if_lock(t); @@ -3393,9 +3408,10 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) struct list_head *q, *n; struct pktgen_dev *cur; + func_enter(); + /* Remove all devices, free mem */ - pr_debug("pktgen: entering pktgen_rem_all_ifs\n"); if_lock(t); list_for_each_safe(q, n, &t->if_list) { @@ -3477,8 +3493,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->skb = fill_packet(odev, pkt_dev); if (pkt_dev->skb == NULL) { - printk(KERN_ERR "pktgen: ERROR: couldn't " - "allocate skb in fill_packet.\n"); + pr_err("ERROR: couldn't allocate skb in 
fill_packet\n"); schedule(); pkt_dev->clone_count--; /* back out increment, OOM */ return; @@ -3558,8 +3573,7 @@ static int pktgen_thread_worker(void *arg) init_waitqueue_head(&t->queue); complete(&t->start_done); - pr_debug("pktgen: starting pktgen/%d: pid=%d\n", - cpu, task_pid_nr(current)); + pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); set_current_state(TASK_INTERRUPTIBLE); @@ -3612,13 +3626,13 @@ static int pktgen_thread_worker(void *arg) set_current_state(TASK_INTERRUPTIBLE); } - pr_debug("pktgen: %s stopping all device\n", t->tsk->comm); + pr_debug("%s stopping all device\n", t->tsk->comm); pktgen_stop(t); - pr_debug("pktgen: %s removing all device\n", t->tsk->comm); + pr_debug("%s removing all device\n", t->tsk->comm); pktgen_rem_all_ifs(t); - pr_debug("pktgen: %s removing thread.\n", t->tsk->comm); + pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); return 0; @@ -3642,7 +3656,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, } if_unlock(t); - pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev); + pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev); return pkt_dev; } @@ -3658,8 +3672,7 @@ static int add_dev_to_thread(struct pktgen_thread *t, if_lock(t); if (pkt_dev->pg_thread) { - printk(KERN_ERR "pktgen: ERROR: already assigned " - "to a thread.\n"); + pr_err("ERROR: already assigned to a thread\n"); rv = -EBUSY; goto out; } @@ -3685,7 +3698,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev = __pktgen_NN_threads(ifname, FIND); if (pkt_dev) { - printk(KERN_ERR "pktgen: ERROR: interface already used.\n"); + pr_err("ERROR: interface already used\n"); return -EBUSY; } @@ -3730,7 +3743,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) { - printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", + 
pr_err("cannot create %s/%s procfs entry\n", PG_PROC_DIR, ifname); err = -EINVAL; goto out2; @@ -3761,8 +3774,7 @@ static int __init pktgen_create_thread(int cpu) t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL, cpu_to_node(cpu)); if (!t) { - printk(KERN_ERR "pktgen: ERROR: out of memory, can't " - "create new thread.\n"); + pr_err("ERROR: out of memory, can't create new thread\n"); return -ENOMEM; } @@ -3776,8 +3788,7 @@ static int __init pktgen_create_thread(int cpu) p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); if (IS_ERR(p)) { - printk(KERN_ERR "pktgen: kernel_thread() failed " - "for cpu %d\n", t->cpu); + pr_err("kernel_thread() failed for cpu %d\n", t->cpu); list_del(&t->th_list); kfree(t); return PTR_ERR(p); @@ -3788,7 +3799,7 @@ static int __init pktgen_create_thread(int cpu) pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, &pktgen_thread_fops, t); if (!pe) { - printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", + pr_err("cannot create %s/%s procfs entry\n", PG_PROC_DIR, t->tsk->comm); kthread_stop(p); list_del(&t->th_list); @@ -3822,11 +3833,10 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) { - pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev); + pr_debug("remove_device pkt_dev=%p\n", pkt_dev); if (pkt_dev->running) { - printk(KERN_WARNING "pktgen: WARNING: trying to remove a " - "running interface, stopping it now.\n"); + pr_warning("WARNING: trying to remove a running interface, stopping it now\n"); pktgen_stop_device(pkt_dev); } @@ -3857,7 +3867,7 @@ static int __init pg_init(void) int cpu; struct proc_dir_entry *pe; - printk(KERN_INFO "%s", version); + pr_info("%s", version); pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); if (!pg_proc_dir) @@ -3865,8 +3875,7 @@ static int __init pg_init(void) pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); if (pe == NULL) { - printk(KERN_ERR "pktgen: ERROR: cannot create %s " - "procfs entry.\n", PGCTRL); 
+ pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); proc_net_remove(&init_net, PG_PROC_DIR); return -EINVAL; } @@ -3879,13 +3888,12 @@ static int __init pg_init(void) err = pktgen_create_thread(cpu); if (err) - printk(KERN_WARNING "pktgen: WARNING: Cannot create " - "thread for cpu %d (%d)\n", cpu, err); + pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n", + cpu, err); } if (list_empty(&pktgen_threads)) { - printk(KERN_ERR "pktgen: ERROR: Initialization failed for " - "all threads\n"); + pr_err("ERROR: Initialization failed for all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); proc_net_remove(&init_net, PG_PROC_DIR); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1a2af24e9e3d..f78d821bd935 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -579,7 +579,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - const struct net_device_stats *b) + const struct rtnl_link_stats64 *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; @@ -610,7 +610,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; } -static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b) +static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) { struct rtnl_link_stats64 a; @@ -686,7 +686,7 @@ static size_t rtnl_port_size(const struct net_device *dev) return port_self_size; } -static inline size_t if_nlmsg_size(const struct net_device *dev) +static noinline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -791,7 +791,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, { struct ifinfomsg *ifm; struct nlmsghdr *nlh; - const struct net_device_stats *stats; + struct rtnl_link_stats64 temp; 
+ const struct rtnl_link_stats64 *stats; struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -847,7 +848,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (attr == NULL) goto nla_put_failure; - stats = dev_get_stats(dev); + stats = dev_get_stats(dev, &temp); copy_rtnl_link_stats(nla_data(attr), stats); attr = nla_reserve(skb, IFLA_STATS64, diff --git a/net/core/scm.c b/net/core/scm.c index b88f6f9d0b97..413cab89017d 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -130,6 +130,7 @@ void __scm_destroy(struct scm_cookie *scm) } } } +EXPORT_SYMBOL(__scm_destroy); int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) { @@ -170,6 +171,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) err = scm_check_creds(&p->creds); if (err) goto error; + + if (pid_vnr(p->pid) != p->creds.pid) { + struct pid *pid; + err = -ESRCH; + pid = find_get_pid(p->creds.pid); + if (!pid) + goto error; + put_pid(p->pid); + p->pid = pid; + } + + if ((p->cred->euid != p->creds.uid) || + (p->cred->egid != p->creds.gid)) { + struct cred *cred; + err = -ENOMEM; + cred = prepare_creds(); + if (!cred) + goto error; + + cred->uid = cred->euid = p->creds.uid; + cred->gid = cred->egid = p->creds.uid; + put_cred(p->cred); + p->cred = cred; + } break; default: goto error; @@ -187,6 +212,7 @@ error: scm_destroy(p); return err; } +EXPORT_SYMBOL(__scm_send); int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { @@ -225,6 +251,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) out: return err; } +EXPORT_SYMBOL(put_cmsg); void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { @@ -294,6 +321,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) */ __scm_destroy(scm); } +EXPORT_SYMBOL(scm_detach_fds); struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) { @@ -311,9 +339,4 @@ struct scm_fp_list *scm_fp_dup(struct 
scm_fp_list *fpl) } return new_fpl; } - -EXPORT_SYMBOL(__scm_destroy); -EXPORT_SYMBOL(__scm_send); -EXPORT_SYMBOL(put_cmsg); -EXPORT_SYMBOL(scm_detach_fds); EXPORT_SYMBOL(scm_fp_dup); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f07e749d7b1..3a2513f0d0c3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -532,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); new->priority = old->priority; + new->deliver_no_wcard = old->deliver_no_wcard; #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif @@ -569,7 +570,6 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(len); C(data_len); C(mac_len); - C(rxhash); n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; @@ -817,7 +817,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, memcpy(data + nhead, skb->head, skb->tail - skb->head); #endif memcpy(data + size, skb_end_pointer(skb), - sizeof(struct skb_shared_info)); + offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); @@ -843,7 +843,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; - skb->csum_start += nhead; + /* Only adjust this if it actually is csum_start rather than csum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; @@ -930,7 +932,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, copy_skb_header(n, skb); off = newheadroom - oldheadroom; - n->csum_start += off; + if (n->ip_summed == CHECKSUM_PARTIAL) + n->csum_start += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; @@ 
-2483,7 +2486,6 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) skb_postpull_rcsum(skb, skb->data, len); return skb->data += len; } - EXPORT_SYMBOL_GPL(skb_pull_rcsum); /** diff --git a/net/core/sock.c b/net/core/sock.c index 2cf7f9f7e775..b05b9b6ddb87 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -110,6 +110,7 @@ #include <linux/tcp.h> #include <linux/init.h> #include <linux/highmem.h> +#include <linux/user_namespace.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -156,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { @@ -172,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { @@ -188,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_MAX" }; @@ -749,6 +750,20 @@ set_rcvbuf: EXPORT_SYMBOL(sock_setsockopt); +void cred_to_ucred(struct pid *pid, const struct cred *cred, + struct ucred *ucred) +{ + ucred->pid = pid_vnr(pid); + ucred->uid = ucred->gid = -1; + if (cred) { + struct user_namespace *current_ns = current_user_ns(); + + ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid); 
+ ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid); + } +} +EXPORT_SYMBOL_GPL(cred_to_ucred); + int sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -901,11 +916,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_PEERCRED: - if (len > sizeof(sk->sk_peercred)) - len = sizeof(sk->sk_peercred); - if (copy_to_user(optval, &sk->sk_peercred, len)) + { + struct ucred peercred; + if (len > sizeof(peercred)) + len = sizeof(peercred); + cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); + if (copy_to_user(optval, &peercred, len)) return -EFAULT; goto lenout; + } case SO_PEERNAME: { @@ -1119,6 +1138,9 @@ static void __sk_free(struct sock *sk) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", __func__, atomic_read(&sk->sk_omem_alloc)); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + put_pid(sk->sk_peer_pid); put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } @@ -1317,9 +1339,10 @@ EXPORT_SYMBOL(sock_wfree); void sock_rfree(struct sk_buff *skb) { struct sock *sk = skb->sk; + unsigned int len = skb->truesize; - atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - sk_mem_uncharge(skb->sk, skb->truesize); + atomic_sub(len, &sk->sk_rmem_alloc); + sk_mem_uncharge(sk, len); } EXPORT_SYMBOL(sock_rfree); @@ -1954,9 +1977,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - sk->sk_peercred.pid = 0; - sk->sk_peercred.uid = -1; - sk->sk_peercred.gid = -1; + sk->sk_peer_pid = NULL; + sk->sk_peer_cred = NULL; sk->sk_write_pending = 0; sk->sk_rcvlowat = 1; sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; @@ -2210,8 +2232,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); #ifdef CONFIG_NET_NS void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { - int cpu = smp_processor_id(); - per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val; + 
__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); } EXPORT_SYMBOL_GPL(sock_prot_inuse_add); @@ -2257,7 +2278,7 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { - __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; + __this_cpu_add(prot_inuse.val[prot->inuse_idx], val); } EXPORT_SYMBOL_GPL(sock_prot_inuse_add); diff --git a/net/core/stream.c b/net/core/stream.c index cc196f42b8d8..d959e0f41528 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -43,7 +43,6 @@ void sk_stream_write_space(struct sock *sk) rcu_read_unlock(); } } - EXPORT_SYMBOL(sk_stream_write_space); /** @@ -81,7 +80,6 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) } while (!done); return 0; } - EXPORT_SYMBOL(sk_stream_wait_connect); /** @@ -109,7 +107,6 @@ void sk_stream_wait_close(struct sock *sk, long timeout) finish_wait(sk_sleep(sk), &wait); } } - EXPORT_SYMBOL(sk_stream_wait_close); /** @@ -174,7 +171,6 @@ do_interrupted: err = sock_intr_errno(*timeo_p); goto out; } - EXPORT_SYMBOL(sk_stream_wait_memory); int sk_stream_error(struct sock *sk, int flags, int err) @@ -185,7 +181,6 @@ int sk_stream_error(struct sock *sk, int flags, int err) send_sig(SIGPIPE, current, 0); return err; } - EXPORT_SYMBOL(sk_stream_error); void sk_stream_kill_queues(struct sock *sk) @@ -210,5 +205,4 @@ void sk_stream_kill_queues(struct sock *sk) * have gone away, only the net layer knows can touch it. 
*/ } - EXPORT_SYMBOL(sk_stream_kill_queues); diff --git a/net/core/timestamping.c b/net/core/timestamping.c new file mode 100644 index 000000000000..0ae6c22da85b --- /dev/null +++ b/net/core/timestamping.c @@ -0,0 +1,126 @@ +/* + * PTP 1588 clock support - support for timestamping in PHY devices + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include <linux/errqueue.h> +#include <linux/phy.h> +#include <linux/ptp_classify.h> +#include <linux/skbuff.h> + +static struct sock_filter ptp_filter[] = { + PTP_FILTER +}; + +static unsigned int classify(struct sk_buff *skb) +{ + if (likely(skb->dev && + skb->dev->phydev && + skb->dev->phydev->drv)) + return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); + else + return PTP_CLASS_NONE; +} + +void skb_clone_tx_timestamp(struct sk_buff *skb) +{ + struct phy_device *phydev; + struct sk_buff *clone; + struct sock *sk = skb->sk; + unsigned int type; + + if (!sk) + return; + + type = classify(skb); + + switch (type) { + case PTP_CLASS_V1_IPV4: + case PTP_CLASS_V1_IPV6: + case PTP_CLASS_V2_IPV4: + case PTP_CLASS_V2_IPV6: + case PTP_CLASS_V2_L2: + case PTP_CLASS_V2_VLAN: + phydev = skb->dev->phydev; + if (likely(phydev->drv->txtstamp)) { + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) + return; + clone->sk = sk; + phydev->drv->txtstamp(phydev, clone, type); + } + break; + default: + break; + } +} + +void skb_complete_tx_timestamp(struct sk_buff *skb, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct sock *sk = skb->sk; + struct sock_exterr_skb *serr; + int err; + + if (!hwtstamps) + return; + + *skb_hwtstamps(skb) = *hwtstamps; + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + skb->sk = NULL; + err = sock_queue_err_skb(sk, skb); + if (err) + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); + +bool skb_defer_rx_timestamp(struct sk_buff *skb) +{ + struct phy_device *phydev; + unsigned int type; + + skb_push(skb, ETH_HLEN); + + type = classify(skb); + + skb_pull(skb, ETH_HLEN); + + switch (type) { + case PTP_CLASS_V1_IPV4: + case PTP_CLASS_V1_IPV6: + case PTP_CLASS_V2_IPV4: + case PTP_CLASS_V2_IPV6: + case PTP_CLASS_V2_L2: + case PTP_CLASS_V2_VLAN: + phydev = skb->dev->phydev; + if (likely(phydev->drv->rxtstamp)) + return 
phydev->drv->rxtstamp(phydev, skb, type); + break; + default: + break; + } + + return false; +} + +void __init skb_timestamping_init(void) +{ + BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter))); +} diff --git a/net/core/utils.c b/net/core/utils.c index 838250241d26..f41854470539 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -77,7 +77,6 @@ __be32 in_aton(const char *str) } return(htonl(l)); } - EXPORT_SYMBOL(in_aton); #define IN6PTON_XDIGIT 0x00010000 @@ -162,7 +161,6 @@ out: *end = s; return ret; } - EXPORT_SYMBOL(in4_pton); int in6_pton(const char *src, int srclen, @@ -280,7 +278,6 @@ out: *end = s; return ret; } - EXPORT_SYMBOL(in6_pton); void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 01e4d39fa232..92a6fcb40d7d 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -82,7 +82,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) elapsed_time = delta / 10; if (elapsed_time != 0 && - dccp_insert_option_elapsed_time(sk, skb, elapsed_time)) + dccp_insert_option_elapsed_time(skb, elapsed_time)) return -1; avr = dccp_ackvec_record_new(); @@ -201,7 +201,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, const unsigned int packets, const unsigned char state) { - unsigned int gap; + long gap; long new_head; if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d3235899c7e3..95f752986497 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -715,9 +715,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) x_recv = htonl(hc->rx_x_recv); pinv = htonl(hc->rx_pinv); - if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, + if (dccp_insert_option(skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)) || - dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, + dccp_insert_option(skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv))) 
return -1; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index a10a61a1ded2..3ccef1b70fee 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -446,16 +446,12 @@ extern void dccp_feat_list_purge(struct list_head *fn_list); extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); -extern int dccp_insert_option_elapsed_time(struct sock *sk, - struct sk_buff *skb, - u32 elapsed_time); +extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); extern u32 dccp_timestamp(void); extern void dccp_timestamping_init(void); -extern int dccp_insert_option_timestamp(struct sock *sk, - struct sk_buff *skb); -extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb, - unsigned char option, - const void *value, unsigned char len); +extern int dccp_insert_option_timestamp(struct sk_buff *skb); +extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, + const void *value, unsigned char len); #ifdef CONFIG_SYSCTL extern int dccp_sysctl_init(void); diff --git a/net/dccp/input.c b/net/dccp/input.c index 6beb6a7d6fba..10c957a88f4f 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -430,7 +430,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dccp_parse_options(sk, NULL, skb)) return 1; - /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ + /* Obtain usec RTT sample from SYN exchange (used by TFRC). 
*/ if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - dp->dccps_options_received.dccpor_timestamp_echo)); @@ -535,6 +535,8 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, const struct dccp_hdr *dh, const unsigned len) { + struct dccp_sock *dp = dccp_sk(sk); + u32 sample = dp->dccps_options_received.dccpor_timestamp_echo; int queued = 0; switch (dh->dccph_type) { @@ -559,7 +561,14 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, if (sk->sk_state == DCCP_PARTOPEN) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); - dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; + /* Obtain usec RTT sample from SYN exchange (used by TFRC). */ + if (likely(sample)) { + long delta = dccp_timestamp() - sample; + + dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * delta); + } + + dp->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; dccp_set_state(sk, DCCP_OPEN); if (dh->dccph_type == DCCP_PKT_DATAACK || diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d9b11ef8694c..d4a166f0f391 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -105,7 +105,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; /* OK, now commit destination to socket. 
*/ - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, inet->inet_daddr, @@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return NULL; } - return &rt->u.dst; + return &rt->dst; } static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 091698899594..6e3f32575df7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -248,7 +248,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct ipv6_txoptions *opt = NULL; - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; int err = -1; struct dst_entry *dst; @@ -265,13 +265,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, opt = np->opt; - if (opt != NULL && opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -545,19 +539,13 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, goto out_overflow; if (dst == NULL) { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - if (opt != NULL && opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; @@ -885,7 +873,7 @@ static int dccp_v6_connect(struct sock *sk, 
struct sockaddr *uaddr, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct in6_addr *saddr = NULL, *final_p, final; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -988,13 +976,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); - if (np->opt != NULL && np->opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/dccp/options.c b/net/dccp/options.c index 07395f861d35..bfda087bd90d 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -299,9 +299,8 @@ static inline u8 dccp_ndp_len(const u64 ndp) return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); } -int dccp_insert_option(struct sock *sk, struct sk_buff *skb, - const unsigned char option, - const void *value, const unsigned char len) +int dccp_insert_option(struct sk_buff *skb, const unsigned char option, + const void *value, const unsigned char len) { unsigned char *to; @@ -354,8 +353,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time) return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 
2 : 4; } -int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, - u32 elapsed_time) +int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) { const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 2 + elapsed_time_len; @@ -386,13 +384,13 @@ int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) +int dccp_insert_option_timestamp(struct sk_buff *skb) { __be32 now = htonl(dccp_timestamp()); /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ - return dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); + return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now)); } EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); @@ -531,9 +529,9 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) { /* * Obtain RTT sample from Request/Response exchange. - * This is currently used in CCID 3 initialisation. + * This is currently used for TFRC initialisation. */ - if (dccp_insert_option_timestamp(sk, skb)) + if (dccp_insert_option_timestamp(skb)) return -1; } else if (dp->dccps_hc_rx_ackvec != NULL && @@ -564,6 +562,10 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb) if (dccp_feat_insert_opts(NULL, dreq, skb)) return -1; + /* Obtain RTT sample from Response/Ack exchange (used by TFRC). 
*/ + if (dccp_insert_option_timestamp(skb)) + return -1; + if (dreq->dreq_timestamp_echo != 0 && dccp_insert_option_timestamp_echo(NULL, dreq, skb)) return -1; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b03ecf6b2bb0..096250d1323b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -473,14 +473,9 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type, if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) return -EINVAL; - val = kmalloc(optlen, GFP_KERNEL); - if (val == NULL) - return -ENOMEM; - - if (copy_from_user(val, optval, optlen)) { - kfree(val); - return -EFAULT; - } + val = memdup_user(optval, optlen); + if (IS_ERR(val)) + return PTR_ERR(val); lock_sock(sk); if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) @@ -1007,7 +1002,8 @@ EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int dccp_mib_init(void) { return snmp_mib_init((void __percpu **)dccp_statistics, - sizeof(struct dccp_mib)); + sizeof(struct dccp_mib), + __alignof__(struct dccp_mib)); } static inline void dccp_mib_exit(void) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 812e6dff6067..6585ea6d1182 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -146,13 +146,13 @@ static __inline__ unsigned dn_hash(__le16 src, __le16 dst) static inline void dnrt_free(struct dn_route *rt) { - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline void dnrt_drop(struct dn_route *rt) { - dst_release(&rt->u.dst); - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + dst_release(&rt->dst); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static void dn_dst_check_expire(unsigned long dummy) @@ -167,13 +167,13 @@ static void dn_dst_check_expire(unsigned long dummy) spin_lock(&dn_rt_hash_table[i].lock); while((rt=*rtp) != NULL) { - if (atomic_read(&rt->u.dst.__refcnt) || - (now - rt->u.dst.lastuse) < expire) { - rtp = &rt->u.dst.dn_next; + if (atomic_read(&rt->dst.__refcnt) || + (now - rt->dst.lastuse) < 
expire) { + rtp = &rt->dst.dn_next; continue; } - *rtp = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + *rtp = rt->dst.dn_next; + rt->dst.dn_next = NULL; dnrt_free(rt); } spin_unlock(&dn_rt_hash_table[i].lock); @@ -198,13 +198,13 @@ static int dn_dst_gc(struct dst_ops *ops) rtp = &dn_rt_hash_table[i].chain; while((rt=*rtp) != NULL) { - if (atomic_read(&rt->u.dst.__refcnt) || - (now - rt->u.dst.lastuse) < expire) { - rtp = &rt->u.dst.dn_next; + if (atomic_read(&rt->dst.__refcnt) || + (now - rt->dst.lastuse) < expire) { + rtp = &rt->dst.dn_next; continue; } - *rtp = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + *rtp = rt->dst.dn_next; + rt->dst.dn_next = NULL; dnrt_drop(rt); break; } @@ -287,25 +287,25 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * while((rth = *rthp) != NULL) { if (compare_keys(&rth->fl, &rt->fl)) { /* Put it first */ - *rthp = rth->u.dst.dn_next; - rcu_assign_pointer(rth->u.dst.dn_next, + *rthp = rth->dst.dn_next; + rcu_assign_pointer(rth->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); - dst_use(&rth->u.dst, now); + dst_use(&rth->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); dnrt_drop(rt); *rp = rth; return 0; } - rthp = &rth->u.dst.dn_next; + rthp = &rth->dst.dn_next; } - rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain); + rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); - dst_use(&rt->u.dst, now); + dst_use(&rt->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); *rp = rt; return 0; @@ -323,8 +323,8 @@ static void dn_run_flush(unsigned long dummy) goto nothing_to_declare; for(; rt; rt=next) { - next = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + next = rt->dst.dn_next; + rt->dst.dn_next = NULL; dst_free((struct dst_entry *)rt); } @@ -743,7 +743,7 @@ static int dn_forward(struct sk_buff *skb) /* Ensure that we have enough space for headers */ rt 
= (struct dn_route *)skb_dst(skb); header_len = dn_db->use_long ? 21 : 6; - if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len)) + if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len)) goto drop; /* @@ -752,7 +752,7 @@ static int dn_forward(struct sk_buff *skb) if (++cb->hops > 30) goto drop; - skb->dev = rt->u.dst.dev; + skb->dev = rt->dst.dev; /* * If packet goes out same interface it came in on, then set @@ -792,7 +792,7 @@ static int dn_rt_bug(struct sk_buff *skb) static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; - struct net_device *dev = rt->u.dst.dev; + struct net_device *dev = rt->dst.dev; struct neighbour *n; unsigned mss; @@ -800,25 +800,25 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - memcpy(rt->u.dst.metrics, fi->fib_metrics, - sizeof(rt->u.dst.metrics)); + memcpy(rt->dst.metrics, fi->fib_metrics, + sizeof(rt->dst.metrics)); } rt->rt_type = res->type; - if (dev != NULL && rt->u.dst.neighbour == NULL) { + if (dev != NULL && rt->dst.neighbour == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - rt->u.dst.neighbour = n; + rt->dst.neighbour = n; } - if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 || - dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu) - rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 || - dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss) - rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; + if (dst_metric(&rt->dst, RTAX_MTU) == 0 || + dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) + rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); + if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 || + dst_metric(&rt->dst, RTAX_ADVMSS) > mss) + 
rt->dst.metrics[RTAX_ADVMSS-1] = mss; return 0; } @@ -1096,8 +1096,8 @@ make_route: if (rt == NULL) goto e_nobufs; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.flags = DST_HOST; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.flags = DST_HOST; rt->fl.fld_src = oldflp->fld_src; rt->fl.fld_dst = oldflp->fld_dst; @@ -1113,17 +1113,17 @@ make_route: rt->rt_dst_map = fl.fld_dst; rt->rt_src_map = fl.fld_src; - rt->u.dst.dev = dev_out; + rt->dst.dev = dev_out; dev_hold(dev_out); - rt->u.dst.neighbour = neigh; + rt->dst.neighbour = neigh; neigh = NULL; - rt->u.dst.lastuse = jiffies; - rt->u.dst.output = dn_output; - rt->u.dst.input = dn_rt_bug; + rt->dst.lastuse = jiffies; + rt->dst.output = dn_output; + rt->dst.input = dn_rt_bug; rt->rt_flags = flags; if (flags & RTCF_LOCAL) - rt->u.dst.input = dn_nsp_rx; + rt->dst.input = dn_nsp_rx; err = dn_rt_set_next_hop(rt, &res); if (err) @@ -1152,7 +1152,7 @@ e_nobufs: err = -ENOBUFS; goto done; e_neighbour: - dst_free(&rt->u.dst); + dst_free(&rt->dst); goto e_nobufs; } @@ -1168,15 +1168,15 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; - rt = rcu_dereference_bh(rt->u.dst.dn_next)) { + rt = rcu_dereference_bh(rt->dst.dn_next)) { if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && (flp->mark == rt->fl.mark) && (rt->fl.iif == 0) && (rt->fl.oif == flp->oif)) { - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); - *pprt = &rt->u.dst; + *pprt = &rt->dst; return 0; } } @@ -1375,29 +1375,29 @@ make_route: rt->fl.iif = in_dev->ifindex; rt->fl.mark = fl.mark; - rt->u.dst.flags = DST_HOST; - rt->u.dst.neighbour = neigh; - rt->u.dst.dev = out_dev; - rt->u.dst.lastuse = jiffies; - rt->u.dst.output = dn_rt_bug; + rt->dst.flags = DST_HOST; + rt->dst.neighbour = neigh; + rt->dst.dev = out_dev; + rt->dst.lastuse = jiffies; + rt->dst.output = 
dn_rt_bug; switch(res.type) { case RTN_UNICAST: - rt->u.dst.input = dn_forward; + rt->dst.input = dn_forward; break; case RTN_LOCAL: - rt->u.dst.output = dn_output; - rt->u.dst.input = dn_nsp_rx; - rt->u.dst.dev = in_dev; + rt->dst.output = dn_output; + rt->dst.input = dn_nsp_rx; + rt->dst.dev = in_dev; flags |= RTCF_LOCAL; break; default: case RTN_UNREACHABLE: case RTN_BLACKHOLE: - rt->u.dst.input = dst_discard; + rt->dst.input = dst_discard; } rt->rt_flags = flags; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + if (rt->dst.dev) + dev_hold(rt->dst.dev); err = dn_rt_set_next_hop(rt, &res); if (err) @@ -1405,7 +1405,7 @@ make_route: hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); dn_insert_route(rt, hash, &rt); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); done: if (neigh) @@ -1427,7 +1427,7 @@ e_nobufs: goto done; e_neighbour: - dst_free(&rt->u.dst); + dst_free(&rt->dst); goto done; } @@ -1442,13 +1442,13 @@ static int dn_route_input(struct sk_buff *skb) rcu_read_lock(); for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; - rt = rcu_dereference(rt->u.dst.dn_next)) { + rt = rcu_dereference(rt->dst.dn_next)) { if ((rt->fl.fld_src == cb->src) && (rt->fl.fld_dst == cb->dst) && (rt->fl.oif == 0) && (rt->fl.mark == skb->mark) && (rt->fl.iif == cb->iif)) { - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); return 0; @@ -1487,8 +1487,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, r->rtm_src_len = 16; RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); } - if (rt->u.dst.dev) - RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); + if (rt->dst.dev) + RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex); /* * Note to self - change this if input routes reverse direction when * they deal only with inputs and not with replies like they do @@ -1497,11 +1497,11 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, RTA_PUT(skb, 
RTA_PREFSRC, 2, &rt->rt_local_src); if (rt->rt_daddr != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto rtattr_failure; - expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; - if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, - rt->u.dst.error) < 0) + expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, + rt->dst.error) < 0) goto rtattr_failure; if (rt->fl.iif) RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); @@ -1568,8 +1568,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void local_bh_enable(); memset(cb, 0, sizeof(struct dn_skb_cb)); rt = (struct dn_route *)skb_dst(skb); - if (!err && -rt->u.dst.error) - err = rt->u.dst.error; + if (!err && -rt->dst.error) + err = rt->dst.error; } else { int oif = 0; if (rta[RTA_OIF - 1]) @@ -1583,7 +1583,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb->dev = NULL; if (err) goto out_free; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -1632,10 +1632,10 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock_bh(); for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) { + rt = rcu_dereference_bh(rt->dst.dn_next), idx++) { if (idx < s_idx) continue; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { @@ -1678,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou { struct dn_rt_cache_iter_state *s = seq->private; - rt = rt->u.dst.dn_next; + rt = rt->dst.dn_next; while(!rt) { rcu_read_unlock_bh(); 
if (--s->bucket < 0) @@ -1719,12 +1719,12 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", - rt->u.dst.dev ? rt->u.dst.dev->name : "*", + rt->dst.dev ? rt->dst.dev->name : "*", dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), - atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, - (int) dst_metric(&rt->u.dst, RTAX_RTT)); + atomic_read(&rt->dst.__refcnt), + rt->dst.__use, + (int) dst_metric(&rt->dst, RTAX_RTT)); return 0; } diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig new file mode 100644 index 000000000000..50d49f7e0472 --- /dev/null +++ b/net/dns_resolver/Kconfig @@ -0,0 +1,27 @@ +# +# Configuration for DNS Resolver +# +config DNS_RESOLVER + tristate "DNS Resolver support" + depends on NET && KEYS + help + Saying Y here will include support for the DNS Resolver key type + which can be used to make upcalls to perform DNS lookups in + userspace. + + DNS Resolver is used to query DNS server for information. Examples + being resolving a UNC hostname element to an IP address for CIFS or + performing a DNS query for AFSDB records so that AFS can locate a + cell's volume location database servers. + + DNS Resolver is used by the CIFS and AFS modules, and would support + SMB2 later. DNS Resolver is supported by the userspace upcall + helper "/sbin/dns.resolver" via /etc/request-key.conf. + + See <file:Documentation/networking/dns_resolver.txt> for further + information. + + To compile this as a module, choose M here: the module will be called + dnsresolver. + + If unsure, say N. diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile new file mode 100644 index 000000000000..c0ef4e71dc49 --- /dev/null +++ b/net/dns_resolver/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Linux DNS Resolver. 
+# + +obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o + +dns_resolver-objs := dns_key.o dns_query.o diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c new file mode 100644 index 000000000000..400a04d5c9a1 --- /dev/null +++ b/net/dns_resolver/dns_key.c @@ -0,0 +1,211 @@ +/* Key type used to cache DNS lookups made by the kernel + * + * See Documentation/networking/dns_resolver.txt + * + * Copyright (c) 2007 Igor Mammedov + * Author(s): Igor Mammedov (niallain@gmail.com) + * Steve French (sfrench@us.ibm.com) + * Wang Lei (wang840925@gmail.com) + * David Howells (dhowells@redhat.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/keyctl.h> +#include <linux/err.h> +#include <keys/dns_resolver-type.h> +#include <keys/user-type.h> +#include "internal.h" + +MODULE_DESCRIPTION("DNS Resolver"); +MODULE_AUTHOR("Wang Lei"); +MODULE_LICENSE("GPL"); + +unsigned dns_resolver_debug; +module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(debug, "DNS Resolver debugging mask"); + +const struct cred *dns_resolver_cache; + +/* + * Instantiate a user defined key for dns_resolver. 
+ * + * The data must be a NUL-terminated string, with the NUL char accounted in + * datalen. + * + * If the data contains a '#' characters, then we take the clause after each + * one to be an option of the form 'key=value'. The actual data of interest is + * the string leading up to the first '#'. For instance: + * + * "ip1,ip2,...#foo=bar" + */ +static int +dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) +{ + struct user_key_payload *upayload; + int ret; + size_t result_len = 0; + const char *data = _data, *opt; + + kenter("%%%d,%s,'%s',%zu", + key->serial, key->description, data, datalen); + + if (datalen <= 1 || !data || data[datalen - 1] != '\0') + return -EINVAL; + datalen--; + + /* deal with any options embedded in the data */ + opt = memchr(data, '#', datalen); + if (!opt) { + kdebug("no options currently supported"); + return -EINVAL; + } + + result_len = datalen; + ret = key_payload_reserve(key, result_len); + if (ret < 0) + return -EINVAL; + + upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL); + if (!upayload) { + kleave(" = -ENOMEM"); + return -ENOMEM; + } + + upayload->datalen = result_len; + memcpy(upayload->data, data, result_len); + upayload->data[result_len] = '\0'; + rcu_assign_pointer(key->payload.data, upayload); + + kleave(" = 0"); + return 0; +} + +/* + * The description is of the form "[<type>:]<domain_name>" + * + * The domain name may be a simple name or an absolute domain name (which + * should end with a period). The domain name is case-independent. 
+ */ +static int +dns_resolver_match(const struct key *key, const void *description) +{ + int slen, dlen, ret = 0; + const char *src = key->description, *dsp = description; + + kenter("%s,%s", src, dsp); + + if (!src || !dsp) + goto no_match; + + if (strcasecmp(src, dsp) == 0) + goto matched; + + slen = strlen(src); + dlen = strlen(dsp); + if (slen <= 0 || dlen <= 0) + goto no_match; + if (src[slen - 1] == '.') + slen--; + if (dsp[dlen - 1] == '.') + dlen--; + if (slen != dlen || strncasecmp(src, dsp, slen) != 0) + goto no_match; + +matched: + ret = 1; +no_match: + kleave(" = %d", ret); + return ret; +} + +struct key_type key_type_dns_resolver = { + .name = "dns_resolver", + .instantiate = dns_resolver_instantiate, + .match = dns_resolver_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = user_describe, + .read = user_read, +}; + +static int __init init_dns_resolver(void) +{ + struct cred *cred; + struct key *keyring; + int ret; + + printk(KERN_NOTICE "Registering the %s key type\n", + key_type_dns_resolver.name); + + /* create an override credential set with a special thread keyring in + * which DNS requests are cached + * + * this is used to prevent malicious redirections from being installed + * with add_key(). 
+ */ + cred = prepare_kernel_cred(NULL); + if (!cred) + return -ENOMEM; + + keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + if (IS_ERR(keyring)) { + ret = PTR_ERR(keyring); + goto failed_put_cred; + } + + ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); + if (ret < 0) + goto failed_put_key; + + ret = register_key_type(&key_type_dns_resolver); + if (ret < 0) + goto failed_put_key; + + /* instruct request_key() to use this special keyring as a cache for + * the results it looks up */ + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + dns_resolver_cache = cred; + + kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); + return 0; + +failed_put_key: + key_put(keyring); +failed_put_cred: + put_cred(cred); + return ret; +} + +static void __exit exit_dns_resolver(void) +{ + key_revoke(dns_resolver_cache->thread_keyring); + unregister_key_type(&key_type_dns_resolver); + put_cred(dns_resolver_cache); + printk(KERN_NOTICE "Unregistered %s key type\n", + key_type_dns_resolver.name); +} + +module_init(init_dns_resolver) +module_exit(exit_dns_resolver) +MODULE_LICENSE("GPL"); diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c new file mode 100644 index 000000000000..03d5255f5cf2 --- /dev/null +++ b/net/dns_resolver/dns_query.c @@ -0,0 +1,160 @@ +/* Upcall routine, designed to work as a key type and working through + * /sbin/request-key to contact userspace when handling DNS queries. + * + * See Documentation/networking/dns_resolver.txt + * + * Copyright (c) 2007 Igor Mammedov + * Author(s): Igor Mammedov (niallain@gmail.com) + * Steve French (sfrench@us.ibm.com) + * Wang Lei (wang840925@gmail.com) + * David Howells (dhowells@redhat.com) + * + * The upcall wrapper used to make an arbitrary DNS query. 
+ * + * This function requires the appropriate userspace tool dns.upcall to be + * installed and something like the following lines should be added to the + * /etc/request-key.conf file: + * + * create dns_resolver * * /sbin/dns.upcall %k + * + * For example to use this module to query AFSDB RR: + * + * create dns_resolver afsdb:* * /sbin/dns.afsdb %k + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/dns_resolver.h> +#include <linux/err.h> +#include <keys/dns_resolver-type.h> +#include <keys/user-type.h> + +#include "internal.h" + +/** + * dns_query - Query the DNS + * @type: Query type (or NULL for straight host->IP lookup) + * @name: Name to look up + * @namelen: Length of name + * @options: Request options (or NULL if no options) + * @_result: Where to place the returned data. + * @_expiry: Where to store the result expiry time (or NULL) + * + * The data will be returned in the pointer at *result, and the caller is + * responsible for freeing it. + * + * The description should be of the form "[<query_type>:]<domain_name>", and + * the options need to be appropriate for the query type requested. If no + * query_type is given, then the query is a straight hostname to IP address + * lookup. 
+ * + * The DNS resolution lookup is performed by upcalling to userspace by way of + * requesting a key of type dns_resolver. + * + * Returns the size of the result on success, -ve error code otherwise. + */ +int dns_query(const char *type, const char *name, size_t namelen, + const char *options, char **_result, time_t *_expiry) +{ + struct key *rkey; + struct user_key_payload *upayload; + const struct cred *saved_cred; + size_t typelen, desclen; + char *desc, *cp; + int ret, len; + + kenter("%s,%*.*s,%zu,%s", + type, (int)namelen, (int)namelen, name, namelen, options); + + if (!name || namelen == 0 || !_result) + return -EINVAL; + + /* construct the query key description as "[<type>:]<name>" */ + typelen = 0; + desclen = 0; + if (type) { + typelen = strlen(type); + if (typelen < 1) + return -EINVAL; + desclen += typelen + 1; + } + + if (!namelen) + namelen = strlen(name); + if (namelen < 3) + return -EINVAL; + desclen += namelen + 1; + + desc = kmalloc(desclen, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + cp = desc; + if (type) { + memcpy(cp, type, typelen); + cp += typelen; + *cp++ = ':'; + } + memcpy(cp, name, namelen); + cp += namelen; + *cp = '\0'; + + if (!options) + options = ""; + kdebug("call request_key(,%s,%s)", desc, options); + + /* make the upcall, using special credentials to prevent the use of + * add_key() to preinstall malicious redirections + */ + saved_cred = override_creds(dns_resolver_cache); + rkey = request_key(&key_type_dns_resolver, desc, options); + revert_creds(saved_cred); + kfree(desc); + if (IS_ERR(rkey)) { + ret = PTR_ERR(rkey); + goto out; + } + + down_read(&rkey->sem); + rkey->perm |= KEY_USR_VIEW; + + ret = key_validate(rkey); + if (ret < 0) + goto put; + + upayload = rcu_dereference_protected(rkey->payload.data, + lockdep_is_held(&rkey->sem)); + len = upayload->datalen; + + ret = -ENOMEM; + *_result = kmalloc(len + 1, GFP_KERNEL); + if (!*_result) + goto put; + + memcpy(*_result, upayload->data, len + 1); + if (_expiry) + 
*_expiry = rkey->expiry; + + ret = len; +put: + up_read(&rkey->sem); + key_put(rkey); +out: + kleave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(dns_query); diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h new file mode 100644 index 000000000000..189ca9e9b785 --- /dev/null +++ b/net/dns_resolver/internal.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 Wang Lei + * Author(s): Wang Lei (wang840925@gmail.com). All Rights Reserved. + * + * Internal DNS Rsolver stuff + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +/* + * dns_key.c + */ +extern const struct cred *dns_resolver_cache; + +/* + * debug tracing + */ +extern unsigned dns_resolver_debug; + +#define kdebug(FMT, ...) \ +do { \ + if (unlikely(dns_resolver_debug)) \ + printk(KERN_DEBUG "[%-6.6s] "FMT"\n", \ + current->comm, ##__VA_ARGS__); \ +} while (0) + +#define kenter(FMT, ...) kdebug("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) 
kdebug("<== %s()"FMT"", __func__, ##__VA_ARGS__) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index c51b55400dc5..11201784d29a 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -1,7 +1,7 @@ menuconfig NET_DSA bool "Distributed Switch Architecture support" default n - depends on EXPERIMENTAL && !S390 + depends on EXPERIMENTAL && NET_ETHERNET && !S390 select PHYLIB ---help--- This allows you to use hardware switch chips that use diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8fdca56bb08f..64ca2a6fa0d4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -164,10 +164,9 @@ out: static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); - struct mii_ioctl_data *mii_data = if_mii(ifr); if (p->phy != NULL) - return phy_mii_ioctl(p->phy, mii_data, cmd); + return phy_mii_ioctl(p->phy, ifr, cmd); return -EOPNOTSUPP; } diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 2a5a8053e000..dc54bd0d083b 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -48,7 +48,7 @@ static const struct proto_ops econet_ops; static struct hlist_head econet_sklist; -static DEFINE_RWLOCK(econet_lock); +static DEFINE_SPINLOCK(econet_lock); static DEFINE_MUTEX(econet_mutex); /* Since there are only 256 possible network numbers (or fewer, depends @@ -98,16 +98,16 @@ struct ec_cb static void econet_remove_socket(struct hlist_head *list, struct sock *sk) { - write_lock_bh(&econet_lock); + spin_lock_bh(&econet_lock); sk_del_node_init(sk); - write_unlock_bh(&econet_lock); + spin_unlock_bh(&econet_lock); } static void econet_insert_socket(struct hlist_head *list, struct sock *sk) { - write_lock_bh(&econet_lock); + spin_lock_bh(&econet_lock); sk_add_node(sk, list); - write_unlock_bh(&econet_lock); + spin_unlock_bh(&econet_lock); } /* @@ -782,15 +782,19 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char struct sock *sk; struct hlist_node *node; + spin_lock(&econet_lock); 
sk_for_each(sk, node, &econet_sklist) { struct econet_sock *opt = ec_sk(sk); if ((opt->port == port || opt->port == 0) && (opt->station == station || opt->station == 0) && - (opt->net == net || opt->net == 0)) + (opt->net == net || opt->net == 0)) { + sock_hold(sk); goto found; + } } sk = NULL; found: + spin_unlock(&econet_lock); return sk; } @@ -852,7 +856,7 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) { struct iphdr *ip = ip_hdr(skb); unsigned char stn = ntohl(ip->saddr) & 0xff; - struct sock *sk; + struct sock *sk = NULL; struct sk_buff *newskb; struct ec_device *edev = skb->dev->ec_ptr; @@ -882,10 +886,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) } aun_send_response(ip->saddr, ah->handle, 3, 0); + sock_put(sk); return; bad: aun_send_response(ip->saddr, ah->handle, 4, 0); + if (sk) + sock_put(sk); } /* @@ -1050,7 +1057,7 @@ release: static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; - struct sock *sk; + struct sock *sk = NULL; struct ec_device *edev = dev->ec_ptr; if (!net_eq(dev_net(dev), &init_net)) @@ -1085,10 +1092,12 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb, hdr->port)) goto drop; - + sock_put(sk); return NET_RX_SUCCESS; drop: + if (sk) + sock_put(sk); kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 61ec0329316c..215c83986a9d 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -158,7 +158,6 @@ EXPORT_SYMBOL(eth_rebuild_header); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; - unsigned char *rawp; skb->dev = dev; skb_reset_mac_header(skb); @@ -199,15 +198,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; - rawp = 
skb->data; - /* * This is a magic hack to spot IPX packets. Older Novell breaks * the protocol design and runs IPX over 802.3 without an 802.2 LLC * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This * won't work for fault tolerant netware but does for the rest. */ - if (*(unsigned short *)rawp == 0xFFFF) + if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF) return htons(ETH_P_802_3); /* diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c index eb00796758c3..85d574addbc1 100644 --- a/net/ethernet/pe2.c +++ b/net/ethernet/pe2.c @@ -28,11 +28,10 @@ struct datalink_proto *make_EII_client(void) return proto; } +EXPORT_SYMBOL(make_EII_client); void destroy_EII_client(struct datalink_proto *dl) { kfree(dl); } - EXPORT_SYMBOL(destroy_EII_client); -EXPORT_SYMBOL(make_EII_client); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 551ce564b035..6a1100c25a9f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -355,6 +355,8 @@ lookup_protocol: inet = inet_sk(sk); inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; + inet->nodefrag = 0; + if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) @@ -725,28 +727,31 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, sock_rps_record_flow(sk); /* We may need to bind the socket. */ - if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->sendmsg(iocb, sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); -static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) +ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags) { struct sock *sk = sock->sk; sock_rps_record_flow(sk); /* We may need to bind the socket. 
*/ - if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + inet_autobind(sk)) return -EAGAIN; if (sk->sk_prot->sendpage) return sk->sk_prot->sendpage(sk, page, offset, size, flags); return sock_no_sendpage(sock, page, offset, size, flags); } +EXPORT_SYMBOL(inet_sendpage); int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -892,10 +897,10 @@ const struct proto_ops inet_stream_ops = { .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, - .sendmsg = tcp_sendmsg, + .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = tcp_sendpage, + .sendpage = inet_sendpage, .splice_read = tcp_splice_read, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, @@ -1100,7 +1105,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (err) return err; - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); new_saddr = rt->rt_src; @@ -1166,7 +1171,7 @@ int inet_sk_rebuild_header(struct sock *sk) err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); } if (!err) - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); else { /* Routing failed... 
*/ sk->sk_route_caps = 0; @@ -1425,13 +1430,49 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt) } EXPORT_SYMBOL_GPL(snmp_fold_field); -int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) +#if BITS_PER_LONG==32 + +u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) +{ + u64 res = 0; + int cpu; + + for_each_possible_cpu(cpu) { + void *bhptr, *userptr; + struct u64_stats_sync *syncp; + u64 v_bh, v_user; + unsigned int start; + + /* first mib used by softirq context, we must use _bh() accessors */ + bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu); + syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); + do { + start = u64_stats_fetch_begin_bh(syncp); + v_bh = *(((u64 *) bhptr) + offt); + } while (u64_stats_fetch_retry_bh(syncp, start)); + + /* second mib used in USER context */ + userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu); + syncp = (struct u64_stats_sync *)(userptr + syncp_offset); + do { + start = u64_stats_fetch_begin(syncp); + v_user = *(((u64 *) userptr) + offt); + } while (u64_stats_fetch_retry(syncp, start)); + + res += v_bh + v_user; + } + return res; +} +EXPORT_SYMBOL_GPL(snmp_fold_field64); +#endif + +int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) { BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long)); + ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long)); + ptr[1] = __alloc_percpu(mibsize, align); if (!ptr[1]) goto err1; return 0; @@ -1488,25 +1529,32 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, - sizeof(struct tcp_mib)) < 0) + sizeof(struct tcp_mib), + __alignof__(struct tcp_mib)) < 0) goto err_tcp_mib; if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, - sizeof(struct ipstats_mib)) < 0) + sizeof(struct ipstats_mib), + __alignof__(struct 
ipstats_mib)) < 0) goto err_ip_mib; if (snmp_mib_init((void __percpu **)net->mib.net_statistics, - sizeof(struct linux_mib)) < 0) + sizeof(struct linux_mib), + __alignof__(struct linux_mib)) < 0) goto err_net_mib; if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, - sizeof(struct udp_mib)) < 0) + sizeof(struct udp_mib), + __alignof__(struct udp_mib)) < 0) goto err_udp_mib; if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, - sizeof(struct udp_mib)) < 0) + sizeof(struct udp_mib), + __alignof__(struct udp_mib)) < 0) goto err_udplite_mib; if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, - sizeof(struct icmp_mib)) < 0) + sizeof(struct icmp_mib), + __alignof__(struct icmp_mib)) < 0) goto err_icmp_mib; if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, - sizeof(struct icmpmsg_mib)) < 0) + sizeof(struct icmpmsg_mib), + __alignof__(struct icmpmsg_mib)) < 0) goto err_icmpmsg_mib; tcp_mib_init(net); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f094b75810db..96c1955b3e2f 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -116,6 +116,7 @@ #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) #include <net/atmclip.h> struct neigh_table *clip_tbl_hook; +EXPORT_SYMBOL(clip_tbl_hook); #endif #include <asm/system.h> @@ -169,6 +170,7 @@ const struct neigh_ops arp_broken_ops = { .hh_output = dev_queue_xmit, .queue_xmit = dev_queue_xmit, }; +EXPORT_SYMBOL(arp_broken_ops); struct neigh_table arp_tbl = { .family = AF_INET, @@ -198,6 +200,7 @@ struct neigh_table arp_tbl = { .gc_thresh2 = 512, .gc_thresh3 = 1024, }; +EXPORT_SYMBOL(arp_tbl); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) { @@ -333,11 +336,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) struct net_device *dev = neigh->dev; __be32 target = *(__be32*)neigh->primary_key; int probes = atomic_read(&neigh->probes); - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev; - if (!in_dev) + 
rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) { + rcu_read_unlock(); return; - + } switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { default: case 0: /* By default announce any local IP */ @@ -358,9 +364,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) case 2: /* Avoid secondary IPs, get a primary/preferred one */ break; } + rcu_read_unlock(); - if (in_dev) - in_dev_put(in_dev); if (!saddr) saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); @@ -427,7 +432,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) if (ip_route_output_key(net, &rt, &fl) < 0) return 1; - if (rt->u.dst.dev != dev) { + if (rt->dst.dev != dev) { NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); flag = 1; } @@ -497,6 +502,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) kfree_skb(skb); return 1; } +EXPORT_SYMBOL(arp_find); /* END OF OBSOLETE FUNCTIONS */ @@ -532,7 +538,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct in_device *out_dev; int imi, omi = -1; - if (rt->u.dst.dev == dev) + if (rt->dst.dev == dev) return 0; if (!IN_DEV_PROXY_ARP(in_dev)) @@ -545,10 +551,10 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, /* place to check for proxy_arp for routes */ - if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) { + out_dev = __in_dev_get_rcu(rt->dst.dev); + if (out_dev) omi = IN_DEV_MEDIUM_ID(out_dev); - in_dev_put(out_dev); - } + return (omi != imi && omi != -1); } @@ -576,7 +582,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev, __be32 sip, __be32 tip) { /* Private VLAN is only concerned about the same ethernet segment */ - if (rt->u.dst.dev != dev) + if (rt->dst.dev != dev) return 0; /* Don't reply on self probes (often done by windowz boxes)*/ @@ -698,6 +704,7 @@ out: kfree_skb(skb); return NULL; } +EXPORT_SYMBOL(arp_create); /* * Send an arp packet. @@ -707,6 +714,7 @@ void arp_xmit(struct sk_buff *skb) /* Send it off, maybe filter it using firewalling first. 
*/ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); } +EXPORT_SYMBOL(arp_xmit); /* * Create and send an arp packet. @@ -733,6 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, arp_xmit(skb); } +EXPORT_SYMBOL(arp_send); /* * Process an arp request. @@ -741,7 +750,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, static int arp_process(struct sk_buff *skb) { struct net_device *dev = skb->dev; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct arphdr *arp; unsigned char *arp_ptr; struct rtable *rt; @@ -890,7 +899,6 @@ static int arp_process(struct sk_buff *skb) arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); } else { pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); - in_dev_put(in_dev); return 0; } goto out; @@ -936,8 +944,6 @@ static int arp_process(struct sk_buff *skb) } out: - if (in_dev) - in_dev_put(in_dev); consume_skb(skb); return 0; } @@ -1045,7 +1051,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, struct rtable * rt; if ((err = ip_route_output_key(net, &rt, &fl)) != 0) return err; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; @@ -1152,7 +1158,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, struct rtable * rt; if ((err = ip_route_output_key(net, &rt, &fl)) != 0) return err; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; @@ -1453,14 +1459,3 @@ static int __init arp_proc_init(void) } #endif /* CONFIG_PROC_FS */ - -EXPORT_SYMBOL(arp_broken_ops); -EXPORT_SYMBOL(arp_find); -EXPORT_SYMBOL(arp_create); -EXPORT_SYMBOL(arp_xmit); -EXPORT_SYMBOL(arp_send); -EXPORT_SYMBOL(arp_tbl); - -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) -EXPORT_SYMBOL(clip_tbl_hook); -#endif diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index fb2465811b48..f0550941df7b 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -69,9 +69,7 @@ int 
ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; - sk_dst_set(sk, &rt->u.dst); + sk_dst_set(sk, &rt->dst); return(0); } - EXPORT_SYMBOL(ip4_datagram_connect); - diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 382bc768ed56..da14c49284f4 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1081,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } ip_mc_up(in_dev); /* fall through */ + case NETDEV_NOTIFY_PEERS: case NETDEV_CHANGEADDR: /* Send gratuitous ARP to notify of link change */ if (IN_DEV_ARP_NOTIFY(in_dev)) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4f0ed458c883..a43968918350 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -175,6 +175,7 @@ out: fib_res_put(&res); return dev; } +EXPORT_SYMBOL(ip_dev_find); /* * Find address type as if only "dev" was present in the system. If @@ -214,12 +215,14 @@ unsigned int inet_addr_type(struct net *net, __be32 addr) { return __inet_dev_addr_type(net, NULL, addr); } +EXPORT_SYMBOL(inet_addr_type); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { return __inet_dev_addr_type(net, dev, addr); } +EXPORT_SYMBOL(inet_dev_addr_type); /* Given (packet source, input interface) and optional (dst, oif, tos): - (main) check, that source is valid i.e. 
not broadcast or our local @@ -284,7 +287,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (no_addr) goto last_resort; if (rpf == 1) - goto e_inval; + goto e_rpf; fl.oif = dev->ifindex; ret = 0; @@ -299,7 +302,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, last_resort: if (rpf) - goto e_inval; + goto e_rpf; *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); *itag = 0; return 0; @@ -308,6 +311,8 @@ e_inval_res: fib_res_put(&res); e_inval: return -EINVAL; +e_rpf: + return -EXDEV; } static inline __be32 sk_extract_addr(struct sockaddr *addr) @@ -1075,7 +1080,3 @@ void __init ip_fib_init(void) fib_hash_init(); } - -EXPORT_SYMBOL(inet_addr_type); -EXPORT_SYMBOL(inet_dev_addr_type); -EXPORT_SYMBOL(ip_dev_find); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index d65e9215bcd7..a0d847c7cba5 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -181,6 +181,7 @@ const struct icmp_err icmp_err_convert[] = { .fatal = 1, }, }; +EXPORT_SYMBOL(icmp_err_convert); /* * ICMP control array. This specifies what to do with each ICMP. 
@@ -267,11 +268,12 @@ int xrlim_allow(struct dst_entry *dst, int timeout) dst->rate_tokens = token; return rc; } +EXPORT_SYMBOL(xrlim_allow); static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, int type, int code) { - struct dst_entry *dst = &rt->u.dst; + struct dst_entry *dst = &rt->dst; int rc = 1; if (type > NR_ICMP_TYPES) @@ -327,7 +329,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(dev_net((*rt)->u.dst.dev)); + sk = icmp_sk(dev_net((*rt)->dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -359,7 +361,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); struct sock *sk; struct inet_sock *inet; __be32 daddr; @@ -427,7 +429,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; - net = dev_net(rt->u.dst.dev); + net = dev_net(rt->dst.dev); /* * Find the original header. It is expected to be valid, of course. @@ -596,9 +598,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* Ugh! */ orefdst = skb_in->_skb_refdst; /* save old refdst */ err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, - RT_TOS(tos), rt2->u.dst.dev); + RT_TOS(tos), rt2->dst.dev); - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); rt2 = skb_rtable(skb_in); skb_in->_skb_refdst = orefdst; /* restore old refdst */ } @@ -610,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) XFRM_LOOKUP_ICMP); switch (err) { case 0: - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = rt2; break; case -EPERM: @@ -629,7 +631,7 @@ route_done: /* RFC says return as much as we can without exceeding 576 bytes. 
*/ - room = dst_mtu(&rt->u.dst); + room = dst_mtu(&rt->dst); if (room > 576) room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; @@ -647,6 +649,7 @@ out_unlock: icmp_xmit_unlock(sk); out:; } +EXPORT_SYMBOL(icmp_send); /* @@ -925,6 +928,7 @@ static void icmp_address(struct sk_buff *skb) /* * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain * loudly if an inconsistency is found. + * called with rcu_read_lock() */ static void icmp_address_reply(struct sk_buff *skb) @@ -935,12 +939,12 @@ static void icmp_address_reply(struct sk_buff *skb) struct in_ifaddr *ifa; if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) - goto out; + return; - in_dev = in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) - goto out; - rcu_read_lock(); + return; + if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { @@ -958,9 +962,6 @@ static void icmp_address_reply(struct sk_buff *skb) mp, dev->name, &rt->rt_src); } } - rcu_read_unlock(); - in_dev_put(in_dev); -out:; } static void icmp_discard(struct sk_buff *skb) @@ -974,7 +975,7 @@ int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); @@ -1216,7 +1217,3 @@ int __init icmp_init(void) { return register_pernet_subsys(&icmp_sk_ops); } - -EXPORT_SYMBOL(icmp_err_convert); -EXPORT_SYMBOL(icmp_send); -EXPORT_SYMBOL(xrlim_allow); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5fff865a4fa7..a1ad0e7180d2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) return NULL; } - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); skb->dev = dev; skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device 
*dev, int size) pip->saddr = rt->rt_src; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(pip, &rt->u.dst, NULL); + ip_select_ident(pip, &rt->dst, NULL); ((u8*)&pip[1])[0] = IPOPT_RA; ((u8*)&pip[1])[1] = 4; ((u8*)&pip[1])[2] = 0; @@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = rt->rt_src; iph->protocol = IPPROTO_IGMP; - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); ((u8*)&iph[1])[0] = IPOPT_RA; ((u8*)&iph[1])[1] = 4; ((u8*)&iph[1])[2] = 0; @@ -916,18 +916,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, read_unlock(&in_dev->mc_list_lock); } +/* called in rcu_read_lock() section */ int igmp_rcv(struct sk_buff *skb) { /* This basically follows the spec line by line -- see RFC1112 */ struct igmphdr *ih; - struct in_device *in_dev = in_dev_get(skb->dev); + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); int len = skb->len; if (in_dev == NULL) goto drop; if (!pskb_may_pull(skb, sizeof(struct igmphdr))) - goto drop_ref; + goto drop; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -937,7 +938,7 @@ int igmp_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; if (__skb_checksum_complete(skb)) - goto drop_ref; + goto drop; } ih = igmp_hdr(skb); @@ -957,7 +958,6 @@ int igmp_rcv(struct sk_buff *skb) break; case IGMP_PIM: #ifdef CONFIG_IP_PIMSM_V1 - in_dev_put(in_dev); return pim_rcv_v1(skb); #endif case IGMPV3_HOST_MEMBERSHIP_REPORT: @@ -971,8 +971,6 @@ int igmp_rcv(struct sk_buff *skb) break; } -drop_ref: - in_dev_put(in_dev); drop: kfree_skb(skb); return 0; @@ -1246,6 +1244,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) out: return; } 
+EXPORT_SYMBOL(ip_mc_inc_group); /* * Resend IGMP JOIN report; used for bonding. @@ -1268,6 +1267,7 @@ void ip_mc_rejoin_group(struct ip_mc_list *im) igmp_ifc_event(in_dev); #endif } +EXPORT_SYMBOL(ip_mc_rejoin_group); /* * A socket has left a multicast group on device dev @@ -1298,6 +1298,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) } } } +EXPORT_SYMBOL(ip_mc_dec_group); /* Device changing type */ @@ -1427,7 +1428,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) } if (!dev && !ip_route_output_key(net, &rt, &fl)) { - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); } if (dev) { @@ -1646,8 +1647,7 @@ static int sf_setstate(struct ip_mc_list *pmc) if (dpsf->sf_inaddr == psf->sf_inaddr) break; if (!dpsf) { - dpsf = (struct ip_sf_list *) - kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; @@ -1807,6 +1807,7 @@ done: rtnl_unlock(); return err; } +EXPORT_SYMBOL(ip_mc_join_group); static void ip_sf_socklist_reclaim(struct rcu_head *rp) { @@ -2679,8 +2680,3 @@ int __init igmp_mc_proc_init(void) return register_pernet_subsys(&igmp_net_ops); } #endif - -EXPORT_SYMBOL(ip_mc_dec_group); -EXPORT_SYMBOL(ip_mc_inc_group); -EXPORT_SYMBOL(ip_mc_join_group); -EXPORT_SYMBOL(ip_mc_rejoin_group); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 70eb3507c406..7174370b1195 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -84,7 +84,6 @@ int inet_csk_bind_conflict(const struct sock *sk, } return node != NULL; } - EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); /* Obtain a reference to a local port for the given sock, @@ -212,7 +211,6 @@ fail: local_bh_enable(); return ret; } - EXPORT_SYMBOL_GPL(inet_csk_get_port); /* @@ -305,7 +303,6 @@ out_err: *err = error; goto out; } - EXPORT_SYMBOL(inet_csk_accept); /* @@ -327,7 +324,6 @@ void inet_csk_init_xmit_timers(struct sock *sk, 
setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); icsk->icsk_pending = icsk->icsk_ack.pending = 0; } - EXPORT_SYMBOL(inet_csk_init_xmit_timers); void inet_csk_clear_xmit_timers(struct sock *sk) @@ -340,21 +336,18 @@ void inet_csk_clear_xmit_timers(struct sock *sk) sk_stop_timer(sk, &icsk->icsk_delack_timer); sk_stop_timer(sk, &sk->sk_timer); } - EXPORT_SYMBOL(inet_csk_clear_xmit_timers); void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } - EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) { sk_reset_timer(sk, &sk->sk_timer, jiffies + len); } - EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); struct dst_entry *inet_csk_route_req(struct sock *sk, @@ -383,7 +376,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; - return &rt->u.dst; + return &rt->dst; route_err: ip_rt_put(rt); @@ -391,7 +384,6 @@ no_route: IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } - EXPORT_SYMBOL_GPL(inet_csk_route_req); static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, @@ -433,7 +425,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, return req; } - EXPORT_SYMBOL_GPL(inet_csk_search_req); void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, @@ -447,11 +438,11 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); inet_csk_reqsk_queue_added(sk, timeout); } +EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); /* Only thing we need from tcp.h */ extern int sysctl_tcp_synack_retries; -EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); /* Decide when to expire the request and when to resend SYN-ACK */ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, @@ -569,7 +560,6 @@ void 
inet_csk_reqsk_queue_prune(struct sock *parent, if (lopt->qlen) inet_csk_reset_keepalive_timer(parent, interval); } - EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, @@ -599,7 +589,6 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, } return newsk; } - EXPORT_SYMBOL_GPL(inet_csk_clone); /* @@ -630,7 +619,6 @@ void inet_csk_destroy_sock(struct sock *sk) percpu_counter_dec(sk->sk_prot->orphan_count); sock_put(sk); } - EXPORT_SYMBOL(inet_csk_destroy_sock); int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) @@ -665,7 +653,6 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) __reqsk_queue_destroy(&icsk->icsk_accept_queue); return -EADDRINUSE; } - EXPORT_SYMBOL_GPL(inet_csk_listen_start); /* @@ -720,7 +707,6 @@ void inet_csk_listen_stop(struct sock *sk) } WARN_ON(sk->sk_ack_backlog); } - EXPORT_SYMBOL_GPL(inet_csk_listen_stop); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) @@ -732,7 +718,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) sin->sin_addr.s_addr = inet->inet_daddr; sin->sin_port = inet->inet_dport; } - EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); #ifdef CONFIG_COMPAT @@ -747,7 +732,6 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, return icsk->icsk_af_ops->getsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, @@ -761,6 +745,5 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, return icsk->icsk_af_ops->setsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); #endif diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index a2ca6aed763b..5ff2a51b6d0c 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -114,7 +114,6 @@ void 
inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) fq->last_in |= INET_FRAG_COMPLETE; } } - EXPORT_SYMBOL(inet_frag_kill); static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d3e160a88219..fb7ad5a21ff3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -99,7 +99,6 @@ void inet_put_port(struct sock *sk) __inet_put_port(sk); local_bh_enable(); } - EXPORT_SYMBOL(inet_put_port); void __inet_inherit_port(struct sock *sk, struct sock *child) @@ -116,7 +115,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) inet_csk(child)->icsk_bind_hash = tb; spin_unlock(&head->lock); } - EXPORT_SYMBOL_GPL(__inet_inherit_port); static inline int compute_score(struct sock *sk, struct net *net, @@ -546,7 +544,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), __inet_check_established, __inet_hash_nolisten); } - EXPORT_SYMBOL_GPL(inet_hash_connect); void inet_hashinfo_init(struct inet_hashinfo *h) @@ -560,5 +557,4 @@ void inet_hashinfo_init(struct inet_hashinfo *h) i + LISTENING_NULLS_BASE); } } - EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 6bcfe52a9c87..9ffa24b9a804 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -51,8 +51,8 @@ * lookups performed with disabled BHs. * * Serialisation issues. - * 1. Nodes may appear in the tree only with the pool write lock held. - * 2. Nodes may disappear from the tree only with the pool write lock held + * 1. Nodes may appear in the tree only with the pool lock held. + * 2. Nodes may disappear from the tree only with the pool lock held * AND reference count being 0. * 3. Nodes appears and disappears from unused node list only under * "inet_peer_unused_lock". 
@@ -64,23 +64,31 @@ * usually under some other lock to prevent node disappearing * dtime: unused node list lock * v4daddr: unchangeable - * ip_id_count: idlock + * ip_id_count: atomic value (no lock needed) */ static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height -static struct inet_peer peer_fake_node = { - .avl_left = &peer_fake_node, - .avl_right = &peer_fake_node, + +#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) +static const struct inet_peer peer_fake_node = { + .avl_left = peer_avl_empty, + .avl_right = peer_avl_empty, .avl_height = 0 }; -#define peer_avl_empty (&peer_fake_node) -static struct inet_peer *peer_root = peer_avl_empty; -static DEFINE_RWLOCK(peer_pool_lock); + +static struct { + struct inet_peer *root; + spinlock_t lock; + int total; +} peers = { + .root = peer_avl_empty, + .lock = __SPIN_LOCK_UNLOCKED(peers.lock), + .total = 0, +}; #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ -static int peer_total; /* Exported for sysctl_net_ipv4. 
*/ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more * aggressively at this stage */ @@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min int inet_peer_gc_mintime __read_mostly = 10 * HZ; int inet_peer_gc_maxtime __read_mostly = 120 * HZ; -static LIST_HEAD(unused_peers); -static DEFINE_SPINLOCK(inet_peer_unused_lock); +static struct { + struct list_head list; + spinlock_t lock; +} unused_peers = { + .list = LIST_HEAD_INIT(unused_peers.list), + .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock), +}; static void peer_check_expire(unsigned long dummy); static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); @@ -116,7 +129,7 @@ void __init inet_initpeers(void) peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); /* All the timers, started at system startup tend @@ -131,38 +144,69 @@ void __init inet_initpeers(void) /* Called with or without local BH being disabled. */ static void unlink_from_unused(struct inet_peer *p) { - spin_lock_bh(&inet_peer_unused_lock); - list_del_init(&p->unused); - spin_unlock_bh(&inet_peer_unused_lock); + if (!list_empty(&p->unused)) { + spin_lock_bh(&unused_peers.lock); + list_del_init(&p->unused); + spin_unlock_bh(&unused_peers.lock); + } } /* * Called with local BH disabled and the pool lock held. - * _stack is known to be NULL or not at compile time, - * so compiler will optimize the if (_stack) tests. 
*/ #define lookup(_daddr, _stack) \ ({ \ struct inet_peer *u, **v; \ - if (_stack != NULL) { \ - stackptr = _stack; \ - *stackptr++ = &peer_root; \ - } \ - for (u = peer_root; u != peer_avl_empty; ) { \ + \ + stackptr = _stack; \ + *stackptr++ = &peers.root; \ + for (u = peers.root; u != peer_avl_empty; ) { \ if (_daddr == u->v4daddr) \ break; \ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ v = &u->avl_left; \ else \ v = &u->avl_right; \ - if (_stack != NULL) \ - *stackptr++ = v; \ + *stackptr++ = v; \ u = *v; \ } \ u; \ }) -/* Called with local BH disabled and the pool write lock held. */ +/* + * Called with rcu_read_lock_bh() + * Because we hold no lock against a writer, its quite possible we fall + * in an endless loop. + * But every pointer we follow is guaranteed to be valid thanks to RCU. + * We exit from this function if number of links exceeds PEER_MAXDEPTH + */ +static struct inet_peer *lookup_rcu_bh(__be32 daddr) +{ + struct inet_peer *u = rcu_dereference_bh(peers.root); + int count = 0; + + while (u != peer_avl_empty) { + if (daddr == u->v4daddr) { + /* Before taking a reference, check if this entry was + * deleted, unlink_from_pool() sets refcnt=-1 to make + * distinction between an unused entry (refcnt=0) and + * a freed one. + */ + if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1))) + u = NULL; + return u; + } + if ((__force __u32)daddr < (__force __u32)u->v4daddr) + u = rcu_dereference_bh(u->avl_left); + else + u = rcu_dereference_bh(u->avl_right); + if (unlikely(++count == PEER_MAXDEPTH)) + break; + } + return NULL; +} + +/* Called with local BH disabled and the pool lock held. */ #define lookup_rightempty(start) \ ({ \ struct inet_peer *u, **v; \ @@ -176,9 +220,10 @@ static void unlink_from_unused(struct inet_peer *p) u; \ }) -/* Called with local BH disabled and the pool write lock held. +/* Called with local BH disabled and the pool lock held. * Variable names are the proof of operation correctness. 
- * Look into mm/map_avl.c for more detail description of the ideas. */ + * Look into mm/map_avl.c for more detail description of the ideas. + */ static void peer_avl_rebalance(struct inet_peer **stack[], struct inet_peer ***stackend) { @@ -254,15 +299,21 @@ static void peer_avl_rebalance(struct inet_peer **stack[], } } -/* Called with local BH disabled and the pool write lock held. */ +/* Called with local BH disabled and the pool lock held. */ #define link_to_pool(n) \ do { \ n->avl_height = 1; \ n->avl_left = peer_avl_empty; \ n->avl_right = peer_avl_empty; \ + smp_wmb(); /* lockless readers can catch us now */ \ **--stackptr = n; \ peer_avl_rebalance(stack, stackptr); \ -} while(0) +} while (0) + +static void inetpeer_free_rcu(struct rcu_head *head) +{ + kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); +} /* May be called with local BH enabled. */ static void unlink_from_pool(struct inet_peer *p) @@ -271,13 +322,14 @@ static void unlink_from_pool(struct inet_peer *p) do_free = 0; - write_lock_bh(&peer_pool_lock); + spin_lock_bh(&peers.lock); /* Check the reference counter. It was artificially incremented by 1 - * in cleanup() function to prevent sudden disappearing. If the - * reference count is still 1 then the node is referenced only as `p' - * here and from the pool. So under the exclusive pool lock it's safe - * to remove the node and free it later. */ - if (atomic_read(&p->refcnt) == 1) { + * in cleanup() function to prevent sudden disappearing. If we can + * atomically (because of lockless readers) take this last reference, + * it's safe to remove the node and free it later. + * We use refcnt=-1 to alert lockless readers this entry is deleted. 
+ */ + if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { struct inet_peer **stack[PEER_MAXDEPTH]; struct inet_peer ***stackptr, ***delp; if (lookup(p->v4daddr, stack) != p) @@ -303,20 +355,21 @@ static void unlink_from_pool(struct inet_peer *p) delp[1] = &t->avl_left; /* was &p->avl_left */ } peer_avl_rebalance(stack, stackptr); - peer_total--; + peers.total--; do_free = 1; } - write_unlock_bh(&peer_pool_lock); + spin_unlock_bh(&peers.lock); if (do_free) - kmem_cache_free(peer_cachep, p); + call_rcu_bh(&p->rcu, inetpeer_free_rcu); else /* The node is used again. Decrease the reference counter * back. The loop "cleanup -> unlink_from_unused * -> unlink_from_pool -> putpeer -> link_to_unused * -> cleanup (for the same node)" * doesn't really exist because the entry will have a - * recent deletion time and will not be cleaned again soon. */ + * recent deletion time and will not be cleaned again soon. + */ inet_putpeer(p); } @@ -326,16 +379,16 @@ static int cleanup_once(unsigned long ttl) struct inet_peer *p = NULL; /* Remove the first entry from the list of unused nodes. */ - spin_lock_bh(&inet_peer_unused_lock); - if (!list_empty(&unused_peers)) { + spin_lock_bh(&unused_peers.lock); + if (!list_empty(&unused_peers.list)) { __u32 delta; - p = list_first_entry(&unused_peers, struct inet_peer, unused); + p = list_first_entry(&unused_peers.list, struct inet_peer, unused); delta = (__u32)jiffies - p->dtime; if (delta < ttl) { /* Do not prune fresh entries. */ - spin_unlock_bh(&inet_peer_unused_lock); + spin_unlock_bh(&unused_peers.lock); return -1; } @@ -345,7 +398,7 @@ static int cleanup_once(unsigned long ttl) * before unlink_from_pool() call. */ atomic_inc(&p->refcnt); } - spin_unlock_bh(&inet_peer_unused_lock); + spin_unlock_bh(&unused_peers.lock); if (p == NULL) /* It means that the total number of USED entries has @@ -360,62 +413,56 @@ static int cleanup_once(unsigned long ttl) /* Called with or without local BH being disabled. 
*/ struct inet_peer *inet_getpeer(__be32 daddr, int create) { - struct inet_peer *p, *n; + struct inet_peer *p; struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; - /* Look up for the address quickly. */ - read_lock_bh(&peer_pool_lock); - p = lookup(daddr, NULL); - if (p != peer_avl_empty) - atomic_inc(&p->refcnt); - read_unlock_bh(&peer_pool_lock); + /* Look up for the address quickly, lockless. + * Because of a concurrent writer, we might not find an existing entry. + */ + rcu_read_lock_bh(); + p = lookup_rcu_bh(daddr); + rcu_read_unlock_bh(); + + if (p) { + /* The existing node has been found. + * Remove the entry from unused list if it was there. + */ + unlink_from_unused(p); + return p; + } + /* retry an exact lookup, taking the lock before. + * At least, nodes should be hot in our cache. + */ + spin_lock_bh(&peers.lock); + p = lookup(daddr, stack); if (p != peer_avl_empty) { - /* The existing node has been found. */ + atomic_inc(&p->refcnt); + spin_unlock_bh(&peers.lock); /* Remove the entry from unused list if it was there. */ unlink_from_unused(p); return p; } + p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; + if (p) { + p->v4daddr = daddr; + atomic_set(&p->refcnt, 1); + atomic_set(&p->rid, 0); + atomic_set(&p->ip_id_count, secure_ip_id(daddr)); + p->tcp_ts_stamp = 0; + INIT_LIST_HEAD(&p->unused); + + + /* Link the node. */ + link_to_pool(p); + peers.total++; + } + spin_unlock_bh(&peers.lock); - if (!create) - return NULL; - - /* Allocate the space outside the locked region. */ - n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); - if (n == NULL) - return NULL; - n->v4daddr = daddr; - atomic_set(&n->refcnt, 1); - atomic_set(&n->rid, 0); - atomic_set(&n->ip_id_count, secure_ip_id(daddr)); - n->tcp_ts_stamp = 0; - - write_lock_bh(&peer_pool_lock); - /* Check if an entry has suddenly appeared. */ - p = lookup(daddr, stack); - if (p != peer_avl_empty) - goto out_free; - - /* Link the node. 
*/ - link_to_pool(n); - INIT_LIST_HEAD(&n->unused); - peer_total++; - write_unlock_bh(&peer_pool_lock); - - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ cleanup_once(0); - return n; - -out_free: - /* The appropriate node is already in the pool. */ - atomic_inc(&p->refcnt); - write_unlock_bh(&peer_pool_lock); - /* Remove the entry from unused list if it was there. */ - unlink_from_unused(p); - /* Free preallocated the preallocated node. */ - kmem_cache_free(peer_cachep, n); return p; } @@ -425,12 +472,12 @@ static void peer_check_expire(unsigned long dummy) unsigned long now = jiffies; int ttl; - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) ttl = inet_peer_minttl; else ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * - peer_total / inet_peer_threshold * HZ; + peers.total / inet_peer_threshold * HZ; while (!cleanup_once(ttl)) { if (jiffies != now) break; @@ -439,22 +486,25 @@ static void peer_check_expire(unsigned long dummy) /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). 
*/ - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; else peer_periodic_timer.expires = jiffies + inet_peer_gc_maxtime - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peer_total / inet_peer_threshold * HZ; + peers.total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } void inet_putpeer(struct inet_peer *p) { - spin_lock_bh(&inet_peer_unused_lock); - if (atomic_dec_and_test(&p->refcnt)) { - list_add_tail(&p->unused, &unused_peers); + local_bh_disable(); + + if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { + list_add_tail(&p->unused, &unused_peers.list); p->dtime = (__u32)jiffies; + spin_unlock(&unused_peers.lock); } - spin_unlock_bh(&inet_peer_unused_lock); + + local_bh_enable(); } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 56cdf68a074c..99461f09320f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; - if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && + if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { - IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(dst_mtu(&rt->u.dst))); + htonl(dst_mtu(&rt->dst))); goto drop; } /* We are about to mangle packet. Copy it! 
*/ - if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) + if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len)) goto drop; iph = ip_hdr(skb); @@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, - rt->u.dst.dev, ip_forward_finish); + rt->dst.dev, ip_forward_finish); sr_failed: /* diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 75347ea70ea0..b7c41654dde5 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -124,11 +124,8 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a) } /* Memory Tracking Functions. */ -static __inline__ void frag_kfree_skb(struct netns_frags *nf, - struct sk_buff *skb, int *work) +static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) { - if (work) - *work -= skb->truesize; atomic_sub(skb->truesize, &nf->mem); kfree_skb(skb); } @@ -309,7 +306,7 @@ static int ip_frag_reinit(struct ipq *qp) fp = qp->q.fragments; do { struct sk_buff *xp = fp->next; - frag_kfree_skb(qp->q.net, fp, NULL); + frag_kfree_skb(qp->q.net, fp); fp = xp; } while (fp); @@ -317,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp) qp->q.len = 0; qp->q.meat = 0; qp->q.fragments = NULL; + qp->q.fragments_tail = NULL; qp->iif = 0; return 0; @@ -389,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * in the chain of fragments so far. We must know where to put * this fragment, right? */ + prev = qp->q.fragments_tail; + if (!prev || FRAG_CB(prev)->offset < offset) { + next = NULL; + goto found; + } prev = NULL; for (next = qp->q.fragments; next != NULL; next = next->next) { if (FRAG_CB(next)->offset >= offset) @@ -396,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) prev = next; } +found: /* We found where to put this one. 
Check for overlap with * preceding fragment, and, if needed, align things so that * any overlaps are eliminated. @@ -446,7 +450,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->q.fragments = next; qp->q.meat -= free_it->len; - frag_kfree_skb(qp->q.net, free_it, NULL); + frag_kfree_skb(qp->q.net, free_it); } } @@ -454,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) /* Insert this fragment in the chain of fragments. */ skb->next = next; + if (!next) + qp->q.fragments_tail = skb; if (prev) prev->next = skb; else @@ -507,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_nomem; fp->next = head->next; + if (!fp->next) + qp->q.fragments_tail = fp; prev->next = fp; skb_morph(head, qp->q.fragments); @@ -556,7 +564,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &qp->q.net->mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -566,8 +573,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &qp->q.net->mem); } + atomic_sub(head->truesize, &qp->q.net->mem); head->next = NULL; head->dev = dev; @@ -578,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph->tot_len = htons(len); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; + qp->q.fragments_tail = NULL; return 0; out_nomem: @@ -624,6 +632,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) kfree_skb(skb); return -ENOMEM; } +EXPORT_SYMBOL(ip_defrag); #ifdef CONFIG_SYSCTL static int zero; @@ -777,5 +786,3 @@ void __init ipfrag_init(void) ip4_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&ip4_frags); } - -EXPORT_SYMBOL(ip_defrag); diff --git a/net/ipv4/ip_gre.c 
b/net/ipv4/ip_gre.c index 32618e11076d..945b20a5ad50 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -731,6 +731,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tos = 0; if (skb->protocol == htons(ETH_P_IP)) tos = old_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); } { @@ -745,7 +747,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -755,7 +757,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev df = tiph->frag_off; if (df) - mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; + mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; @@ -803,7 +805,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tunnel->err_count = 0; } - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { @@ -830,7 +832,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. 
@@ -853,7 +855,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif else - iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); + iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); } ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; @@ -915,7 +917,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_GRE }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } @@ -1174,7 +1176,7 @@ static int ipgre_open(struct net_device *dev) struct rtable *rt; if (ip_route_output_key(dev_net(dev), &rt, &fl)) return -EADDRNOTAVAIL; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d930dc5e4d85..d859bcc26cb7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -146,7 +146,7 @@ #include <linux/netlink.h> /* - * Process Router Attention IP option + * Process Router Attention IP option (RFC 2113) */ int ip_call_ra_chain(struct sk_buff *skb) { @@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb) struct sock *last = NULL; struct net_device *dev = skb->dev; - read_lock(&ip_ra_lock); - for (ra = ip_ra_chain; ra; ra = ra->next) { + for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report @@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb) sk->sk_bound_dev_if == dev->ifindex) && net_eq(sock_net(sk), dev_net(dev))) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { - read_unlock(&ip_ra_lock); + if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) return 1; - } } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb) if (last) { 
raw_rcv(last, skb); - read_unlock(&ip_ra_lock); return 1; } - read_unlock(&ip_ra_lock); return 0; } @@ -298,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb) } if (unlikely(opt->srr)) { - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); + if (in_dev) { if (!IN_DEV_SOURCE_ROUTE(in_dev)) { if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) printk(KERN_INFO "source route option %pI4 -> %pI4\n", &iph->saddr, &iph->daddr); - in_dev_put(in_dev); goto drop; } - - in_dev_put(in_dev); } if (ip_options_rcv_srr(skb)) @@ -340,13 +333,16 @@ static int ip_rcv_finish(struct sk_buff *skb) else if (err == -ENETUNREACH) IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_INNOROUTES); + else if (err == -EXDEV) + NET_INC_STATS_BH(dev_net(skb->dev), + LINUX_MIB_IPRPFILTER); goto drop; } } #ifdef CONFIG_NET_CLS_ROUTE if (unlikely(skb_dst(skb)->tclassid)) { - struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); + struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); u32 idx = skb_dst(skb)->tclassid; st[idx&0xFF].o_packets++; st[idx&0xFF].o_bytes += skb->len; @@ -360,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) - IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, skb->len); return dst_input(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9a4a6c96cb0d..04b69896df5f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -89,6 +89,7 @@ __inline__ void ip_send_check(struct iphdr *iph) iph->check = 0; iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } +EXPORT_SYMBOL(ip_send_check); int __ip_local_out(struct sk_buff *skb) { @@ -151,15 +152,15 @@ int 
ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->version = 4; iph->ihl = 5; iph->tos = inet->tos; - if (ip_dont_fragment(sk, &rt->u.dst)) + if (ip_dont_fragment(sk, &rt->dst)) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; - iph->ttl = ip_select_ttl(inet, &rt->u.dst); + iph->ttl = ip_select_ttl(inet, &rt->dst); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->protocol = sk->sk_protocol; - ip_select_ident(iph, &rt->u.dst, sk); + ip_select_ident(iph, &rt->dst, sk); if (opt && opt->optlen) { iph->ihl += opt->optlen>>2; @@ -172,7 +173,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, /* Send it out. */ return ip_local_out(skb); } - EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); static inline int ip_finish_output2(struct sk_buff *skb) @@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb) { struct sock *sk = skb->sk; struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->u.dst.dev; + struct net_device *dev = rt->dst.dev; /* * If the indicated interface is up and running, send the packet. 
@@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb) if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) goto no_route; } - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); } - skb_dst_set_noref(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); packet_routed: if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) @@ -372,11 +372,11 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df) + if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; - iph->ttl = ip_select_ttl(inet, &rt->u.dst); + iph->ttl = ip_select_ttl(inet, &rt->dst); iph->protocol = sk->sk_protocol; iph->saddr = rt->rt_src; iph->daddr = rt->rt_dst; @@ -387,7 +387,7 @@ packet_routed: ip_options_build(skb, opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(iph, &rt->u.dst, sk, + ip_select_ident_more(iph, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); skb->priority = sk->sk_priority; @@ -403,6 +403,7 @@ no_route: kfree_skb(skb); return -EHOSTUNREACH; } +EXPORT_SYMBOL(ip_queue_xmit); static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) @@ -411,7 +412,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->priority = from->priority; to->protocol = from->protocol; skb_dst_drop(to); - skb_dst_set(to, dst_clone(skb_dst(from))); + skb_dst_copy(to, from); to->dev = from->dev; to->mark = from->mark; @@ -442,17 +443,16 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct iphdr *iph; - int raw = 0; int ptr; struct net_device *dev; struct sk_buff *skb2; - unsigned int mtu, hlen, left, len, ll_rs, pad; + unsigned int mtu, hlen, left, len, ll_rs; int offset; __be16 not_last_frag; struct rtable *rt = skb_rtable(skb); int err = 0; - dev = 
rt->u.dst.dev; + dev = rt->dst.dev; /* * Point into the IP datagram header. @@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) */ hlen = iph->ihl * 4; - mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ + mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge) mtu -= nf_bridge_mtu_reduction(skb); @@ -580,14 +580,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) slow_path: left = skb->len - hlen; /* Space per frame */ - ptr = raw + hlen; /* Where to start from */ + ptr = hlen; /* Where to start from */ /* for bridged IP traffic encapsulated inside f.e. a vlan header, * we need to make room for the encapsulating header */ - pad = nf_bridge_pad(skb); - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); - mtu -= pad; + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); /* * Fragment the datagram. @@ -697,7 +695,6 @@ fail: IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; } - EXPORT_SYMBOL(ip_fragment); int @@ -716,6 +713,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk } return 0; } +EXPORT_SYMBOL(ip_generic_getfrag); static inline __wsum csum_page(struct page *page, int offset, int copy) @@ -833,13 +831,13 @@ int ip_append_data(struct sock *sk, */ *rtp = NULL; inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? 
- rt->u.dst.dev->mtu : - dst_mtu(rt->u.dst.path); - inet->cork.dst = &rt->u.dst; + rt->dst.dev->mtu : + dst_mtu(rt->dst.path); + inet->cork.dst = &rt->dst; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - if ((exthdrlen = rt->u.dst.header_len) != 0) { + if ((exthdrlen = rt->dst.header_len) != 0) { length += exthdrlen; transhdrlen += exthdrlen; } @@ -852,7 +850,7 @@ int ip_append_data(struct sock *sk, exthdrlen = 0; mtu = inet->cork.fragsize; } - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; @@ -869,14 +867,16 @@ int ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->u.dst.dev->features & NETIF_F_V4_CSUM && + rt->dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen) csummode = CHECKSUM_PARTIAL; + skb = skb_peek_tail(&sk->sk_write_queue); + inet->cork.length += length; - if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) && + if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); @@ -892,7 +892,7 @@ int ip_append_data(struct sock *sk, * adding appropriate IP header. */ - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { @@ -924,7 +924,7 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && - !(rt->u.dst.dev->features&NETIF_F_SG)) + !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -935,7 +935,7 @@ alloc_new_skb: * the last. 
*/ if (datalen == length + fraggap) - alloclen += rt->u.dst.trailer_len; + alloclen += rt->dst.trailer_len; if (transhdrlen) { skb = sock_alloc_send_skb(sk, @@ -1008,7 +1008,7 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; @@ -1103,10 +1103,10 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if (inet->cork.flags & IPCORK_OPT) opt = inet->cork.opt; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) + if (!(rt->dst.dev->features&NETIF_F_SG)) return -EOPNOTSUPP; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); mtu = inet->cork.fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); @@ -1121,8 +1121,9 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, return -EINVAL; inet->cork.length += size; - if ((sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + if ((size + skb->len > mtu) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } @@ -1274,8 +1275,8 @@ int ip_push_pending_frames(struct sock *sk) * If local_df is set too, we still allow to fragment this frame * locally. 
*/ if (inet->pmtudisc >= IP_PMTUDISC_DO || - (skb->len <= dst_mtu(&rt->u.dst) && - ip_dont_fragment(sk, &rt->u.dst))) + (skb->len <= dst_mtu(&rt->dst) && + ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); if (inet->cork.flags & IPCORK_OPT) @@ -1284,7 +1285,7 @@ int ip_push_pending_frames(struct sock *sk) if (rt->rt_type == RTN_MULTICAST) ttl = inet->mc_ttl; else - ttl = ip_select_ttl(inet, &rt->u.dst); + ttl = ip_select_ttl(inet, &rt->dst); iph = (struct iphdr *)skb->data; iph->version = 4; @@ -1295,7 +1296,7 @@ int ip_push_pending_frames(struct sock *sk) } iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->u.dst, sk); + ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; iph->saddr = rt->rt_src; @@ -1308,7 +1309,7 @@ int ip_push_pending_frames(struct sock *sk) * on dst refcount */ inet->cork.dst = NULL; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (iph->protocol == IPPROTO_ICMP) icmp_out_count(net, ((struct icmphdr *) @@ -1445,7 +1446,3 @@ void __init ip_init(void) igmp_mc_proc_init(); #endif } - -EXPORT_SYMBOL(ip_generic_getfrag); -EXPORT_SYMBOL(ip_queue_xmit); -EXPORT_SYMBOL(ip_send_check); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ce231780a2b1..6c40a8c46e79 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -239,7 +239,16 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) sent to multicast group to reach destination designated router. */ struct ip_ra_chain *ip_ra_chain; -DEFINE_RWLOCK(ip_ra_lock); +static DEFINE_SPINLOCK(ip_ra_lock); + + +static void ip_ra_destroy_rcu(struct rcu_head *head) +{ + struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); + + sock_put(ra->saved_sk); + kfree(ra); +} int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)) @@ -251,35 +260,42 @@ int ip_ra_control(struct sock *sk, unsigned char on, new_ra = on ? 
kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; - write_lock_bh(&ip_ra_lock); + spin_lock_bh(&ip_ra_lock); for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); kfree(new_ra); return -EADDRINUSE; } - *rap = ra->next; - write_unlock_bh(&ip_ra_lock); + /* dont let ip_call_ra_chain() use sk again */ + ra->sk = NULL; + rcu_assign_pointer(*rap, ra->next); + spin_unlock_bh(&ip_ra_lock); if (ra->destructor) ra->destructor(sk); - sock_put(sk); - kfree(ra); + /* + * Delay sock_put(sk) and kfree(ra) after one rcu grace + * period. This guarantee ip_call_ra_chain() dont need + * to mess with socket refcounts. + */ + ra->saved_sk = sk; + call_rcu(&ra->rcu, ip_ra_destroy_rcu); return 0; } } if (new_ra == NULL) { - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); return -ENOBUFS; } new_ra->sk = sk; new_ra->destructor = destructor; new_ra->next = ra; - *rap = new_ra; + rcu_assign_pointer(*rap, new_ra); sock_hold(sk); - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); return 0; } @@ -449,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | - (1<<IP_MINTTL))) || + (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) || optname == IP_MULTICAST_TTL || optname == IP_MULTICAST_ALL || optname == IP_MULTICAST_LOOP || @@ -572,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, } inet->hdrincl = val ? 1 : 0; break; + case IP_NODEFRAG: + if (sk->sk_type != SOCK_RAW) { + err = -ENOPROTOOPT; + break; + } + inet->nodefrag = val ? 
1 : 0; + break; case IP_MTU_DISCOVER: if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) goto e_inval; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b9d84e800cf4..3a6e1ec5e9ae 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -665,6 +665,13 @@ ic_dhcp_init_options(u8 *options) memcpy(e, ic_req_params, sizeof(ic_req_params)); e += sizeof(ic_req_params); + if (ic_host_name_set) { + *e++ = 12; /* host-name */ + len = strlen(utsname()->nodename); + *e++ = len; + memcpy(e, utsname()->nodename, len); + e += len; + } if (*vendor_class_identifier) { printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", vendor_class_identifier); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7fd636711037..ec036731a70b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error_icmp; } } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) df |= old_iph->frag_off & htons(IP_DF); if (df) { - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { stats->collisions++; @@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. 
@@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_IPIP }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 757f25eb9b4b..179fcab866fc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -442,8 +442,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) int err; err = ipmr_fib_lookup(net, &fl, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -1553,9 +1555,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - dev = rt->u.dst.dev; + dev = rt->dst.dev; - if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { + if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. Alas, IPv4 does not allow to send ICMP, so that packets will disappear to blackhole. @@ -1566,7 +1568,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; + encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; if (skb_cow(skb, encap)) { ip_rt_put(rt); @@ -1577,7 +1579,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->bytes_out += skb->len; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_decrease_ttl(ip_hdr(skb)); /* FIXME: forward and output firewalls used to be called here. 
@@ -1728,8 +1730,10 @@ int ip_mr_input(struct sk_buff *skb) goto dont_forward; err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } if (!local) { if (IPCB(skb)->opt.router_alert) { diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 07de855e2175..d88a46c54fd1 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* Drop old route. */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ @@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) orefdst = skb->_skb_refdst; if (ip_route_input(skb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); + RT_TOS(iph->tos), rt->dst.dev) != 0) { + dst_release(&rt->dst); return -1; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); refdst_drop(orefdst); } @@ -212,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, skb->len - dataoff, 0); skb->ip_summed = CHECKSUM_NONE; - csum = __skb_checksum_complete_head(skb, dataoff + len); - if (!csum) - skb->ip_summed = CHECKSUM_UNNECESSARY; + return __skb_checksum_complete_head(skb, dataoff + len); } return csum; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1ac01b128621..6bccba31d132 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -283,16 +283,13 @@ unsigned int arpt_do_table(struct sk_buff *skb, arp = arp_hdr(skb); do { const struct arpt_entry_target *t; - int hdr_len; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { e = arpt_next_entry(e); continue; } - hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + - (2 * 
skb->dev->addr_len); - ADD_COUNTER(e->counters, hdr_len, 1); + ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); t = arpt_get_target_c(e); @@ -713,7 +710,7 @@ static void get_counters(const struct xt_table_info *t, struct arpt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = get_cpu(); /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -723,14 +720,16 @@ static void get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); - curcpu = smp_processor_id(); - i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, iter->counters.pcnt); ++i; } + local_bh_enable(); + /* Processing counters from other cpus, we can let bottom half enabled, + * (preemption is disabled) + */ for_each_possible_cpu(cpu) { if (cpu == curcpu) @@ -744,7 +743,7 @@ static void get_counters(const struct xt_table_info *t, } xt_info_wrunlock(cpu); } - local_bh_enable(); + put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -758,7 +757,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) * about). 
*/ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1005,8 +1004,7 @@ static int __do_replace(struct net *net, const char *name, struct arpt_entry *iter; ret = 0; - counters = vmalloc_node(num_counters * sizeof(struct xt_counters), - numa_node_id()); + counters = vmalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; @@ -1159,7 +1157,7 @@ static int do_add_counters(struct net *net, const void __user *user, if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len - size, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index a4e5fc5df4bf..d2c1311cb28d 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -42,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; -static DEFINE_RWLOCK(queue_lock); +static DEFINE_SPINLOCK(queue_lock); static int peer_pid __read_mostly; static unsigned int copy_range __read_mostly; static unsigned int queue_total; @@ -72,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range) break; case IPQ_COPY_PACKET: - copy_mode = mode; + if (range > 0xFFFF) + range = 0xFFFF; copy_range = range; - if (copy_range > 0xFFFF) - copy_range = 0xFFFF; + copy_mode = mode; break; default: @@ -101,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id) { struct nf_queue_entry *entry = NULL, *i; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); list_for_each_entry(i, &queue_list, list) { if ((unsigned long)i == id) { @@ -115,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id) queue_total--; } - write_unlock_bh(&queue_lock); + 
spin_unlock_bh(&queue_lock); return entry; } @@ -136,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) static void ipq_flush(ipq_cmpfn cmpfn, unsigned long data) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); __ipq_flush(cmpfn, data); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } static struct sk_buff * @@ -152,9 +152,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) struct nlmsghdr *nlh; struct timeval tv; - read_lock_bh(&queue_lock); - - switch (copy_mode) { + switch (ACCESS_ONCE(copy_mode)) { case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); @@ -162,26 +160,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_PACKET: if (entry->skb->ip_summed == CHECKSUM_PARTIAL && - (*errp = skb_checksum_help(entry->skb))) { - read_unlock_bh(&queue_lock); + (*errp = skb_checksum_help(entry->skb))) return NULL; - } - if (copy_range == 0 || copy_range > entry->skb->len) + + data_len = ACCESS_ONCE(copy_range); + if (data_len == 0 || data_len > entry->skb->len) data_len = entry->skb->len; - else - data_len = copy_range; size = NLMSG_SPACE(sizeof(*pmsg) + data_len); break; default: *errp = -EINVAL; - read_unlock_bh(&queue_lock); return NULL; } - read_unlock_bh(&queue_lock); - skb = alloc_skb(size, GFP_ATOMIC); if (!skb) goto nlmsg_failure; @@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) if (nskb == NULL) return status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (!peer_pid) goto err_out_free_nskb; @@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) __ipq_enqueue_entry(entry); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; err_out_free_nskb: kfree_skb(nskb); err_out_unlock: - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range) { int 
status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); status = __ipq_set_mode(mode, range); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -440,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb) if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (peer_pid) { if (peer_pid != pid) { - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); RCV_SKB_FAIL(-EBUSY); } } else { @@ -452,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb) peer_pid = pid; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0)); @@ -497,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this, struct netlink_notify *n = ptr; if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } return NOTIFY_DONE; } @@ -527,7 +520,7 @@ static ctl_table ipq_table[] = { #ifdef CONFIG_PROC_FS static int ip_queue_show(struct seq_file *m, void *v) { - read_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); seq_printf(m, "Peer PID : %d\n" @@ -545,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v) queue_dropped, queue_user_dropped); - read_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return 0; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4b6c5ca610fc..c439721b165a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -364,7 +364,7 @@ ipt_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); + ADD_COUNTER(e->counters, skb->len, 1); t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); @@ -884,7 +884,7 @@ get_counters(const struct xt_table_info *t, struct 
ipt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = get_cpu(); /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -894,14 +894,16 @@ get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); - curcpu = smp_processor_id(); - i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, iter->counters.pcnt); ++i; } + local_bh_enable(); + /* Processing counters from other cpus, we can let bottom half enabled, + * (preemption is disabled) + */ for_each_possible_cpu(cpu) { if (cpu == curcpu) @@ -915,7 +917,7 @@ get_counters(const struct xt_table_info *t, } xt_info_wrunlock(cpu); } - local_bh_enable(); + put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -928,7 +930,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). 
*/ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1352,7 +1354,7 @@ do_add_counters(struct net *net, const void __user *user, if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len - size, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f91c94b9a790..3a43cf36db87 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -53,12 +53,13 @@ struct clusterip_config { #endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ + struct rcu_head rcu; }; static LIST_HEAD(clusterip_configs); /* clusterip_lock protects the clusterip_configs list */ -static DEFINE_RWLOCK(clusterip_lock); +static DEFINE_SPINLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS static const struct file_operations clusterip_proc_fops; @@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c) atomic_inc(&c->refcount); } + +static void clusterip_config_rcu_free(struct rcu_head *head) +{ + kfree(container_of(head, struct clusterip_config, rcu)); +} + static inline void clusterip_config_put(struct clusterip_config *c) { if (atomic_dec_and_test(&c->refcount)) - kfree(c); + call_rcu_bh(&c->rcu, clusterip_config_rcu_free); } /* decrease the count of entries using/referencing this config. 
If last @@ -84,10 +91,11 @@ clusterip_config_put(struct clusterip_config *c) static inline void clusterip_config_entry_put(struct clusterip_config *c) { - write_lock_bh(&clusterip_lock); - if (atomic_dec_and_test(&c->entries)) { - list_del(&c->list); - write_unlock_bh(&clusterip_lock); + local_bh_disable(); + if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) { + list_del_rcu(&c->list); + spin_unlock(&clusterip_lock); + local_bh_enable(); dev_mc_del(c->dev, c->clustermac); dev_put(c->dev); @@ -100,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c) #endif return; } - write_unlock_bh(&clusterip_lock); + local_bh_enable(); } static struct clusterip_config * @@ -108,7 +116,7 @@ __clusterip_config_find(__be32 clusterip) { struct clusterip_config *c; - list_for_each_entry(c, &clusterip_configs, list) { + list_for_each_entry_rcu(c, &clusterip_configs, list) { if (c->clusterip == clusterip) return c; } @@ -121,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry) { struct clusterip_config *c; - read_lock_bh(&clusterip_lock); + rcu_read_lock_bh(); c = __clusterip_config_find(clusterip); - if (!c) { - read_unlock_bh(&clusterip_lock); - return NULL; + if (c) { + if (unlikely(!atomic_inc_not_zero(&c->refcount))) + c = NULL; + else if (entry) + atomic_inc(&c->entries); } - atomic_inc(&c->refcount); - if (entry) - atomic_inc(&c->entries); - read_unlock_bh(&clusterip_lock); + rcu_read_unlock_bh(); return c; } @@ -181,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, } #endif - write_lock_bh(&clusterip_lock); - list_add(&c->list, &clusterip_configs); - write_unlock_bh(&clusterip_lock); + spin_lock_bh(&clusterip_lock); + list_add_rcu(&c->list, &clusterip_configs); + spin_unlock_bh(&clusterip_lock); return c; } @@ -462,7 +469,7 @@ struct arp_payload { __be32 src_ip; u_int8_t dst_hw[ETH_ALEN]; __be32 dst_ip; -} __attribute__ ((packed)); +} __packed; #ifdef DEBUG static void arp_print(struct arp_payload *payload) @@ 
-733,6 +740,9 @@ static void __exit clusterip_tg_exit(void) #endif nf_unregister_hook(&cip_arp_ops); xt_unregister_target(&clusterip_tg_reg); + + /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ + rcu_barrier_bh(); } module_init(clusterip_tg_init); diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 5234f4f3499a..915fc17d7ce2 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> +#include <linux/if_arp.h> #include <linux/ip.h> #include <net/icmp.h> #include <net/udp.h> @@ -363,6 +364,42 @@ static void dump_packet(const struct nf_loginfo *info, /* maxlen = 230+ 91 + 230 + 252 = 803 */ } +static void dump_mac_header(const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & IPT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + printk("MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int i; + + printk("%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++, p++) + printk(":%02x", *p); + } + printk(" "); +} + static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { @@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf, } #endif - if (in && !out) { - /* MAC logging for input chain only. 
*/ - printk("MAC="); - if (skb->dev && skb->dev->hard_header_len && - skb->mac_header != skb->network_header) { - int i; - const unsigned char *p = skb_mac_header(skb); - for (i = 0; i < skb->dev->hard_header_len; i++,p++) - printk("%02x%c", *p, - i==skb->dev->hard_header_len - 1 - ? ' ':':'); - } else - printk(" "); - } + /* MAC logging for input path only. */ + if (in && !out) + dump_mac_header(loginfo, skb); dump_packet(loginfo, skb, 0); printk("\n"); diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index f43867d1697f..6cdb298f1035 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); + par->hooknum == NF_INET_LOCAL_OUT || + par->hooknum == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); @@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = { .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT), + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), .checkentry = netmap_tg_check, .me = THIS_MODULE }; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f5f4a888e4ec..b254dafaf429 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -95,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) } tcph->rst = 1; - tcph->check = tcp_v4_check(sizeof(struct tcphdr), - niph->saddr, niph->daddr, - csum_partial(tcph, - sizeof(struct tcphdr), 0)); + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, + niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)tcph - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); addr_type = 
RTN_UNSPEC; if (hook != NF_INET_FORWARD @@ -109,13 +110,12 @@ static void send_reset(struct sk_buff *oldskb, int hook) addr_type = RTN_LOCAL; /* ip_route_me_harder expects skb->dst to be set */ - skb_dst_set(nskb, dst_clone(skb_dst(oldskb))); + skb_dst_set_noref(nskb, skb_dst(oldskb)); if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); - nskb->ip_summed = CHECKSUM_NONE; /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index cb763ae9ed90..eab8de32f200 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct inet_sock *inet = inet_sk(skb->sk); + + if (inet && inet->nodefrag) + return NF_ACCEPT; + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. 
Do this before diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 4f8bddb760c9..8c8632d9b93c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -261,14 +261,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, rcu_read_lock(); proto = __nf_nat_proto_find(orig_tuple->dst.protonum); - /* Change protocol info to have some randomization */ - if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { - proto->unique_tuple(tuple, range, maniptype, ct); - goto out; - } - /* Only bother mapping if it's not already in range and unique */ - if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || + if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) && + (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) && !nf_nat_used_tuple(tuple, ct)) goto out; @@ -440,7 +435,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; - inside = (void *)skb->data + ip_hdrlen(skb); + inside = (void *)skb->data + hdrlen; /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ @@ -470,12 +465,10 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* rcu_read_lock()ed by nf_hook_slow */ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - if (!nf_ct_get_tuple(skb, - ip_hdrlen(skb) + sizeof(struct icmphdr), - (ip_hdrlen(skb) + + if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), + (hdrlen + sizeof(struct icmphdr) + inside->ip.ihl * 4), - (u_int16_t)AF_INET, - inside->ip.protocol, + (u_int16_t)AF_INET, inside->ip.protocol, &inner, l3proto, l4proto)) return 0; @@ -484,15 +477,13 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, pass all hooks (locally-generated ICMP). Consider incoming packet: PREROUTING (DST manip), routing produces ICMP, goes through POSTROUTING (which must correct the DST manip). 
*/ - if (!manip_pkt(inside->ip.protocol, skb, - ip_hdrlen(skb) + sizeof(inside->icmp), - &ct->tuplehash[!dir].tuple, - !manip)) + if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), + &ct->tuplehash[!dir].tuple, !manip)) return 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)skb->data + ip_hdrlen(skb); + inside = (void *)skb->data + hdrlen; inside->icmp.checksum = 0; inside->icmp.checksum = csum_fold(skb_checksum(skb, hdrlen, @@ -742,7 +733,7 @@ static int __init nf_nat_init(void) spin_unlock_bh(&nf_nat_lock); /* Initialize fake conntrack so that NAT will skip it */ - nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; + nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 6c4f11f51446..3e61faf23a9a 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); -bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, +void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct, @@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { /* If it's dst rewrite, can't change port */ if (maniptype == IP_NAT_MANIP_DST) - return false; + return; if (ntohs(*portptr) < 1024) { /* Loose convention: >> 512 is credential passing */ @@ -81,15 +81,15 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, else off = *rover; - for (i = 0; i < range_size; i++, off++) { + for (i = 0; ; ++off) { *portptr = htons(min + off % range_size); - if (nf_nat_used_tuple(tuple, ct)) + if (++i != 
range_size && nf_nat_used_tuple(tuple, ct)) continue; if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) *rover = off; - return true; + return; } - return false; + return; } EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c index 22485ce306d4..570faf2667b2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c @@ -22,14 +22,14 @@ static u_int16_t dccp_port_rover; -static bool +static void dccp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &dccp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &dccp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index d7e89201351e..bc8d83a31c73 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); /* generate unique tuple ... 
*/ -static bool +static void gre_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, @@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* If there is no master conntrack we are not PPTP, do not change tuples */ if (!ct->master) - return false; + return; if (maniptype == IP_NAT_MANIP_SRC) keyptr = &tuple->src.u.gre.key; @@ -68,14 +68,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, pr_debug("min = %u, range_size = %u\n", min, range_size); - for (i = 0; i < range_size; i++, key++) { + for (i = 0; ; ++key) { *keyptr = htons(min + key % range_size); - if (!nf_nat_used_tuple(tuple, ct)) - return true; + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; } pr_debug("%p: no NAT mapping\n", ct); - return false; + return; } /* manipulate a GRE packet according to maniptype */ diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 19a8b0b07d8e..5744c3ec847c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); } -static bool +static void icmp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, @@ -42,13 +42,13 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) range_size = 0xFFFF; - for (i = 0; i < range_size; i++, id++) { + for (i = 0; ; ++id) { tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + (id % range_size)); - if (!nf_nat_used_tuple(tuple, ct)) - return true; + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; } - return false; + return; } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index 3fc598eeeb1a..756331d42661 100644 --- 
a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c @@ -16,14 +16,14 @@ static u_int16_t nf_sctp_port_rover; -static bool +static void sctp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &nf_sctp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &nf_sctp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 399e2cfa263b..aa460a595d5d 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -20,14 +20,13 @@ static u_int16_t tcp_port_rover; -static bool +static void tcp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &tcp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 9e61c79492e4..dfe65c7e2925 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -19,14 +19,13 @@ static u_int16_t udp_port_rover; -static bool +static void udp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c index 440a229bbd87..3cc8c8af39ef 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c @@ -18,14 +18,14 @@ static 
u_int16_t udplite_port_rover; -static bool +static void udplite_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udplite_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &udplite_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index 14381c62acea..a50f2bc1c732 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, return true; } -static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, +static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { /* Sorry: we can't help you; if it's not unique, we can't frob anything. 
*/ - return false; + return; } static bool diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 98ed78281aee..ebbd319f62f5 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,7 +28,8 @@ #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ (1 << NF_INET_POST_ROUTING) | \ - (1 << NF_INET_LOCAL_OUT)) + (1 << NF_INET_LOCAL_OUT) | \ + (1 << NF_INET_LOCAL_IN)) static const struct xt_table nat_table = { .name = "nat", @@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) enum ip_conntrack_info ctinfo; const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); @@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) return 0; } -unsigned int +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { /* Force range to this IP; let proto decide mapping for @@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = { .target = ipt_snat_target, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", - .hooks = 1 << NF_INET_POST_ROUTING, + .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), .checkentry = ipt_snat_checkentry, .family = AF_INET, }; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index beb25819c9c9..95481fee8bdb 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum, return NF_ACCEPT; /* Don't try to NAT if this packet is not conntracked */ - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return NF_ACCEPT; nat = nfct_nat(ct); @@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; 
- if (hooknum == NF_INET_LOCAL_IN) - /* LOCAL_IN hook doesn't have a chain! */ - ret = alloc_null_binding(ct, hooknum); - else - ret = nf_nat_rule_find(skb, hooknum, in, out, - ct); - + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3dc9914c1dce..4ae1f203f7cb 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -252,6 +252,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), + SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), SNMP_MIB_SENTINEL }; @@ -342,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v) IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, sysctl_ip_default_ttl); + BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.ip_statistics, - snmp4_ipstats_list[i].entry)); + seq_printf(seq, " %llu", + snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp4_ipstats_list[i].entry, + offsetof(struct ipstats_mib, syncp))); icmp_put(seq); /* RFC 2011 compatibility */ icmpmsg_put(seq); @@ -431,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.ip_statistics, - snmp4_ipextstats_list[i].entry)); + seq_printf(seq, " %llu", + snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp4_ipextstats_list[i].entry, + offsetof(struct ipstats_mib, syncp))); seq_putc(seq, '\n'); return 0; diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 542f22fc98b3..f2d297351405 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -52,6 +52,7 @@ int 
inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) return ret; } +EXPORT_SYMBOL(inet_add_protocol); /* * Remove a protocol from the hash tables. @@ -76,6 +77,4 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) return ret; } - -EXPORT_SYMBOL(inet_add_protocol); EXPORT_SYMBOL(inet_del_protocol); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c7a1639388a..009a7b2aa1ef 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -314,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, - struct rtable *rt, + struct rtable **rtp, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); @@ -323,25 +323,27 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, struct sk_buff *skb; unsigned int iphlen; int err; + struct rtable *rt = *rtp; - if (length > rt->u.dst.dev->mtu) { + if (length > rt->dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, - rt->u.dst.dev->mtu); + rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, &rt->dst); + *rtp = NULL; skb_reset_network_header(skb); iph = ip_hdr(skb); @@ -373,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } @@ -382,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, 
skb_transport_header(skb))->type); err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - rt->u.dst.dev, dst_output); + rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) @@ -576,7 +578,7 @@ back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, msg->msg_iov, len, - rt, msg->msg_flags); + &rt, msg->msg_flags); else { if (!ipc.addr) @@ -604,7 +606,7 @@ out: return len; do_confirm: - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 560acc677ce4..3f56b6e6c6aa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -253,8 +253,7 @@ static unsigned rt_hash_mask __read_mostly; static unsigned int rt_hash_log __read_mostly; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); -#define RT_CACHE_STAT_INC(field) \ - (__raw_get_cpu_var(rt_cache_stat).field++) +#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, int genid) @@ -287,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); while (r) { - if (dev_net(r->u.dst.dev) == seq_file_net(seq) && + if (dev_net(r->dst.dev) == seq_file_net(seq) && r->rt_genid == st->genid) return r; - r = rcu_dereference_bh(r->u.dst.rt_next); + r = rcu_dereference_bh(r->dst.rt_next); } rcu_read_unlock_bh(); } @@ -302,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; - r = r->u.dst.rt_next; + r = r->dst.rt_next; while (!r) { rcu_read_unlock_bh(); do { @@ -320,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; while ((r = __rt_cache_get_next(seq, r)) != NULL) { - if (dev_net(r->u.dst.dev) != seq_file_net(seq)) + if (dev_net(r->dst.dev) != 
seq_file_net(seq)) continue; if (r->rt_genid == st->genid) break; @@ -378,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", - r->u.dst.dev ? r->u.dst.dev->name : "*", + r->dst.dev ? r->dst.dev->name : "*", (__force u32)r->rt_dst, (__force u32)r->rt_gateway, - r->rt_flags, atomic_read(&r->u.dst.__refcnt), - r->u.dst.__use, 0, (__force u32)r->rt_src, - (dst_metric(&r->u.dst, RTAX_ADVMSS) ? - (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), - dst_metric(&r->u.dst, RTAX_WINDOW), - (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + - dst_metric(&r->u.dst, RTAX_RTTVAR)), + r->rt_flags, atomic_read(&r->dst.__refcnt), + r->dst.__use, 0, (__force u32)r->rt_src, + (dst_metric(&r->dst, RTAX_ADVMSS) ? + (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), + dst_metric(&r->dst, RTAX_WINDOW), + (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + + dst_metric(&r->dst, RTAX_RTTVAR)), r->fl.fl4_tos, - r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, - r->u.dst.hh ? (r->u.dst.hh->hh_output == + r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, + r->dst.hh ? 
(r->dst.hh->hh_output == dev_queue_xmit) : 0, r->rt_spec_dst, &len); @@ -609,13 +608,13 @@ static inline int ip_rt_proc_init(void) static inline void rt_free(struct rtable *rt) { - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline void rt_drop(struct rtable *rt) { ip_rt_put(rt); - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline int rt_fast_clean(struct rtable *rth) @@ -623,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth) /* Kill broadcast/multicast entries very aggresively, if they collide in hash table with more useful entries */ return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && - rth->fl.iif && rth->u.dst.rt_next; + rth->fl.iif && rth->dst.rt_next; } static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->u.dst.expires; + rth->dst.expires; } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -637,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t unsigned long age; int ret = 0; - if (atomic_read(&rth->u.dst.__refcnt)) + if (atomic_read(&rth->dst.__refcnt)) goto out; ret = 1; - if (rth->u.dst.expires && - time_after_eq(jiffies, rth->u.dst.expires)) + if (rth->dst.expires && + time_after_eq(jiffies, rth->dst.expires)) goto out; - age = jiffies - rth->u.dst.lastuse; + age = jiffies - rth->dst.lastuse; ret = 0; if ((age <= tmo1 && !rt_fast_clean(rth)) || (age <= tmo2 && rt_valuable(rth))) @@ -661,7 +660,7 @@ out: return ret; */ static inline u32 rt_score(struct rtable *rt) { - u32 score = jiffies - rt->u.dst.lastuse; + u32 score = jiffies - rt->dst.lastuse; score = ~score & ~(3<<30); @@ -701,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) { - return 
net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); + return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); } static inline int rt_is_expired(struct rtable *rth) { - return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); + return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } /* @@ -735,7 +734,7 @@ static void rt_do_flush(int process_context) rth = rt_hash_table[i].chain; /* defer releasing the head of the list after spin_unlock */ - for (tail = rth; tail; tail = tail->u.dst.rt_next) + for (tail = rth; tail; tail = tail->dst.rt_next) if (!rt_is_expired(tail)) break; if (rth != tail) @@ -744,9 +743,9 @@ static void rt_do_flush(int process_context) /* call rt_free on entries after the tail requiring flush */ prev = &rt_hash_table[i].chain; for (p = *prev; p; p = next) { - next = p->u.dst.rt_next; + next = p->dst.rt_next; if (!rt_is_expired(p)) { - prev = &p->u.dst.rt_next; + prev = &p->dst.rt_next; } else { *prev = next; rt_free(p); @@ -761,7 +760,7 @@ static void rt_do_flush(int process_context) spin_unlock_bh(rt_hash_lock_addr(i)); for (; rth != tail; rth = next) { - next = rth->u.dst.rt_next; + next = rth->dst.rt_next; rt_free(rth); } } @@ -792,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) while (aux != rth) { if (compare_hash_inputs(&aux->fl, &rth->fl)) return 0; - aux = aux->u.dst.rt_next; + aux = aux->dst.rt_next; } return ONE; } @@ -832,18 +831,18 @@ static void rt_check_expire(void) length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { - prefetch(rth->u.dst.rt_next); + prefetch(rth->dst.rt_next); if (rt_is_expired(rth)) { - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); continue; } - if (rth->u.dst.expires) { + if (rth->dst.expires) { /* Entry is expired even if it is in use */ - if (time_before_eq(jiffies, rth->u.dst.expires)) { + if (time_before_eq(jiffies, rth->dst.expires)) { nofree: tmo >>= 1; - rthp = &rth->u.dst.rt_next; + rthp = 
&rth->dst.rt_next; /* * We only count entries on * a chain with equal hash inputs once @@ -859,7 +858,7 @@ nofree: goto nofree; /* Cleanup aged off entries. */ - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); } spin_unlock_bh(rt_hash_lock_addr(i)); @@ -1000,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops) if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; continue; } - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); goal--; } @@ -1069,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head) while (rth) { length += has_noalias(head, rth); - rth = rth->u.dst.rt_next; + rth = rth->dst.rt_next; } return length >> FRACT_BITS; } @@ -1091,7 +1090,7 @@ restart: candp = NULL; now = jiffies; - if (!rt_caching(dev_net(rt->u.dst.dev))) { + if (!rt_caching(dev_net(rt->dst.dev))) { /* * If we're not caching, just tell the caller we * were successful and don't touch the route. The @@ -1109,7 +1108,7 @@ restart: */ if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { - int err = arp_bind_neighbour(&rt->u.dst); + int err = arp_bind_neighbour(&rt->dst); if (err) { if (net_ratelimit()) printk(KERN_WARNING @@ -1128,19 +1127,19 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { if (rt_is_expired(rth)) { - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); continue; } if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { /* Put it first */ - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; /* * Since lookup is lockfree, the deletion * must be visible to another weakly ordered CPU before * the insertion at the start of the hash chain. 
*/ - rcu_assign_pointer(rth->u.dst.rt_next, + rcu_assign_pointer(rth->dst.rt_next, rt_hash_table[hash].chain); /* * Since lookup is lockfree, the update writes @@ -1148,18 +1147,18 @@ restart: */ rcu_assign_pointer(rt_hash_table[hash].chain, rth); - dst_use(&rth->u.dst, now); + dst_use(&rth->dst, now); spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); if (rp) *rp = rth; else - skb_dst_set(skb, &rth->u.dst); + skb_dst_set(skb, &rth->dst); return 0; } - if (!atomic_read(&rth->u.dst.__refcnt)) { + if (!atomic_read(&rth->dst.__refcnt)) { u32 score = rt_score(rth); if (score <= min_score) { @@ -1171,7 +1170,7 @@ restart: chain_length++; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; } if (cand) { @@ -1182,17 +1181,17 @@ restart: * only 2 entries per bucket. We will see. */ if (chain_length > ip_rt_gc_elasticity) { - *candp = cand->u.dst.rt_next; + *candp = cand->dst.rt_next; rt_free(cand); } } else { if (chain_length > rt_chain_length_max && slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; if (!rt_caching(net)) { printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", - rt->u.dst.dev->name, num); + rt->dst.dev->name, num); } rt_emergency_hash_rebuild(net); spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1207,7 +1206,7 @@ restart: route or unicast forwarding path. 
*/ if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { - int err = arp_bind_neighbour(&rt->u.dst); + int err = arp_bind_neighbour(&rt->dst); if (err) { spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1238,14 +1237,14 @@ restart: } } - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 - if (rt->u.dst.rt_next) { + if (rt->dst.rt_next) { struct rtable *trt; printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); - for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) + for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next) printk(" . %pI4", &trt->rt_dst); printk("\n"); } @@ -1263,7 +1262,7 @@ skip_hashing: if (rp) *rp = rt; else - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); return 0; } @@ -1325,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) ip_select_fb_ident(iph); } +EXPORT_SYMBOL(__ip_select_ident); static void rt_del(unsigned hash, struct rtable *rt) { @@ -1335,20 +1335,21 @@ static void rt_del(unsigned hash, struct rtable *rt) ip_rt_put(rt); while ((aux = *rthp) != NULL) { if (aux == rt || rt_is_expired(aux)) { - *rthp = aux->u.dst.rt_next; + *rthp = aux->dst.rt_next; rt_free(aux); continue; } - rthp = &aux->u.dst.rt_next; + rthp = &aux->dst.rt_next; } spin_unlock_bh(rt_hash_lock_addr(hash)); } +/* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { int i, k; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct rtable *rth, **rthp; __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; @@ -1384,7 +1385,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rthp=&rt_hash_table[hash].chain; - rcu_read_lock(); while ((rth = rcu_dereference(*rthp)) != NULL) { struct rtable *rt; @@ -1393,44 +1393,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 
rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || rt_is_expired(rth) || - !net_eq(dev_net(rth->u.dst.dev), net)) { - rthp = &rth->u.dst.rt_next; + !net_eq(dev_net(rth->dst.dev), net)) { + rthp = &rth->dst.rt_next; continue; } if (rth->rt_dst != daddr || rth->rt_src != saddr || - rth->u.dst.error || + rth->dst.error || rth->rt_gateway != old_gw || - rth->u.dst.dev != dev) + rth->dst.dev != dev) break; - dst_hold(&rth->u.dst); - rcu_read_unlock(); + dst_hold(&rth->dst); rt = dst_alloc(&ipv4_dst_ops); if (rt == NULL) { ip_rt_put(rth); - in_dev_put(in_dev); return; } /* Copy all the information. */ *rt = *rth; - rt->u.dst.__use = 1; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.child = NULL; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + rt->dst.__use = 1; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.child = NULL; + if (rt->dst.dev) + dev_hold(rt->dst.dev); if (rt->idev) in_dev_hold(rt->idev); - rt->u.dst.obsolete = -1; - rt->u.dst.lastuse = jiffies; - rt->u.dst.path = &rt->u.dst; - rt->u.dst.neighbour = NULL; - rt->u.dst.hh = NULL; + rt->dst.obsolete = -1; + rt->dst.lastuse = jiffies; + rt->dst.path = &rt->dst; + rt->dst.neighbour = NULL; + rt->dst.hh = NULL; #ifdef CONFIG_XFRM - rt->u.dst.xfrm = NULL; + rt->dst.xfrm = NULL; #endif rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; @@ -1439,23 +1437,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway = new_gw; /* Redirect received -> path was valid */ - dst_confirm(&rth->u.dst); + dst_confirm(&rth->dst); if (rt->peer) atomic_inc(&rt->peer->refcnt); - if (arp_bind_neighbour(&rt->u.dst) || - !(rt->u.dst.neighbour->nud_state & + if (arp_bind_neighbour(&rt->dst) || + !(rt->dst.neighbour->nud_state & NUD_VALID)) { - if (rt->u.dst.neighbour) - neigh_event_send(rt->u.dst.neighbour, NULL); + if (rt->dst.neighbour) + neigh_event_send(rt->dst.neighbour, NULL); ip_rt_put(rth); rt_drop(rt); goto do_next; } - netevent.old = &rth->u.dst; - netevent.new = &rt->u.dst; + netevent.old = 
&rth->dst; + netevent.new = &rt->dst; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); @@ -1464,12 +1462,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, ip_rt_put(rt); goto do_next; } - rcu_read_unlock(); do_next: ; } } - in_dev_put(in_dev); return; reject_redirect: @@ -1480,7 +1476,7 @@ reject_redirect: &old_gw, dev->name, &new_gw, &saddr, &daddr); #endif - in_dev_put(in_dev); + ; } static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) @@ -1493,8 +1489,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || - (rt->u.dst.expires && - time_after_eq(jiffies, rt->u.dst.expires))) { + (rt->dst.expires && + time_after_eq(jiffies, rt->dst.expires))) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, rt->fl.oif, rt_genid(dev_net(dst->dev))); @@ -1532,7 +1528,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) int log_martians; rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->dst.dev); if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { rcu_read_unlock(); return; @@ -1543,30 +1539,30 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) - rt->u.dst.rate_tokens = 0; + if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) + rt->dst.rate_tokens = 0; /* Too many ignored redirects; do not send anything - * set u.dst.rate_last to the last seen redirected packet. + * set dst.rate_last to the last seen redirected packet. */ - if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { - rt->u.dst.rate_last = jiffies; + if (rt->dst.rate_tokens >= ip_rt_redirect_number) { + rt->dst.rate_last = jiffies; return; } /* Check for load limit; set rate_last to the latest sent * redirect. 
*/ - if (rt->u.dst.rate_tokens == 0 || + if (rt->dst.rate_tokens == 0 || time_after(jiffies, - (rt->u.dst.rate_last + - (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { + (rt->dst.rate_last + + (ip_rt_redirect_load << rt->dst.rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); - rt->u.dst.rate_last = jiffies; - ++rt->u.dst.rate_tokens; + rt->dst.rate_last = jiffies; + ++rt->dst.rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && - rt->u.dst.rate_tokens == ip_rt_redirect_number && + rt->dst.rate_tokens == ip_rt_redirect_number && net_ratelimit()) printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", &rt->rt_src, rt->rt_iif, @@ -1581,7 +1577,7 @@ static int ip_error(struct sk_buff *skb) unsigned long now; int code; - switch (rt->u.dst.error) { + switch (rt->dst.error) { case EINVAL: default: goto out; @@ -1590,7 +1586,7 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(dev_net(rt->u.dst.dev), + IP_INC_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INNOROUTES); break; case EACCES: @@ -1599,12 +1595,12 @@ static int ip_error(struct sk_buff *skb) } now = jiffies; - rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; - if (rt->u.dst.rate_tokens > ip_rt_error_burst) - rt->u.dst.rate_tokens = ip_rt_error_burst; - rt->u.dst.rate_last = now; - if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { - rt->u.dst.rate_tokens -= ip_rt_error_cost; + rt->dst.rate_tokens += now - rt->dst.rate_last; + if (rt->dst.rate_tokens > ip_rt_error_burst) + rt->dst.rate_tokens = ip_rt_error_burst; + rt->dst.rate_last = now; + if (rt->dst.rate_tokens >= ip_rt_error_cost) { + rt->dst.rate_tokens -= ip_rt_error_cost; icmp_send(skb, ICMP_DEST_UNREACH, code, 0); } @@ -1649,7 +1645,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + rth 
= rcu_dereference(rth->dst.rt_next)) { unsigned short mtu = new_mtu; if (rth->fl.fl4_dst != daddr || @@ -1658,8 +1654,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->rt_src != iph->saddr || rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || - dst_metric_locked(&rth->u.dst, RTAX_MTU) || - !net_eq(dev_net(rth->u.dst.dev), net) || + dst_metric_locked(&rth->dst, RTAX_MTU) || + !net_eq(dev_net(rth->dst.dev), net) || rt_is_expired(rth)) continue; @@ -1667,22 +1663,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= dst_mtu(&rth->u.dst) && + old_mtu >= dst_mtu(&rth->dst) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= dst_mtu(&rth->u.dst)) { - if (mtu < dst_mtu(&rth->u.dst)) { - dst_confirm(&rth->u.dst); + if (mtu <= dst_mtu(&rth->dst)) { + if (mtu < dst_mtu(&rth->dst)) { + dst_confirm(&rth->dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; - rth->u.dst.metrics[RTAX_LOCK-1] |= + rth->dst.metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); } - rth->u.dst.metrics[RTAX_MTU-1] = mtu; - dst_set_expires(&rth->u.dst, + rth->dst.metrics[RTAX_MTU-1] = mtu; + dst_set_expires(&rth->dst, ip_rt_mtu_expires); } est_mtu = mtu; @@ -1755,7 +1751,7 @@ static void ipv4_link_failure(struct sk_buff *skb) rt = skb_rtable(skb); if (rt) - dst_set_expires(&rt->u.dst, 0); + dst_set_expires(&rt->dst, 0); } static int ip_rt_bug(struct sk_buff *skb) @@ -1783,11 +1779,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt->fl.iif == 0) src = rt->rt_src; - else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { + else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { src = FIB_RES_PREFSRC(res); fib_res_put(&res); } else - src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, + src = inet_select_addr(rt->dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); memcpy(addr, &src, 4); } @@ -1795,10 +1791,10 @@ void 
ip_rt_get_source(u8 *addr, struct rtable *rt) #ifdef CONFIG_NET_CLS_ROUTE static void set_class_tag(struct rtable *rt, u32 tag) { - if (!(rt->u.dst.tclassid & 0xFFFF)) - rt->u.dst.tclassid |= tag & 0xFFFF; - if (!(rt->u.dst.tclassid & 0xFFFF0000)) - rt->u.dst.tclassid |= tag & 0xFFFF0000; + if (!(rt->dst.tclassid & 0xFFFF)) + rt->dst.tclassid |= tag & 0xFFFF; + if (!(rt->dst.tclassid & 0xFFFF0000)) + rt->dst.tclassid |= tag & 0xFFFF0000; } #endif @@ -1810,30 +1806,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - memcpy(rt->u.dst.metrics, fi->fib_metrics, - sizeof(rt->u.dst.metrics)); + memcpy(rt->dst.metrics, fi->fib_metrics, + sizeof(rt->dst.metrics)); if (fi->fib_mtu == 0) { - rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && + rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + if (dst_metric_locked(&rt->dst, RTAX_MTU) && rt->rt_gateway != rt->rt_dst && - rt->u.dst.dev->mtu > 576) - rt->u.dst.metrics[RTAX_MTU-1] = 576; + rt->dst.dev->mtu > 576) + rt->dst.metrics[RTAX_MTU-1] = 576; } #ifdef CONFIG_NET_CLS_ROUTE - rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; + rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; #endif } else - rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; - - if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) - rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) - rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, + rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; + + if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; + if (dst_mtu(&rt->dst) > IP_MAX_MTU) + rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; + if (dst_metric(&rt->dst, 
RTAX_ADVMSS) == 0) + rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, ip_rt_min_advmss); - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) - rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; + if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) + rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; #ifdef CONFIG_NET_CLS_ROUTE #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -1844,14 +1840,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) rt->rt_type = res->type; } +/* called in rcu_read_lock() section */ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { - unsigned hash; + unsigned int hash; struct rtable *rth; __be32 spec_dst; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); u32 itag = 0; + int err; /* Primary sanity checks. */ @@ -1866,21 +1864,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!ipv4_is_local_multicast(daddr)) goto e_inval; spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); - } else if (fib_validate_source(saddr, 0, tos, 0, - dev, &spec_dst, &itag, 0) < 0) - goto e_inval; - + } else { + err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, + &itag, 0); + if (err < 0) + goto e_err; + } rth = dst_alloc(&ipv4_dst_ops); if (!rth) goto e_nobufs; - rth->u.dst.output = ip_rt_bug; - rth->u.dst.obsolete = -1; + rth->dst.output = ip_rt_bug; + rth->dst.obsolete = -1; - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1888,13 +1888,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE - 
rth->u.dst.tclassid = itag; + rth->dst.tclassid = itag; #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = init_net.loopback_dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = init_net.loopback_dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; @@ -1902,27 +1902,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; if (our) { - rth->u.dst.input= ip_local_deliver; + rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; } #ifdef CONFIG_IP_MROUTE if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) - rth->u.dst.input = ip_mr_input; + rth->dst.input = ip_mr_input; #endif RT_CACHE_STAT_INC(in_slow_mc); - in_dev_put(in_dev); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); e_nobufs: - in_dev_put(in_dev); return -ENOBUFS; - e_inval: - in_dev_put(in_dev); return -EINVAL; +e_err: + return err; } @@ -1956,22 +1954,22 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +/* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { - struct rtable *rth; int err; struct in_device *out_dev; - unsigned flags = 0; + unsigned int flags = 0; __be32 spec_dst; u32 itag; /* get a working reference to the output device */ - out_dev = in_dev_get(FIB_RES_DEV(*res)); + out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); if (out_dev == NULL) { if (net_ratelimit()) printk(KERN_CRIT "Bug in ip_route_input" \ @@ -1986,7 +1984,6 @@ static int __mkroute_input(struct sk_buff *skb, ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); - err = -EINVAL; goto cleanup; } @@ -2020,12 +2017,12 @@ static int 
__mkroute_input(struct sk_buff *skb, goto cleanup; } - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; if (IN_DEV_CONF_GET(out_dev, NOXFRM)) - rth->u.dst.flags |= DST_NOXFRM; + rth->dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2035,16 +2032,16 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = daddr; rth->rt_iif = rth->fl.iif = in_dev->dev->ifindex; - rth->u.dst.dev = (out_dev)->dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = (out_dev)->dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; - rth->u.dst.obsolete = -1; - rth->u.dst.input = ip_forward; - rth->u.dst.output = ip_output; - rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); + rth->dst.obsolete = -1; + rth->dst.input = ip_forward; + rth->dst.output = ip_output; + rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rt_set_nexthop(rth, res, itag); @@ -2053,8 +2050,6 @@ static int __mkroute_input(struct sk_buff *skb, *result = rth; err = 0; cleanup: - /* release the working reference to the output device */ - in_dev_put(out_dev); return err; } @@ -2080,7 +2075,7 @@ static int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif, - rt_genid(dev_net(rth->u.dst.dev))); + rt_genid(dev_net(rth->dst.dev))); return rt_intern_hash(hash, rth, NULL, skb, fl->iif); } @@ -2098,7 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct fib_result res; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct flowi fl = { .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, @@ -2158,13 +2153,12 
@@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto brd_input; if (res.type == RTN_LOCAL) { - int result; - result = fib_validate_source(saddr, daddr, tos, + err = fib_validate_source(saddr, daddr, tos, net->loopback_dev->ifindex, dev, &spec_dst, &itag, skb->mark); - if (result < 0) - goto martian_source; - if (result) + if (err < 0) + goto martian_source_keep_err; + if (err) flags |= RTCF_DIRECTSRC; spec_dst = daddr; goto local_input; @@ -2177,7 +2171,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); done: - in_dev_put(in_dev); if (free_res) fib_res_put(&res); out: return err; @@ -2192,7 +2185,7 @@ brd_input: err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag, skb->mark); if (err < 0) - goto martian_source; + goto martian_source_keep_err; if (err) flags |= RTCF_DIRECTSRC; } @@ -2205,14 +2198,14 @@ local_input: if (!rth) goto e_nobufs; - rth->u.dst.output= ip_rt_bug; - rth->u.dst.obsolete = -1; + rth->dst.output= ip_rt_bug; + rth->dst.obsolete = -1; rth->rt_genid = rt_genid(net); - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2220,20 +2213,20 @@ local_input: rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE - rth->u.dst.tclassid = itag; + rth->dst.tclassid = itag; #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = net->loopback_dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = net->loopback_dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->u.dst.input= ip_local_deliver; + rth->dst.input= 
ip_local_deliver; rth->rt_flags = flags|RTCF_LOCAL; if (res.type == RTN_UNREACHABLE) { - rth->u.dst.input= ip_error; - rth->u.dst.error= -err; + rth->dst.input= ip_error; + rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; @@ -2273,8 +2266,10 @@ e_nobufs: goto done; martian_source: + err = -EINVAL; +martian_source_keep_err: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); - goto e_inval; + goto done; } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, @@ -2284,32 +2279,34 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, unsigned hash; int iif = dev->ifindex; struct net *net; + int res; net = dev_net(dev); + rcu_read_lock(); + if (!rt_caching(net)) goto skip_cache; tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif, rt_genid(net)); - rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + rth = rcu_dereference(rth->dst.rt_next)) { if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | (rth->fl.iif ^ iif) | rth->fl.oif | (rth->fl.fl4_tos ^ tos)) == 0 && rth->fl.mark == skb->mark && - net_eq(dev_net(rth->u.dst.dev), net) && + net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { if (noref) { - dst_use_noref(&rth->u.dst, jiffies); - skb_dst_set_noref(skb, &rth->u.dst); + dst_use_noref(&rth->dst, jiffies); + skb_dst_set_noref(skb, &rth->dst); } else { - dst_use(&rth->u.dst, jiffies); - skb_dst_set(skb, &rth->u.dst); + dst_use(&rth->dst, jiffies); + skb_dst_set(skb, &rth->dst); } RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2317,7 +2314,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, } RT_CACHE_STAT_INC(in_hlist_search); } - rcu_read_unlock(); skip_cache: /* Multicast recognition logic is moved from route cache to here. @@ -2332,12 +2328,11 @@ skip_cache: route cache entry is created eventually. 
*/ if (ipv4_is_multicast(daddr)) { - struct in_device *in_dev; + struct in_device *in_dev = __in_dev_get_rcu(dev); - rcu_read_lock(); - if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { + if (in_dev) { int our = ip_check_mc(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + ip_hdr(skb)->protocol); if (our #ifdef CONFIG_IP_MROUTE || @@ -2345,15 +2340,18 @@ skip_cache: IN_DEV_MFORWARD(in_dev)) #endif ) { + int res = ip_route_input_mc(skb, daddr, saddr, + tos, dev, our); rcu_read_unlock(); - return ip_route_input_mc(skb, daddr, saddr, - tos, dev, our); + return res; } } rcu_read_unlock(); return -EINVAL; } - return ip_route_input_slow(skb, daddr, saddr, tos, dev); + res = ip_route_input_slow(skb, daddr, saddr, tos, dev); + rcu_read_unlock(); + return res; } EXPORT_SYMBOL(ip_route_input_common); @@ -2415,12 +2413,12 @@ static int __mkroute_output(struct rtable **result, goto cleanup; } - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOXFRM)) - rth->u.dst.flags |= DST_NOXFRM; + rth->dst.flags |= DST_NOXFRM; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = oldflp->fl4_dst; rth->fl.fl4_tos = tos; @@ -2432,35 +2430,35 @@ static int __mkroute_output(struct rtable **result, rth->rt_iif = oldflp->oif ? 
: dev_out->ifindex; /* get references to the devices that are to be hold by the routing cache entry */ - rth->u.dst.dev = dev_out; + rth->dst.dev = dev_out; dev_hold(dev_out); rth->idev = in_dev_get(dev_out); rth->rt_gateway = fl->fl4_dst; rth->rt_spec_dst= fl->fl4_src; - rth->u.dst.output=ip_output; - rth->u.dst.obsolete = -1; + rth->dst.output=ip_output; + rth->dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); if (flags & RTCF_LOCAL) { - rth->u.dst.input = ip_local_deliver; + rth->dst.input = ip_local_deliver; rth->rt_spec_dst = fl->fl4_dst; } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { rth->rt_spec_dst = fl->fl4_src; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { - rth->u.dst.output = ip_mc_output; + rth->dst.output = ip_mc_output; RT_CACHE_STAT_INC(out_slow_mc); } #ifdef CONFIG_IP_MROUTE if (res->type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && !ipv4_is_local_multicast(oldflp->fl4_dst)) { - rth->u.dst.input = ip_mr_input; - rth->u.dst.output = ip_mc_output; + rth->dst.input = ip_mr_input; + rth->dst.output = ip_mc_output; } } #endif @@ -2715,7 +2713,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; - rth = rcu_dereference_bh(rth->u.dst.rt_next)) { + rth = rcu_dereference_bh(rth->dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && @@ -2723,9 +2721,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && - net_eq(dev_net(rth->u.dst.dev), net) && + net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - dst_use(&rth->u.dst, jiffies); + dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); *rp = rth; @@ -2738,7 +2736,6 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, slow_output: 
return ip_route_output_slow(net, rp, flp); } - EXPORT_SYMBOL_GPL(__ip_route_output_key); static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -2762,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_alloc(&ipv4_dst_blackhole_ops); if (rt) { - struct dst_entry *new = &rt->u.dst; + struct dst_entry *new = &rt->dst; atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); + memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); - new->dev = ort->u.dst.dev; + new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); @@ -2794,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_free(new); } - dst_release(&(*rp)->u.dst); + dst_release(&(*rp)->dst); *rp = rt; return (rt ? 0 : -ENOMEM); } @@ -2822,13 +2819,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, return 0; } - EXPORT_SYMBOL_GPL(ip_route_output_flow); int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) { return ip_route_output_flow(net, rp, flp, NULL, 0); } +EXPORT_SYMBOL(ip_route_output_key); static int rt_fill_info(struct net *net, struct sk_buff *skb, u32 pid, u32 seq, int event, @@ -2864,11 +2861,11 @@ static int rt_fill_info(struct net *net, r->rtm_src_len = 32; NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); } - if (rt->u.dst.dev) - NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); + if (rt->dst.dev) + NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); #ifdef CONFIG_NET_CLS_ROUTE - if (rt->u.dst.tclassid) - NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); + if (rt->dst.tclassid) + NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); #endif if (rt->fl.iif) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); @@ -2878,12 +2875,16 @@ static int rt_fill_info(struct net *net, if (rt->rt_dst != rt->rt_gateway) NLA_PUT_BE32(skb, 
RTA_GATEWAY, rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto nla_put_failure; - error = rt->u.dst.error; - expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; + if (rt->fl.mark) + NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); + + error = rt->dst.error; + expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; if (rt->peer) { + inet_peer_refcheck(rt->peer); id = atomic_read(&rt->peer->ip_id_count) & 0xffff; if (rt->peer->tcp_ts_stamp) { ts = rt->peer->tcp_ts; @@ -2914,7 +2915,7 @@ static int rt_fill_info(struct net *net, NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } - if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, + if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, expires, error) < 0) goto nla_put_failure; @@ -2935,6 +2936,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void __be32 src = 0; u32 iif; int err; + int mark; struct sk_buff *skb; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); @@ -2962,6 +2964,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; + mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; if (iif) { struct net_device *dev; @@ -2974,13 +2977,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb->protocol = htons(ETH_P_IP); skb->dev = dev; + skb->mark = mark; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); rt = skb_rtable(skb); - if (err == 0 && rt->u.dst.error) - err = -rt->u.dst.error; + if (err == 0 && rt->dst.error) + err = -rt->dst.error; } else { struct flowi fl = { .nl_u = { @@ -2991,6 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void }, }, .oif = tb[RTA_OIF] ? 
nla_get_u32(tb[RTA_OIF]) : 0, + .mark = mark, }; err = ip_route_output_key(net, &rt, &fl); } @@ -2998,7 +3003,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err) goto errout_free; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -3034,12 +3039,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; rcu_read_lock_bh(); for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { - if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) + rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { + if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) continue; if (rt_is_expired(rt)) continue; - skb_dst_set_noref(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { @@ -3365,6 +3370,3 @@ void __init ip_static_sysctl_init(void) register_sysctl_paths(ipv4_path, ipv4_skeleton); } #endif - -EXPORT_SYMBOL(__ip_select_ident); -EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9f6b22206c52..650cace2180d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -18,8 +18,8 @@ #include <net/tcp.h> #include <net/route.h> -/* Timestamps: lowest 9 bits store TCP options */ -#define TSBITS 9 +/* Timestamps: lowest bits store TCP options */ +#define TSBITS 6 #define TSMASK (((__u32)1 << TSBITS) - 1) extern int sysctl_tcp_syncookies; @@ -58,7 +58,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, /* * when syncookies are in effect and tcp timestamps are enabled we encode - * tcp options in the lowest 9 bits of the timestamp value that will be + * tcp options in the lower bits of the timestamp value that will be * sent in the syn-ack. 
* Since subsequent timestamps use the normal tcp_time_stamp value, we * must make sure that the resulting initial timestamp is <= tcp_time_stamp. @@ -70,11 +70,10 @@ __u32 cookie_init_timestamp(struct request_sock *req) u32 options = 0; ireq = inet_rsk(req); - if (ireq->wscale_ok) { - options = ireq->snd_wscale; - options |= ireq->rcv_wscale << 4; - } - options |= ireq->sack_ok << 8; + + options = ireq->wscale_ok ? ireq->snd_wscale : 0xf; + options |= ireq->sack_ok << 4; + options |= ireq->ecn_ok << 5; ts = ts_now & ~TSMASK; ts |= options; @@ -138,23 +137,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, } /* - * This table has to be sorted and terminated with (__u16)-1. - * XXX generate a better table. - * Unresolved Issues: HIPPI with a 64k MSS is not well supported. + * MSS Values are taken from the 2009 paper + * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson: + * - values 1440 to 1460 accounted for 80% of observed mss values + * - values outside the 536-1460 range are rare (<0.2%). + * + * Table must be sorted. */ static __u16 const msstab[] = { - 64 - 1, - 256 - 1, - 512 - 1, - 536 - 1, - 1024 - 1, - 1440 - 1, - 1460 - 1, - 4312 - 1, - (__u16)-1 + 64, + 512, + 536, + 1024, + 1440, + 1460, + 4312, + 8960, }; -/* The number doesn't include the -1 terminator */ -#define NUM_MSS (ARRAY_SIZE(msstab) - 1) /* * Generate a syncookie. mssp points to the mss, which is returned @@ -169,10 +168,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) tcp_synq_overflow(sk); - /* XXX sort msstab[] by probability? Binary search? 
*/ - for (mssind = 0; mss > msstab[mssind + 1]; mssind++) - ; - *mssp = msstab[mssind] + 1; + for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) + if (mss >= msstab[mssind]) + break; + *mssp = msstab[mssind]; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); @@ -202,7 +201,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) jiffies / (HZ * 60), COUNTER_TRIES); - return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; + return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, @@ -227,26 +226,38 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, * additional tcp options in the timestamp. * This extracts these options from the timestamp echo. * - * The lowest 4 bits are for snd_wscale - * The next 4 lsb are for rcv_wscale - * The next lsb is for sack_ok + * The lowest 4 bits store snd_wscale. + * next 2 bits indicate SACK and ECN support. + * + * return false if we decode an option that should not be. 
*/ -void cookie_check_timestamp(struct tcp_options_received *tcp_opt) +bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) { - /* echoed timestamp, 9 lowest bits contain options */ + /* echoed timestamp, lowest bits contain options */ u32 options = tcp_opt->rcv_tsecr & TSMASK; - tcp_opt->snd_wscale = options & 0xf; - options >>= 4; - tcp_opt->rcv_wscale = options & 0xf; + if (!tcp_opt->saw_tstamp) { + tcp_clear_options(tcp_opt); + return true; + } + + if (!sysctl_tcp_timestamps) + return false; tcp_opt->sack_ok = (options >> 4) & 0x1; + *ecn_ok = (options >> 5) & 1; + if (*ecn_ok && !sysctl_tcp_ecn) + return false; + + if (tcp_opt->sack_ok && !sysctl_tcp_sack) + return false; - if (tcp_opt->sack_ok) - tcp_sack_reset(tcp_opt); + if ((options & 0xf) == 0xf) + return true; /* no window scaling */ - if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) - tcp_opt->wscale_ok = 1; + tcp_opt->wscale_ok = 1; + tcp_opt->snd_wscale = options & 0xf; + return sysctl_tcp_window_scaling != 0; } EXPORT_SYMBOL(cookie_check_timestamp); @@ -265,8 +276,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, int mss; struct rtable *rt; __u8 rcv_wscale; + bool ecn_ok; - if (!sysctl_tcp_syncookies || !th->ack) + if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk) || @@ -281,8 +293,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, &hash_location, 0); - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); + if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + goto out; ret = NULL; req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ @@ -298,9 +310,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->ecn_ok = 0; + ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; 
- ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; @@ -354,15 +365,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } /* Try to redo what tcp_v4_send_synack did. */ - req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); + req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, &rcv_wscale, - dst_metric(&rt->u.dst, RTAX_INITRWND)); + dst_metric(&rt->dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ret = get_cookie_sock(sk, skb, req, &rt->u.dst); + ret = get_cookie_sock(sk, skb, req, &rt->dst); out: return ret; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6596b4feeddc..176e11aaea77 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -315,7 +315,6 @@ struct tcp_splice_state { * is strict, actions are advisory and have some latency. 
*/ int tcp_memory_pressure __read_mostly; - EXPORT_SYMBOL(tcp_memory_pressure); void tcp_enter_memory_pressure(struct sock *sk) @@ -325,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk) tcp_memory_pressure = 1; } } - EXPORT_SYMBOL(tcp_enter_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ @@ -460,6 +458,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } return mask; } +EXPORT_SYMBOL(tcp_poll); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { @@ -508,10 +507,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(answ, (int __user *)arg); } +EXPORT_SYMBOL(tcp_ioctl); static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; } @@ -527,7 +527,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; - tcb->flags = TCPCB_FLAG_ACK; + tcb->flags = TCPHDR_ACK; tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); @@ -608,6 +608,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ssize_t spliced; int ret; + sock_rps_record_flow(sk); /* * We can't seek on a socket input */ @@ -675,6 +676,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, return ret; } +EXPORT_SYMBOL(tcp_splice_read); struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) { @@ -815,7 +817,7 @@ new_segment: skb_shinfo(skb)->gso_segs = 0; if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; copied += copy; poffset += copy; @@ -856,15 +858,15 @@ out_err: return sk_stream_error(sk, flags, err); } -ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) +int tcp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int 
flags) { ssize_t res; - struct sock *sk = sock->sk; if (!(sk->sk_route_caps & NETIF_F_SG) || !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) - return sock_no_sendpage(sock, page, offset, size, flags); + return sock_no_sendpage(sk->sk_socket, page, offset, size, + flags); lock_sock(sk); TCP_CHECK_TIMER(sk); @@ -873,6 +875,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, release_sock(sk); return res; } +EXPORT_SYMBOL(tcp_sendpage); #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) @@ -897,10 +900,9 @@ static inline int select_size(struct sock *sk, int sg) return tmp; } -int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, +int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - struct sock *sk = sock->sk; struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1061,7 +1063,7 @@ new_segment: } if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; @@ -1121,6 +1123,7 @@ out_err: release_sock(sk); return err; } +EXPORT_SYMBOL(tcp_sendmsg); /* * Handle reading urgent data. BSD has very simple semantics for @@ -1380,6 +1383,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_cleanup_rbuf(sk, copied); return copied; } +EXPORT_SYMBOL(tcp_read_sock); /* * This routine copies from a sock struct into the user buffer. @@ -1774,6 +1778,7 @@ recv_urg: err = tcp_recv_urg(sk, msg, len, flags); goto out; } +EXPORT_SYMBOL(tcp_recvmsg); void tcp_set_state(struct sock *sk, int state) { @@ -1866,6 +1871,7 @@ void tcp_shutdown(struct sock *sk, int how) tcp_send_fin(sk); } } +EXPORT_SYMBOL(tcp_shutdown); void tcp_close(struct sock *sk, long timeout) { @@ -1898,6 +1904,10 @@ void tcp_close(struct sock *sk, long timeout) sk_mem_reclaim(sk); + /* If socket has been already reset (e.g. in tcp_reset()) - kill it. 
*/ + if (sk->sk_state == TCP_CLOSE) + goto adjudge_to_death; + /* As outlined in RFC 2525, section 2.17, we send a RST here because * data was lost. To witness the awful effects of the old behavior of * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk @@ -2025,6 +2035,7 @@ out: local_bh_enable(); sock_put(sk); } +EXPORT_SYMBOL(tcp_close); /* These states need RST on ABORT according to RFC793 */ @@ -2098,6 +2109,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_error_report(sk); return err; } +EXPORT_SYMBOL(tcp_disconnect); /* * Socket option code for TCP. @@ -2175,6 +2187,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, GFP_KERNEL); if (cvp == NULL) return -ENOMEM; + + kref_init(&cvp->kref); } lock_sock(sk); tp->rx_opt.cookie_in_always = @@ -2189,12 +2203,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, */ kref_put(&tp->cookie_values->kref, tcp_cookie_values_release); - kref_init(&cvp->kref); - tp->cookie_values = cvp; } else { cvp = tp->cookie_values; } } + if (cvp != NULL) { cvp->cookie_desired = ctd.tcpct_cookie_desired; @@ -2208,6 +2221,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, cvp->s_data_desired = ctd.tcpct_s_data_desired; cvp->s_data_constant = 0; /* false */ } + + tp->cookie_values = cvp; } release_sock(sk); return err; @@ -2396,6 +2411,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } +EXPORT_SYMBOL(tcp_setsockopt); #ifdef CONFIG_COMPAT int compat_tcp_setsockopt(struct sock *sk, int level, int optname, @@ -2406,7 +2422,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(compat_tcp_setsockopt); #endif @@ -2472,7 +2487,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; } - 
EXPORT_SYMBOL_GPL(tcp_get_info); static int do_tcp_getsockopt(struct sock *sk, int level, @@ -2590,6 +2604,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; } + case TCP_THIN_LINEAR_TIMEOUTS: + val = tp->thin_lto; + break; + case TCP_THIN_DUPACK: + val = tp->thin_dupack; + break; default: return -ENOPROTOOPT; } @@ -2611,6 +2631,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, optval, optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } +EXPORT_SYMBOL(tcp_getsockopt); #ifdef CONFIG_COMPAT int compat_tcp_getsockopt(struct sock *sk, int level, int optname, @@ -2621,7 +2642,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, optval, optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(compat_tcp_getsockopt); #endif @@ -2858,7 +2878,6 @@ void tcp_free_md5sig_pool(void) if (pool) __tcp_free_md5sig_pool(pool); } - EXPORT_SYMBOL(tcp_free_md5sig_pool); static struct tcp_md5sig_pool * __percpu * @@ -2934,7 +2953,6 @@ retry: } return pool; } - EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2958,7 +2976,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) spin_unlock(&tcp_md5sig_pool_lock); if (p) - return *per_cpu_ptr(p, smp_processor_id()); + return *this_cpu_ptr(p); local_bh_enable(); return NULL; @@ -2986,7 +3004,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, th->check = old_checksum; return err; } - EXPORT_SYMBOL(tcp_md5_hash_header); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, @@ -2999,6 +3016,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, const unsigned head_data_len = skb_headlen(skb) > header_len ? 
skb_headlen(skb) - header_len : 0; const struct skb_shared_info *shi = skb_shinfo(skb); + struct sk_buff *frag_iter; sg_init_table(&sg, 1); @@ -3013,9 +3031,12 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, return 1; } + skb_walk_frags(skb, frag_iter) + if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) + return 1; + return 0; } - EXPORT_SYMBOL(tcp_md5_hash_skb_data); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) @@ -3025,7 +3046,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) sg_init_one(&sg, key->key, key->keylen); return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); } - EXPORT_SYMBOL(tcp_md5_hash_key); #endif @@ -3297,16 +3317,3 @@ void __init tcp_init(void) tcp_secret_retiring = &tcp_secret_two; tcp_secret_secondary = &tcp_secret_two; } - -EXPORT_SYMBOL(tcp_close); -EXPORT_SYMBOL(tcp_disconnect); -EXPORT_SYMBOL(tcp_getsockopt); -EXPORT_SYMBOL(tcp_ioctl); -EXPORT_SYMBOL(tcp_poll); -EXPORT_SYMBOL(tcp_read_sock); -EXPORT_SYMBOL(tcp_recvmsg); -EXPORT_SYMBOL(tcp_sendmsg); -EXPORT_SYMBOL(tcp_splice_read); -EXPORT_SYMBOL(tcp_sendpage); -EXPORT_SYMBOL(tcp_setsockopt); -EXPORT_SYMBOL(tcp_shutdown); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 548d575e6cc6..3c426cb318e7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -78,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; +EXPORT_SYMBOL(sysctl_tcp_reordering); int sysctl_tcp_ecn __read_mostly = 2; +EXPORT_SYMBOL(sysctl_tcp_ecn); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 2; +EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; @@ -419,6 +422,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) 
inet_csk(sk)->icsk_ack.rcv_mss = hint; } +EXPORT_SYMBOL(tcp_initialize_rcv_mss); /* Receiver "autotuning" code. * @@ -2938,6 +2942,7 @@ void tcp_simple_retransmit(struct sock *sk) } tcp_xmit_retransmit_queue(sk); } +EXPORT_SYMBOL(tcp_simple_retransmit); /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, @@ -3286,7 +3291,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. */ - if (!(scb->flags & TCPCB_FLAG_SYN)) { + if (!(scb->flags & TCPHDR_SYN)) { flag |= FLAG_DATA_ACKED; } else { flag |= FLAG_SYN_ACKED; @@ -3858,6 +3863,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, } } } +EXPORT_SYMBOL(tcp_parse_options); static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) { @@ -3931,6 +3937,7 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th) } return NULL; } +EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif static inline void tcp_store_ts_recent(struct tcp_sock *tp) @@ -5432,6 +5439,7 @@ discard: __kfree_skb(skb); return 0; } +EXPORT_SYMBOL(tcp_rcv_established); static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) @@ -5931,14 +5939,4 @@ discard: } return 0; } - -EXPORT_SYMBOL(sysctl_tcp_ecn); -EXPORT_SYMBOL(sysctl_tcp_reordering); -EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); -EXPORT_SYMBOL(tcp_parse_options); -#ifdef CONFIG_TCP_MD5SIG -EXPORT_SYMBOL(tcp_parse_md5sig_option); -#endif -EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); -EXPORT_SYMBOL(tcp_initialize_rcv_mss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fe193e53af44..020766292bb0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,6 +84,7 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; 
+EXPORT_SYMBOL(sysctl_tcp_low_latency); #ifdef CONFIG_TCP_MD5SIG @@ -100,6 +101,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) #endif struct inet_hashinfo tcp_hashinfo; +EXPORT_SYMBOL(tcp_hashinfo); static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) { @@ -139,7 +141,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } - EXPORT_SYMBOL_GPL(tcp_twsk_unique); /* This will initiate an outgoing connection. */ @@ -204,10 +205,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * TIME-WAIT * and initialize rx_opt.ts_recent from it, * when trying new connection. */ - if (peer != NULL && - (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; - tp->rx_opt.ts_recent = peer->tcp_ts; + if (peer) { + inet_peer_refcheck(peer); + if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { + tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; + tp->rx_opt.ts_recent = peer->tcp_ts; + } } } @@ -237,7 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. */ sk->sk_gso_type = SKB_GSO_TCPV4; - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, @@ -265,6 +268,7 @@ failure: inet->inet_dport = 0; return err; } +EXPORT_SYMBOL(tcp_v4_connect); /* * This routine does path mtu discovery as defined in RFC1191. 
@@ -543,6 +547,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); } +EXPORT_SYMBOL(tcp_v4_send_check); int tcp_v4_gso_send_check(struct sk_buff *skb) { @@ -793,19 +798,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -#ifdef CONFIG_SYN_COOKIES -static void syn_flood_warning(struct sk_buff *skb) +static void syn_flood_warning(const struct sk_buff *skb) { - static unsigned long warntime; + const char *msg; - if (time_after(jiffies, (warntime + HZ * 60))) { - warntime = jiffies; - printk(KERN_INFO - "possible SYN flooding on port %d. Sending cookies.\n", - ntohs(tcp_hdr(skb)->dest)); - } -} +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) + msg = "Sending cookies"; + else #endif + msg = "Dropping request"; + + pr_info("TCP: Possible SYN flooding on port %d. %s.\n", + ntohs(tcp_hdr(skb)->dest), msg); +} /* * Save and compile IPv4 options into the request_sock if needed. @@ -857,7 +863,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, { return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); } - EXPORT_SYMBOL(tcp_v4_md5_lookup); static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, @@ -924,7 +929,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } return 0; } - EXPORT_SYMBOL(tcp_v4_md5_do_add); static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, @@ -962,7 +966,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) } return -ENOENT; } - EXPORT_SYMBOL(tcp_v4_md5_do_del); static void tcp_v4_clear_md5_list(struct sock *sk) @@ -1135,7 +1138,6 @@ clear_hash_noput: memset(md5_hash, 0, 16); return 1; } - EXPORT_SYMBOL(tcp_v4_md5_hash_skb); static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) @@ -1243,6 +1245,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * evidently real one. 
*/ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { + if (net_ratelimit()) + syn_flood_warning(skb); #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { want_cookie = 1; @@ -1323,15 +1327,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; - if (!want_cookie) + if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); if (want_cookie) { -#ifdef CONFIG_SYN_COOKIES - syn_flood_warning(skb); - req->cookie_ts = tmp_opt.tstamp_ok; -#endif isn = cookie_v4_init_sequence(sk, skb, &req->mss); + req->cookie_ts = tmp_opt.tstamp_ok; } else if (!isn) { struct inet_peer *peer = NULL; @@ -1349,6 +1350,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { + inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { @@ -1393,6 +1395,7 @@ drop_and_free: drop: return 0; } +EXPORT_SYMBOL(tcp_v4_conn_request); /* @@ -1478,6 +1481,7 @@ exit: dst_release(dst); return NULL; } +EXPORT_SYMBOL(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { @@ -1504,7 +1508,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) } #ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) + if (!th->syn) sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); #endif return sk; @@ -1607,6 +1611,7 @@ csum_err: TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; } +EXPORT_SYMBOL(tcp_v4_do_rcv); /* * From tcp_input.c @@ -1793,6 +1798,7 @@ int tcp_v4_remember_stamp(struct sock *sk) return 0; } +EXPORT_SYMBOL(tcp_v4_remember_stamp); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { @@ -1832,6 +1838,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .compat_getsockopt = 
compat_ip_getsockopt, #endif }; +EXPORT_SYMBOL(ipv4_specific); #ifdef CONFIG_TCP_MD5SIG static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { @@ -1960,7 +1967,6 @@ void tcp_v4_destroy_sock(struct sock *sk) percpu_counter_dec(&tcp_sockets_allocated); } - EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS @@ -1978,6 +1984,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; } +/* + * Get next listener socket follow cur. If cur is NULL, get first socket + * starting from bucket given in st->bucket; when st->bucket is zero the + * very first socket in the hash table is returned. + */ static void *listening_get_next(struct seq_file *seq, void *cur) { struct inet_connection_sock *icsk; @@ -1988,14 +1999,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct net *net = seq_file_net(seq); if (!sk) { - st->bucket = 0; - ilb = &tcp_hashinfo.listening_hash[0]; + ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); sk = sk_nulls_head(&ilb->head); + st->offset = 0; goto get_sk; } ilb = &tcp_hashinfo.listening_hash[st->bucket]; ++st->num; + ++st->offset; if (st->state == TCP_SEQ_STATE_OPENREQ) { struct request_sock *req = cur; @@ -2010,6 +2022,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } req = req->dl_next; } + st->offset = 0; if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) break; get_req: @@ -2045,6 +2058,7 @@ start_req: read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } spin_unlock_bh(&ilb->lock); + st->offset = 0; if (++st->bucket < INET_LHTABLE_SIZE) { ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); @@ -2058,7 +2072,12 @@ out: static void *listening_get_idx(struct seq_file *seq, loff_t *pos) { - void *rc = listening_get_next(seq, NULL); + struct tcp_iter_state *st = seq->private; + void *rc; + + st->bucket = 0; + st->offset = 0; + rc 
= listening_get_next(seq, NULL); while (rc && *pos) { rc = listening_get_next(seq, rc); @@ -2073,13 +2092,18 @@ static inline int empty_bucket(struct tcp_iter_state *st) hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); } +/* + * Get first established socket starting from bucket given in st->bucket. + * If st->bucket is zero, the very first socket in the hash is returned. + */ static void *established_get_first(struct seq_file *seq) { struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); void *rc = NULL; - for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { + st->offset = 0; + for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { struct sock *sk; struct hlist_nulls_node *node; struct inet_timewait_sock *tw; @@ -2124,6 +2148,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) struct net *net = seq_file_net(seq); ++st->num; + ++st->offset; if (st->state == TCP_SEQ_STATE_TIME_WAIT) { tw = cur; @@ -2140,6 +2165,7 @@ get_tw: st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ + st->offset = 0; while (++st->bucket <= tcp_hashinfo.ehash_mask && empty_bucket(st)) ; @@ -2167,7 +2193,11 @@ out: static void *established_get_idx(struct seq_file *seq, loff_t pos) { - void *rc = established_get_first(seq); + struct tcp_iter_state *st = seq->private; + void *rc; + + st->bucket = 0; + rc = established_get_first(seq); while (rc && pos) { rc = established_get_next(seq, rc); @@ -2192,24 +2222,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) return rc; } +static void *tcp_seek_last_pos(struct seq_file *seq) +{ + struct tcp_iter_state *st = seq->private; + int offset = st->offset; + int orig_num = st->num; + void *rc = NULL; + + switch (st->state) { + case TCP_SEQ_STATE_OPENREQ: + case TCP_SEQ_STATE_LISTENING: + if (st->bucket >= INET_LHTABLE_SIZE) + break; + st->state = TCP_SEQ_STATE_LISTENING; + rc = listening_get_next(seq, NULL); + while (offset-- && rc) + 
rc = listening_get_next(seq, rc); + if (rc) + break; + st->bucket = 0; + /* Fallthrough */ + case TCP_SEQ_STATE_ESTABLISHED: + case TCP_SEQ_STATE_TIME_WAIT: + st->state = TCP_SEQ_STATE_ESTABLISHED; + if (st->bucket > tcp_hashinfo.ehash_mask) + break; + rc = established_get_first(seq); + while (offset-- && rc) + rc = established_get_next(seq, rc); + } + + st->num = orig_num; + + return rc; +} + static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) { struct tcp_iter_state *st = seq->private; + void *rc; + + if (*pos && *pos == st->last_pos) { + rc = tcp_seek_last_pos(seq); + if (rc) + goto out; + } + st->state = TCP_SEQ_STATE_LISTENING; st->num = 0; - return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + st->bucket = 0; + st->offset = 0; + rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + +out: + st->last_pos = *pos; + return rc; } static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct tcp_iter_state *st = seq->private; void *rc = NULL; - struct tcp_iter_state *st; if (v == SEQ_START_TOKEN) { rc = tcp_get_idx(seq, 0); goto out; } - st = seq->private; switch (st->state) { case TCP_SEQ_STATE_OPENREQ: @@ -2217,6 +2295,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) rc = listening_get_next(seq, v); if (!rc) { st->state = TCP_SEQ_STATE_ESTABLISHED; + st->bucket = 0; + st->offset = 0; rc = established_get_first(seq); } break; @@ -2227,6 +2307,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } out: ++*pos; + st->last_pos = *pos; return rc; } @@ -2265,6 +2346,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file) s = ((struct seq_file *)file->private_data)->private; s->family = afinfo->family; + s->last_pos = 0; return 0; } @@ -2288,11 +2370,13 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) rc = -ENOMEM; return rc; } +EXPORT_SYMBOL(tcp_proc_register); void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) { 
proc_net_remove(net, afinfo->name); } +EXPORT_SYMBOL(tcp_proc_unregister); static void get_openreq4(struct sock *sk, struct request_sock *req, struct seq_file *f, int i, int uid, int *len) @@ -2516,6 +2600,8 @@ struct proto tcp_prot = { .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, .recvmsg = tcp_recvmsg, + .sendmsg = tcp_sendmsg, + .sendpage = tcp_sendpage, .backlog_rcv = tcp_v4_do_rcv, .hash = inet_hash, .unhash = inet_unhash, @@ -2534,11 +2620,13 @@ struct proto tcp_prot = { .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, + .no_autobind = true, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif }; +EXPORT_SYMBOL(tcp_prot); static int __net_init tcp_sk_init(struct net *net) @@ -2569,20 +2657,3 @@ void __init tcp_v4_init(void) if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } - -EXPORT_SYMBOL(ipv4_specific); -EXPORT_SYMBOL(tcp_hashinfo); -EXPORT_SYMBOL(tcp_prot); -EXPORT_SYMBOL(tcp_v4_conn_request); -EXPORT_SYMBOL(tcp_v4_connect); -EXPORT_SYMBOL(tcp_v4_do_rcv); -EXPORT_SYMBOL(tcp_v4_remember_stamp); -EXPORT_SYMBOL(tcp_v4_send_check); -EXPORT_SYMBOL(tcp_v4_syn_recv_sock); - -#ifdef CONFIG_PROC_FS -EXPORT_SYMBOL(tcp_proc_register); -EXPORT_SYMBOL(tcp_proc_unregister); -#endif -EXPORT_SYMBOL(sysctl_tcp_low_latency); - diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 794c2e122a41..f25b56cb85cb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -47,7 +47,6 @@ struct inet_timewait_death_row tcp_death_row = { .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, (unsigned long)&tcp_death_row), }; - EXPORT_SYMBOL_GPL(tcp_death_row); static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) @@ -262,6 +261,7 @@ kill: inet_twsk_put(tw); return TCP_TW_SUCCESS; } +EXPORT_SYMBOL(tcp_timewait_state_process); /* * Move a socket to 
time-wait or dead fin-wait-2 state. @@ -362,7 +362,6 @@ void tcp_twsk_destructor(struct sock *sk) tcp_free_md5sig_pool(); #endif } - EXPORT_SYMBOL_GPL(tcp_twsk_destructor); static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, @@ -510,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, } return newsk; } +EXPORT_SYMBOL(tcp_create_openreq_child); /* * Process an incoming packet for SYN_RECV sockets represented @@ -706,6 +706,7 @@ embryonic_reset: inet_csk_reqsk_queue_drop(sk, req, prev); return NULL; } +EXPORT_SYMBOL(tcp_check_req); /* * Queue segment on the new socket if the new socket is active, @@ -737,8 +738,4 @@ int tcp_child_process(struct sock *parent, struct sock *child, sock_put(child); return ret; } - -EXPORT_SYMBOL(tcp_check_req); EXPORT_SYMBOL(tcp_child_process); -EXPORT_SYMBOL(tcp_create_openreq_child); -EXPORT_SYMBOL(tcp_timewait_state_process); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4ed957f201a..de3bd8458588 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -247,6 +247,7 @@ void tcp_select_initial_window(int __space, __u32 mss, /* Set the clamp no higher than max representable value */ (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); } +EXPORT_SYMBOL(tcp_select_initial_window); /* Chose a new window to advertise, update state in tcp_sock for the * socket, and return result with RFC1323 scaling applied. The return @@ -294,9 +295,9 @@ static u16 tcp_select_window(struct sock *sk) /* Packet ECN state for a SYN-ACK */ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE; } /* Packet ECN state for a SYN. 
*/ @@ -306,7 +307,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) tp->ecn_flags = 0; if (sysctl_tcp_ecn == 1) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR; + TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; } } @@ -361,7 +362,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) skb_shinfo(skb)->gso_type = 0; TCP_SKB_CB(skb)->seq = seq; - if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN)) + if (flags & (TCPHDR_SYN | TCPHDR_FIN)) seq++; TCP_SKB_CB(skb)->end_seq = seq; } @@ -820,7 +821,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); - if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) + if (unlikely(tcb->flags & TCPHDR_SYN)) tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); else tcp_options_size = tcp_established_options(sk, skb, &opts, @@ -843,7 +844,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags); - if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { + if (unlikely(tcb->flags & TCPHDR_SYN)) { /* RFC1323: The window in SYN & SYN/ACK segments * is never scaled. 
*/ @@ -866,7 +867,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, } tcp_options_write((__be32 *)(th + 1), tp, &opts); - if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) + if (likely((tcb->flags & TCPHDR_SYN) == 0)) TCP_ECN_send(sk, skb, tcp_header_size); #ifdef CONFIG_TCP_MD5SIG @@ -880,7 +881,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, icsk->icsk_af_ops->send_check(sk, skb); - if (likely(tcb->flags & TCPCB_FLAG_ACK)) + if (likely(tcb->flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); if (skb->len != tcp_header_size) @@ -1023,7 +1024,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); + TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->flags = flags; TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; @@ -1189,6 +1190,7 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; } +EXPORT_SYMBOL(tcp_mtup_init); /* This function synchronize snd mss to current pmtu/exthdr set. @@ -1232,6 +1234,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } +EXPORT_SYMBOL(tcp_sync_mss); /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. @@ -1328,8 +1331,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, u32 in_flight, cwnd; /* Don't be strict about the congestion window for the final FIN. 
*/ - if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && - tcp_skb_pcount(skb) == 1) + if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1) return 1; in_flight = tcp_packets_in_flight(tp); @@ -1398,7 +1400,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, * Nagle can be ignored during F-RTO too (see RFC4138). */ if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) + (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)) return 1; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) @@ -1461,7 +1463,7 @@ int tcp_may_send_now(struct sock *sk) * packet has never been sent out before (and thus is not cloned). */ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, - unsigned int mss_now) + unsigned int mss_now, gfp_t gfp) { struct sk_buff *buff; int nlen = skb->len - len; @@ -1471,7 +1473,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, if (skb->len != skb->data_len) return tcp_fragment(sk, skb, len, mss_now); - buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC); + buff = sk_stream_alloc_skb(sk, 0, gfp); if (unlikely(buff == NULL)) return -ENOMEM; @@ -1487,7 +1489,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); + TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->flags = flags; /* This packet was never sent out yet, so no SACK bits. 
*/ @@ -1518,7 +1520,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) const struct inet_connection_sock *icsk = inet_csk(sk); u32 send_win, cong_win, limit, in_flight; - if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) + if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) goto send_now; if (icsk->icsk_ca_state != TCP_CA_Open) @@ -1644,7 +1646,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; - TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(nskb)->flags = TCPHDR_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; nskb->ip_summed = skb->ip_summed; @@ -1669,7 +1671,7 @@ static int tcp_mtu_probe(struct sock *sk) sk_wmem_free_skb(sk, skb); } else { TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & - ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); + ~(TCPHDR_FIN|TCPHDR_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -1769,7 +1771,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, cwnd_quota); if (skb->len > limit && - unlikely(tso_fragment(sk, skb, limit, mss_now))) + unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; TCP_SKB_CB(skb)->when = tcp_time_stamp; @@ -2020,7 +2022,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (!sysctl_tcp_retrans_collapse) return; - if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) + if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN) return; tcp_for_write_queue_from_safe(skb, tmp, sk) { @@ -2112,7 +2114,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * since it is cheap to do so and saves bytes on the network. 
*/ if (skb->len > 0 && - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && + (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { if (!pskb_trim(skb, 0)) { /* Reuse, even though it does some unnecessary work */ @@ -2208,6 +2210,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) int mib_idx; int fwd_rexmitting = 0; + if (!tp->packets_out) + return; + if (!tp->lost_out) tp->retransmit_high = tp->snd_una; @@ -2301,7 +2306,7 @@ void tcp_send_fin(struct sock *sk) mss_now = tcp_current_mss(sk); if (tcp_send_head(sk) != NULL) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; + TCP_SKB_CB(skb)->flags |= TCPHDR_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; } else { @@ -2318,7 +2323,7 @@ void tcp_send_fin(struct sock *sk) skb_reserve(skb, MAX_TCP_HEADER); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, - TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); + TCPHDR_ACK | TCPHDR_FIN); tcp_queue_skb(sk, skb); } __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); @@ -2343,7 +2348,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_TCP_HEADER); tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), - TCPCB_FLAG_ACK | TCPCB_FLAG_RST); + TCPHDR_ACK | TCPHDR_RST); /* Send it off. 
*/ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) @@ -2363,11 +2368,11 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *skb; skb = tcp_write_queue_head(sk); - if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) { + if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) { printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); return -EFAULT; } - if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) { + if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) { if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); if (nskb == NULL) @@ -2381,7 +2386,7 @@ int tcp_send_synack(struct sock *sk) skb = nskb; } - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; + TCP_SKB_CB(skb)->flags |= TCPHDR_ACK; TCP_ECN_send_synack(tcp_sk(sk), skb); } TCP_SKB_CB(skb)->when = tcp_time_stamp; @@ -2460,7 +2465,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, * not even correctly set) */ tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, - TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); + TCPHDR_SYN | TCPHDR_ACK); if (OPTION_COOKIE_EXTENSION & opts.options) { if (s_data_desired) { @@ -2515,6 +2520,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, return skb; } +EXPORT_SYMBOL(tcp_make_synack); /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) @@ -2592,7 +2598,7 @@ int tcp_connect(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tp->snd_nxt = tp->write_seq; - tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN); + tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); TCP_ECN_send_syn(sk, buff); /* Send it off. */ @@ -2617,6 +2623,7 @@ int tcp_connect(struct sock *sk) inet_csk(sk)->icsk_rto, TCP_RTO_MAX); return 0; } +EXPORT_SYMBOL(tcp_connect); /* Send out a delayed ack, the caller does the policy checking * to see if we should even be here. 
See tcp_input.c:tcp_ack_snd_check() @@ -2698,7 +2705,7 @@ void tcp_send_ack(struct sock *sk) /* Reserve space for headers and prepare control bits. */ skb_reserve(buff, MAX_TCP_HEADER); - tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK); + tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; @@ -2732,7 +2739,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) * end to send an ack. Don't queue or clone SKB, just * send it. */ - tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK); + tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); TCP_SKB_CB(skb)->when = tcp_time_stamp; return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } @@ -2762,13 +2769,13 @@ int tcp_write_wakeup(struct sock *sk) if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || skb->len > mss) { seg_size = min(seg_size, mss); - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; if (tcp_fragment(sk, skb, seg_size, mss)) return -1; } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(sk, skb, mss); - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); if (!err) @@ -2821,10 +2828,3 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } } - -EXPORT_SYMBOL(tcp_select_initial_window); -EXPORT_SYMBOL(tcp_connect); -EXPORT_SYMBOL(tcp_make_synack); -EXPORT_SYMBOL(tcp_simple_retransmit); -EXPORT_SYMBOL(tcp_sync_mss); -EXPORT_SYMBOL(tcp_mtup_init); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 440a5c6004f6..808bb920c9f5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -41,7 +41,6 @@ void tcp_init_xmit_timers(struct sock *sk) inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); } - EXPORT_SYMBOL(tcp_init_xmit_timers); static void 
tcp_write_err(struct sock *sk) diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 3b3813cc80b9..59186ca7808a 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -48,7 +48,6 @@ err: return ret; } - EXPORT_SYMBOL(xfrm4_tunnel_register); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) @@ -72,7 +71,6 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) return ret; } - EXPORT_SYMBOL(xfrm4_tunnel_deregister); static int tunnel4_rcv(struct sk_buff *skb) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index eec4ff456e33..32e0bef60d0a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, !sock_flag(sk, SOCK_BROADCAST)) goto out; if (connected) - sk_dst_set(sk, dst_clone(&rt->u.dst)); + sk_dst_set(sk, dst_clone(&rt->dst)); } if (msg->msg_flags&MSG_CONFIRM) @@ -978,7 +978,7 @@ out: return err; do_confirm: - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 6610bf76369f..ab76aa928fa9 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -58,6 +58,7 @@ struct proto udplite_prot = { .compat_getsockopt = compat_udp_getsockopt, #endif }; +EXPORT_SYMBOL(udplite_prot); static struct inet_protosw udplite4_protosw = { .type = SOCK_DGRAM, @@ -127,5 +128,3 @@ out_unregister_proto: out_register_err: printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); } - -EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index ad8fbb871aa0..06814b6216dc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -163,5 +163,4 @@ int xfrm4_rcv(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); } - EXPORT_SYMBOL(xfrm4_rcv); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1705476670ef..869078d4eeb9 100644 
--- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, fl.fl4_src = saddr->a4; err = __ip_route_output_key(net, &rt, &fl); - dst = &rt->u.dst; + dst = &rt->dst; if (err) dst = ERR_PTR(err); return dst; @@ -108,6 +108,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) u8 *xprth = skb_network_header(skb) + iph->ihl * 4; memset(fl, 0, sizeof(struct flowi)); + fl->mark = skb->mark; + if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { switch (iph->protocol) { case IPPROTO_UDP: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1a698df5706..ab70a3fbcafa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -121,8 +121,6 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __ipv6_regen_rndid(struct inet6_dev *idev); static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_regen_rndid(unsigned long data); - -static int desync_factor = MAX_DESYNC_FACTOR * HZ; #endif static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); @@ -284,13 +282,16 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { if (snmp_mib_init((void __percpu **)idev->stats.ipv6, - sizeof(struct ipstats_mib)) < 0) + sizeof(struct ipstats_mib), + __alignof__(struct ipstats_mib)) < 0) goto err_ip; if (snmp_mib_init((void __percpu **)idev->stats.icmpv6, - sizeof(struct icmpv6_mib)) < 0) + sizeof(struct icmpv6_mib), + __alignof__(struct icmpv6_mib)) < 0) goto err_icmp; if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg, - sizeof(struct icmpv6msg_mib)) < 0) + sizeof(struct icmpv6msg_mib), + __alignof__(struct icmpv6msg_mib)) < 0) goto err_icmpmsg; return 0; @@ -557,7 +558,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warning("Freeing alive inet6 address %p\n", ifp); return; } - dst_release(&ifp->rt->u.dst); + 
dst_release(&ifp->rt->dst); call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); } @@ -823,7 +824,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) rt->rt6i_flags |= RTF_EXPIRES; } } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } out: @@ -890,7 +891,8 @@ retry: idev->cnf.temp_valid_lft); tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, - idev->cnf.temp_prefered_lft - desync_factor / HZ); + idev->cnf.temp_prefered_lft - + idev->cnf.max_desync_factor); tmp_plen = ifp->prefix_len; max_addresses = idev->cnf.max_addresses; tmp_cstamp = ifp->cstamp; @@ -1650,7 +1652,8 @@ static void ipv6_regen_rndid(unsigned long data) expires = jiffies + idev->cnf.temp_prefered_lft * HZ - - idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor; + idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - + idev->cnf.max_desync_factor * HZ; if (time_before(expires, jiffies)) { printk(KERN_WARNING "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n", @@ -1760,7 +1763,10 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) idev = ipv6_find_idev(dev); if (!idev) - return NULL; + return ERR_PTR(-ENOBUFS); + + if (idev->cnf.disable_ipv6) + return ERR_PTR(-EACCES); /* Add default multicast route */ addrconf_add_mroute(dev); @@ -1863,7 +1869,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) dev, expires, flags); } if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* Try to figure out our local address for this prefix */ @@ -2129,8 +2135,9 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, if (!dev) return -ENODEV; - if ((idev = addrconf_add_dev(dev)) == NULL) - return -ENOBUFS; + idev = addrconf_add_dev(dev); + if (IS_ERR(idev)) + return PTR_ERR(idev); scope = ipv6_addr_scope(pfx); @@ -2377,7 +2384,7 @@ static void addrconf_dev_config(struct net_device *dev) } idev = addrconf_add_dev(dev); - if (idev == 
NULL) + if (IS_ERR(idev)) return; memset(&addr, 0, sizeof(struct in6_addr)); @@ -2468,7 +2475,7 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) ASSERT_RTNL(); idev = addrconf_add_dev(dev); - if (!idev) { + if (IS_ERR(idev)) { printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); return; } @@ -3492,8 +3499,12 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred -= tval; else preferred = 0; - if (valid != INFINITY_LIFE_TIME) - valid -= tval; + if (valid != INFINITY_LIFE_TIME) { + if (valid > tval) + valid -= tval; + else + valid = 0; + } } } else { preferred = INFINITY_LIFE_TIME; @@ -3855,12 +3866,28 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, memset(&stats[items], 0, pad); } +static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib, + int items, int bytes, size_t syncpoff) +{ + int i; + int pad = bytes - sizeof(u64) * items; + BUG_ON(pad < 0); + + /* Use put_unaligned() because stats may not be aligned for u64. 
*/ + put_unaligned(items, &stats[0]); + for (i = 1; i < items; i++) + put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]); + + memset(&stats[items], 0, pad); +} + static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes) { switch (attrtype) { case IFLA_INET6_STATS: - __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); + __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6, + IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); @@ -4093,11 +4120,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); - dst_hold(&ifp->rt->u.dst); + dst_hold(&ifp->rt->dst); if (ifp->state == INET6_IFADDR_STATE_DEAD && ip6_del_rt(ifp->rt)) - dst_free(&ifp->rt->u.dst); + dst_free(&ifp->rt->dst); break; } } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 8c4348cb1950..f0e774cea386 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -53,11 +53,7 @@ static struct ip6addrlbl_table static inline struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) { -#ifdef CONFIG_NET_NS - return lbl->lbl_net; -#else - return &init_net; -#endif + return read_pnet(&lbl->lbl_net); } /* diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e733942dafe1..56b9bf2516f4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -522,10 +522,10 @@ const struct proto_ops inet6_stream_ops = { .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ - .sendmsg = tcp_sendmsg, /* ok */ - .recvmsg = sock_common_recvmsg, /* ok */ + .sendmsg = inet_sendmsg, /* ok */ + .recvmsg = inet_recvmsg, /* ok */ .mmap = sock_no_mmap, - .sendpage = tcp_sendpage, + .sendpage = 
inet_sendpage, .splice_read = tcp_splice_read, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, @@ -549,7 +549,7 @@ const struct proto_ops inet6_dgram_ops = { .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet_sendmsg, /* ok */ - .recvmsg = sock_common_recvmsg, /* ok */ + .recvmsg = inet_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT @@ -651,7 +651,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); @@ -665,12 +665,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { @@ -976,19 +971,24 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, - sizeof (struct udp_mib)) < 0) + sizeof(struct udp_mib), + __alignof__(struct udp_mib)) < 0) return -ENOMEM; if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, - sizeof (struct udp_mib)) < 0) + sizeof(struct udp_mib), + __alignof__(struct udp_mib)) < 0) goto err_udplite_mib; if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, - sizeof(struct ipstats_mib)) < 0) + sizeof(struct ipstats_mib), + __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, - sizeof(struct icmpv6_mib)) < 0) + sizeof(struct icmpv6_mib), + __alignof__(struct icmpv6_mib)) < 0) goto err_icmp_mib; if (snmp_mib_init((void 
__percpu **)net->mib.icmpv6msg_statistics, - sizeof(struct icmpv6msg_mib)) < 0) + sizeof(struct icmpv6msg_mib), + __alignof__(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; return 0; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index b5b07054508a..0e5e943446f0 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -77,41 +77,40 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) pac->acl_next = NULL; ipv6_addr_copy(&pac->acl_addr, addr); + rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); - dst_release(&rt->u.dst); + dst_release(&rt->dst); } else if (ishost) { err = -EADDRNOTAVAIL; - goto out_free_pac; + goto error; } else { /* router, no matching interface: just pick one */ - - dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags_rcu(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (dev == NULL) { err = -ENODEV; - goto out_free_pac; + goto error; } - idev = in6_dev_get(dev); + idev = __in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; else err = -EADDRNOTAVAIL; - goto out_dev_put; + goto error; } /* reset ishost, now that we have a specific device */ ishost = !idev->cnf.forwarding; - in6_dev_put(idev); pac->acl_ifindex = dev->ifindex; @@ -124,26 +123,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (ishost) err = -EADDRNOTAVAIL; if (err) - goto out_dev_put; + goto error; } err = ipv6_dev_ac_inc(dev, addr); - if (err) - goto out_dev_put; - - write_lock_bh(&ipv6_sk_ac_lock); - pac->acl_next = np->ipv6_ac_list; - np->ipv6_ac_list = pac; - write_unlock_bh(&ipv6_sk_ac_lock); - - dev_put(dev); - - return 0; + if (!err) { + write_lock_bh(&ipv6_sk_ac_lock); + pac->acl_next = np->ipv6_ac_list; + np->ipv6_ac_list = pac; + write_unlock_bh(&ipv6_sk_ac_lock); + pac = NULL; + } 
-out_dev_put: - dev_put(dev); -out_free_pac: - sock_kfree_s(sk, pac, sizeof(*pac)); +error: + rcu_read_unlock(); + if (pac) + sock_kfree_s(sk, pac, sizeof(*pac)); return err; } @@ -176,11 +171,12 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(net, pac->acl_ifindex); - if (dev) { + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - dev_put(dev); - } + rcu_read_unlock(); + sock_kfree_s(sk, pac, sizeof(*pac)); return 0; } @@ -199,13 +195,12 @@ void ipv6_sock_ac_close(struct sock *sk) write_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; + rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - if (dev) - dev_put(dev); - dev = dev_get_by_index(net, pac->acl_ifindex); + dev = dev_get_by_index_rcu(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -213,8 +208,7 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } - if (dev) - dev_put(dev); + rcu_read_unlock(); } #if 0 @@ -250,7 +244,7 @@ static void aca_put(struct ifacaddr6 *ac) { if (atomic_dec_and_test(&ac->aca_refcnt)) { in6_dev_put(ac->aca_idev); - dst_release(&ac->aca_rt->u.dst); + dst_release(&ac->aca_rt->dst); kfree(ac); } } @@ -356,40 +350,39 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->u.dst); + dst_hold(&aca->aca_rt->dst); ip6_del_rt(aca->aca_rt); aca_put(aca); return 0; } +/* called with rcu_read_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) { - int ret; - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); + if (idev == NULL) return -ENODEV; - ret = __ipv6_dev_ac_dec(idev, addr); - in6_dev_put(idev); - return ret; + return 
__ipv6_dev_ac_dec(idev, addr); } /* * check if the interface has this anycast address + * called with rcu_read_lock() */ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) { struct inet6_dev *idev; struct ifacaddr6 *aca; - idev = in6_dev_get(dev); + idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); for (aca = idev->ac_list; aca; aca = aca->aca_next) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; read_unlock_bh(&idev->lock); - in6_dev_put(idev); return aca != NULL; } return 0; @@ -403,14 +396,15 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, { int found = 0; - if (dev) - return ipv6_chk_acast_dev(dev, addr); rcu_read_lock(); - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { - found = 1; - break; - } + if (dev) + found = ipv6_chk_acast_dev(dev, addr); + else + for_each_netdev_rcu(net, dev) + if (ipv6_chk_acast_dev(dev, addr)) { + found = 1; + break; + } rcu_read_unlock(); return found; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 712684687c9a..7d929a22cbc2 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -38,10 +38,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct dst_entry *dst; struct flowi fl; struct ip6_flowlabel *flowlabel = NULL; + struct ipv6_txoptions *opt; int addr_type; int err; @@ -155,19 +156,8 @@ ipv4_connected: security_sk_classify_flow(sk, &fl); - if (flowlabel) { - if (flowlabel->opt && flowlabel->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - } else if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr 
*)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + opt = flowlabel ? flowlabel->opt : np->opt; + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 8a659f92d17a..262f105d23b9 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -312,6 +312,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) Routing header. ********************************/ +/* called with rcu_read_lock() */ static int ipv6_rthdr_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); @@ -324,12 +325,9 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int accept_source_route = net->ipv6.devconf_all->accept_source_route; - idev = in6_dev_get(skb->dev); - if (idev) { - if (accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; - in6_dev_put(idev); - } + idev = __in6_dev_get(skb->dev); + if (idev && accept_source_route > idev->cnf.accept_source_route) + accept_source_route = idev->cnf.accept_source_route; if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + @@ -874,3 +872,27 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, return opt; } +/** + * fl6_update_dst - update flowi destination address with info given + * by srcrt option, if any. 
+ * + * @fl: flowi for which fl6_dst is to be updated + * @opt: struct ipv6_txoptions in which to look for srcrt opt + * @orig: copy of original fl6_dst address if modified + * + * Returns NULL if no txoptions or no srcrt, otherwise returns orig + * and initial value of fl->fl6_dst set in orig + */ +struct in6_addr *fl6_update_dst(struct flowi *fl, + const struct ipv6_txoptions *opt, + struct in6_addr *orig) +{ + if (!opt || !opt->srcrt) + return NULL; + + ipv6_addr_copy(orig, &fl->fl6_dst); + ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr); + return orig; +} + +EXPORT_SYMBOL_GPL(fl6_update_dst); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8e44f8f9c188..b1108ede18e1 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -43,8 +43,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, if (arg.result) return arg.result; - dst_hold(&net->ipv6.ip6_null_entry->u.dst); - return &net->ipv6.ip6_null_entry->u.dst; + dst_hold(&net->ipv6.ip6_null_entry->dst); + return &net->ipv6.ip6_null_entry->dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, @@ -86,7 +86,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, struct in6_addr saddr; if (ipv6_dev_get_saddr(net, - ip6_dst_idev(&rt->u.dst)->dev, + ip6_dst_idev(&rt->dst)->dev, &flp->fl6_dst, rt6_flags2srcprefs(flags), &saddr)) @@ -99,12 +99,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } again: - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = NULL; goto out; discard_pkt: - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); out: arg->result = rt; return rt == NULL ? 
-EAGAIN : 0; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0c5e3c3b7fd5..8a1628023bd1 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; struct dst_entry *dst; - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; memset(&fl, 0, sizeof(fl)); fl.proto = sk->sk_protocol; @@ -199,12 +199,7 @@ int inet6_csk_xmit(struct sk_buff *skb) fl.fl_ip_dport = inet->inet_dport; security_sk_classify_flow(sk, &fl); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 92a122b7795d..b6a585909d35 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -165,7 +165,7 @@ static __inline__ void node_free(struct fib6_node * fn) static __inline__ void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) - dst_free(&rt->u.dst); + dst_free(&rt->dst); } static void fib6_link_table(struct net *net, struct fib6_table *tb) @@ -278,7 +278,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) int res; struct rt6_info *rt; - for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { res = rt6_dump_route(rt, w->args); if (res < 0) { /* Frame is full, suspend walking */ @@ -619,7 +619,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &fn->leaf; - for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) { + for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) { /* * Search for duplicates */ @@ -647,7 +647,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if 
(iter->rt6i_metric > rt->rt6i_metric) break; - ins = &iter->u.dst.rt6_next; + ins = &iter->dst.rt6_next; } /* Reset round-robin state, if necessary */ @@ -658,7 +658,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * insert node */ - rt->u.dst.rt6_next = iter; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); @@ -799,7 +799,7 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - dst_free(&rt->u.dst); + dst_free(&rt->dst); } return err; @@ -810,7 +810,7 @@ out: st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); - dst_free(&rt->u.dst); + dst_free(&rt->dst); return err; #endif } @@ -1108,7 +1108,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, RT6_TRACE("fib6_del_route\n"); /* Unlink it */ - *rtp = rt->u.dst.rt6_next; + *rtp = rt->dst.rt6_next; rt->rt6i_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; @@ -1122,14 +1122,14 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, FOR_WALKERS(w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); - w->leaf = rt->u.dst.rt6_next; + w->leaf = rt->dst.rt6_next; if (w->leaf == NULL) w->state = FWS_U; } } read_unlock(&fib6_walker_lock); - rt->u.dst.rt6_next = NULL; + rt->dst.rt6_next = NULL; /* If it was last route, expunge its radix tree node */ if (fn->leaf == NULL) { @@ -1168,7 +1168,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info **rtp; #if RT6_DEBUG >= 2 - if (rt->u.dst.obsolete>0) { + if (rt->dst.obsolete>0) { WARN_ON(fn != NULL); return -ENOENT; } @@ -1195,7 +1195,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) * Walk the leaf entries looking for ourself */ - for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) { + for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { if (*rtp == rt) { fib6_del_route(fn, rtp, info); return 0; @@ 
-1334,7 +1334,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) .nl_net = c->net, }; - for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; @@ -1448,8 +1448,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) } gc_args.more++; } else if (rt->rt6i_flags & RTF_CACHE) { - if (atomic_read(&rt->u.dst.__refcnt) == 0 && - time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) { + if (atomic_read(&rt->dst.__refcnt) == 0 && + time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 89425af0684c..d40b330c0ee6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -698,7 +698,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); for (;;) { /* Prepare header of the next frame, @@ -726,7 +726,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = output(skb); if(!err) - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); if (err || !frag) @@ -740,9 +740,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); - dst_release(&rt->u.dst); + dst_release(&rt->dst); return 0; } @@ -752,9 +752,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); - dst_release(&rt->u.dst); + dst_release(&rt->dst); return err; } @@ -785,7 +785,7 @@ slow_path: 
* Allocate buffer. */ - if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { + if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); @@ -798,7 +798,7 @@ slow_path: */ ip6_copy_metadata(frag, skb); - skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); skb_put(frag, len + hlen + sizeof(struct frag_hdr)); skb_reset_network_header(frag); fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); @@ -1156,24 +1156,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* need source address above miyazawa*/ } - dst_hold(&rt->u.dst); - inet->cork.dst = &rt->u.dst; + dst_hold(&rt->dst); + inet->cork.dst = &rt->dst; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); + rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } inet->cork.fragsize = mtu; - if (dst_allfrag(rt->u.dst.path)) + if (dst_allfrag(rt->dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - + exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; @@ -1186,7 +1186,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, mtu = inet->cork.fragsize; } - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? 
opt->opt_nflen : 0); @@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } if (proto == IPPROTO_UDP && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, @@ -1270,7 +1270,7 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && - !(rt->u.dst.dev->features&NETIF_F_SG)) + !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -1281,7 +1281,7 @@ alloc_new_skb: * because we have no idea if we're the last one. */ if (datalen == length + fraggap) - alloclen += rt->u.dst.trailer_len; + alloclen += rt->dst.trailer_len; /* * We just reserve space for fragment header. @@ -1358,7 +1358,7 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; @@ -1503,7 +1503,7 @@ int ip6_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8f39893d8081..0fd027f3f47e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -552,7 +552,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) goto out; - skb2->dev = rt->u.dst.dev; + skb2->dev = rt->dst.dev; /* route "incoming" packet */ if (rt->rt_flags & RTCF_LOCAL) { @@ -562,7 +562,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_src = eiph->saddr; fl.fl4_tos = eiph->tos; if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_TUNNEL) { + 
rt->dst.dev->type != ARPHRD_TUNNEL) { ip_rt_put(rt); goto out; } @@ -626,7 +626,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_send(skb2, rel_type, rel_code, rel_info); if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); kfree_skb(skb2); } @@ -1135,7 +1135,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index bd43f0152c21..a7f66bc8f0b0 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -55,8 +55,6 @@ #include <asm/uaccess.h> -DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; - struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ab1622d7d409..d1444b95ad7e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -152,18 +152,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst->next = NULL; ipv6_addr_copy(&mc_lst->addr, addr); + rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (dev == NULL) { + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; } @@ -180,8 +181,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_mc_inc(dev, addr); if (err) { + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - dev_put(dev); return err; } @@ -190,7 +191,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) np->ipv6_mc_list = mc_lst; write_unlock_bh(&ipv6_sk_mc_lock); - dev_put(dev); + rcu_read_unlock(); return 0; } @@ -213,18 +214,17 @@ int ipv6_sock_mc_drop(struct sock 
*sk, int ifindex, const struct in6_addr *addr) *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(net, mc_lst->ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, mc_lst->ifindex); if (dev != NULL) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) { + if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - in6_dev_put(idev); - } - dev_put(dev); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return 0; } @@ -234,43 +234,36 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EADDRNOTAVAIL; } -static struct inet6_dev *ip6_mc_find_dev(struct net *net, - struct in6_addr *group, - int ifindex) +/* called with rcu_read_lock() */ +static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, + struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; struct inet6_dev *idev = NULL; if (ifindex == 0) { - struct rt6_info *rt; + struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); - rt = rt6_lookup(net, group, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (!dev) - goto nodev; - idev = in6_dev_get(dev); + return NULL; + idev = __in6_dev_get(dev); if (!idev) - goto release; + return NULL;; read_lock_bh(&idev->lock); - if (idev->dead) - goto unlock_release; - + if (idev->dead) { + read_unlock_bh(&idev->lock); + return NULL; + } return idev; - -unlock_release: - read_unlock_bh(&idev->lock); - in6_dev_put(idev); -release: - dev_put(dev); -nodev: - return NULL; } void ipv6_sock_mc_close(struct sock *sk) @@ -286,19 +279,17 @@ void ipv6_sock_mc_close(struct sock *sk) np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(net, 
mc_lst->ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, mc_lst->ifindex); if (dev) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) { + if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - in6_dev_put(idev); - } - dev_put(dev); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); - + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); write_lock_bh(&ipv6_sk_mc_lock); @@ -327,14 +318,17 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); - if (!idev) + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface); + if (!idev) { + rcu_read_unlock(); return -ENODEV; + } dev = idev->dev; err = -EADDRNOTAVAIL; - read_lock_bh(&ipv6_sk_mc_lock); + read_lock(&ipv6_sk_mc_lock); for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; @@ -358,7 +352,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, pmc->sfmode = omode; } - write_lock_bh(&pmc->sflock); + write_lock(&pmc->sflock); pmclocked = 1; psl = pmc->sflist; @@ -433,11 +427,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, ip6_mc_add_src(idev, group, omode, 1, source, 1); done: if (pmclocked) - write_unlock_bh(&pmc->sflock); - read_unlock_bh(&ipv6_sk_mc_lock); + write_unlock(&pmc->sflock); + read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); if (leavegroup) return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; @@ -463,14 +456,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev(net, group, gsf->gf_interface); + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - if (!idev) + if (!idev) 
{ + rcu_read_unlock(); return -ENODEV; + } dev = idev->dev; err = 0; - read_lock_bh(&ipv6_sk_mc_lock); + read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; @@ -512,7 +508,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); } - write_lock_bh(&pmc->sflock); + write_lock(&pmc->sflock); psl = pmc->sflist; if (psl) { (void) ip6_mc_del_src(idev, group, pmc->sfmode, @@ -522,13 +518,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); pmc->sflist = newpsl; pmc->sfmode = gsf->gf_fmode; - write_unlock_bh(&pmc->sflock); + write_unlock(&pmc->sflock); err = 0; done: - read_unlock_bh(&ipv6_sk_mc_lock); + read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; @@ -551,11 +546,13 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(net, group, gsf->gf_interface); + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - if (!idev) + if (!idev) { + rcu_read_unlock(); return -ENODEV; - + } dev = idev->dev; err = -EADDRNOTAVAIL; @@ -577,8 +574,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, psl = pmc->sflist; count = psl ? psl->sl_count : 0; read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); copycount = count < gsf->gf_numsrc ? 
count : gsf->gf_numsrc; gsf->gf_numsrc = count; @@ -604,8 +600,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, return 0; done: read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); return err; } @@ -822,6 +817,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) struct ifmcaddr6 *mc; struct inet6_dev *idev; + /* we need to take a reference on idev */ idev = in6_dev_get(dev); if (idev == NULL) @@ -860,7 +856,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); ipv6_addr_copy(&mc->mca_addr, addr); - mc->idev = idev; + mc->idev = idev; /* (reference taken) */ mc->mca_users = 1; /* mca_stamp should be updated upon changes */ mc->mca_cstamp = mc->mca_tstamp = jiffies; @@ -915,16 +911,18 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev; int err; - if (!idev) - return -ENODEV; - - err = __ipv6_dev_mc_dec(idev, addr); + rcu_read_lock(); - in6_dev_put(idev); + idev = __in6_dev_get(dev); + if (!idev) + err = -ENODEV; + else + err = __ipv6_dev_mc_dec(idev, addr); + rcu_read_unlock(); return err; } @@ -965,7 +963,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, struct ifmcaddr6 *mc; int rv = 0; - idev = in6_dev_get(dev); + rcu_read_lock(); + idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); for (mc = idev->mc_list; mc; mc=mc->next) { @@ -992,8 +991,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, rv = 1; /* don't filter unspecified source */ } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } + rcu_read_unlock(); return rv; } @@ -1104,6 +1103,7 @@ static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, return 1; } +/* called with rcu_read_lock() */ 
int igmp6_event_query(struct sk_buff *skb) { struct mld2_query *mlh2 = NULL; @@ -1127,7 +1127,7 @@ int igmp6_event_query(struct sk_buff *skb) if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) return -EINVAL; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (idev == NULL) return 0; @@ -1137,10 +1137,8 @@ int igmp6_event_query(struct sk_buff *skb) group_type = ipv6_addr_type(group); if (group_type != IPV6_ADDR_ANY && - !(group_type&IPV6_ADDR_MULTICAST)) { - in6_dev_put(idev); + !(group_type&IPV6_ADDR_MULTICAST)) return -EINVAL; - } if (len == 24) { int switchback; @@ -1161,10 +1159,9 @@ int igmp6_event_query(struct sk_buff *skb) } else if (len >= 28) { int srcs_offset = sizeof(struct mld2_query) - sizeof(struct icmp6hdr); - if (!pskb_may_pull(skb, srcs_offset)) { - in6_dev_put(idev); + if (!pskb_may_pull(skb, srcs_offset)) return -EINVAL; - } + mlh2 = (struct mld2_query *)skb_transport_header(skb); max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; if (!max_delay) @@ -1173,28 +1170,23 @@ int igmp6_event_query(struct sk_buff *skb) if (mlh2->mld2q_qrv) idev->mc_qrv = mlh2->mld2q_qrv; if (group_type == IPV6_ADDR_ANY) { /* general query */ - if (mlh2->mld2q_nsrcs) { - in6_dev_put(idev); + if (mlh2->mld2q_nsrcs) return -EINVAL; /* no sources allowed */ - } + mld_gq_start_timer(idev); - in6_dev_put(idev); return 0; } /* mark sources to include, if group & source-specific */ if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + - ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) { - in6_dev_put(idev); + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) return -EINVAL; - } + mlh2 = (struct mld2_query *)skb_transport_header(skb); mark = 1; } - } else { - in6_dev_put(idev); + } else return -EINVAL; - } read_lock_bh(&idev->lock); if (group_type == IPV6_ADDR_ANY) { @@ -1227,12 +1219,11 @@ int igmp6_event_query(struct sk_buff *skb) } } read_unlock_bh(&idev->lock); - in6_dev_put(idev); return 0; } - +/* called with 
rcu_read_lock() */ int igmp6_event_report(struct sk_buff *skb) { struct ifmcaddr6 *ma; @@ -1260,7 +1251,7 @@ int igmp6_event_report(struct sk_buff *skb) !(addr_type&IPV6_ADDR_LINKLOCAL)) return -EINVAL; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (idev == NULL) return -ENODEV; @@ -1280,7 +1271,6 @@ int igmp6_event_report(struct sk_buff *skb) } } read_unlock_bh(&idev->lock); - in6_dev_put(idev); return 0; } @@ -1396,12 +1386,14 @@ static void mld_sendpack(struct sk_buff *skb) struct mld2_report *pmr = (struct mld2_report *)skb_transport_header(skb); int payload_len, mldlen; - struct inet6_dev *idev = in6_dev_get(skb->dev); + struct inet6_dev *idev; struct net *net = dev_net(skb->dev); int err; struct flowi fl; struct dst_entry *dst; + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); @@ -1441,8 +1433,7 @@ out: } else IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; err_out: @@ -1779,7 +1770,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); - idev = in6_dev_get(skb->dev); + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); if (!dst) { @@ -1806,8 +1798,7 @@ out: } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; err_out: @@ -1998,8 +1989,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) &psf->sf_addr)) break; if (!dpsf) { - dpsf = (struct ip6_sf_list *) - kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 2794b6002836..d6e9599d0705 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -347,11 +347,12 @@ 
static const struct xfrm_type mip6_destopt_type = static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) { + struct ipv6hdr *iph = ipv6_hdr(skb); struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; int err = rt2->rt_hdr.nexthdr; spin_lock(&x->lock); - if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && + if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) && !ipv6_addr_any((struct in6_addr *)x->coaddr)) err = -ENOENT; spin_unlock(&x->lock); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0abdc242ddb7..58841c4ae947 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -586,6 +586,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = 0; + inc_opt |= ifp->idev->cnf.force_tllao; in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, @@ -599,7 +600,6 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, icmp6h.icmp6_solicited = solicited; icmp6h.icmp6_override = override; - inc_opt |= ifp->idev->cnf.force_tllao; __ndisc_send(dev, neigh, daddr, src_addr, &icmp6h, solicited_addr, inc_opt ? 
ND_OPT_TARGET_LL_ADDR : 0); @@ -1229,7 +1229,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", __func__); - dst_release(&rt->u.dst); + dst_release(&rt->dst); in6_dev_put(in6_dev); return; } @@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; + rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; } skip_defrtr: @@ -1363,7 +1363,7 @@ skip_linkparms: in6_dev->cnf.mtu6 = mtu; if (rt) - rt->u.dst.metrics[RTAX_MTU-1] = mtu; + rt->dst.metrics[RTAX_MTU-1] = mtu; rt6_mtu_change(skb->dev, mtu); } @@ -1384,7 +1384,7 @@ skip_linkparms: } out: if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); else if (neigh) neigh_release(neigh); in6_dev_put(in6_dev); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index a74951c039b6..7155b2451d7c 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -151,9 +151,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, protocol, csum_sub(0, hsum))); skb->ip_summed = CHECKSUM_NONE; - csum = __skb_checksum_complete_head(skb, dataoff + len); - if (!csum) - skb->ip_summed = CHECKSUM_UNNECESSARY; + return __skb_checksum_complete_head(skb, dataoff + len); } return csum; }; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 8c201743d96d..413ab0754e1f 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -43,7 +43,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; -static DEFINE_RWLOCK(queue_lock); +static DEFINE_SPINLOCK(queue_lock); static int peer_pid __read_mostly; static unsigned int 
copy_range __read_mostly; static unsigned int queue_total; @@ -73,10 +73,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range) break; case IPQ_COPY_PACKET: - copy_mode = mode; + if (range > 0xFFFF) + range = 0xFFFF; copy_range = range; - if (copy_range > 0xFFFF) - copy_range = 0xFFFF; + copy_mode = mode; break; default: @@ -102,7 +102,7 @@ ipq_find_dequeue_entry(unsigned long id) { struct nf_queue_entry *entry = NULL, *i; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); list_for_each_entry(i, &queue_list, list) { if ((unsigned long)i == id) { @@ -116,7 +116,7 @@ ipq_find_dequeue_entry(unsigned long id) queue_total--; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return entry; } @@ -137,9 +137,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) static void ipq_flush(ipq_cmpfn cmpfn, unsigned long data) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); __ipq_flush(cmpfn, data); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } static struct sk_buff * @@ -153,9 +153,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) struct nlmsghdr *nlh; struct timeval tv; - read_lock_bh(&queue_lock); - - switch (copy_mode) { + switch (ACCESS_ONCE(copy_mode)) { case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); @@ -163,26 +161,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_PACKET: if (entry->skb->ip_summed == CHECKSUM_PARTIAL && - (*errp = skb_checksum_help(entry->skb))) { - read_unlock_bh(&queue_lock); + (*errp = skb_checksum_help(entry->skb))) return NULL; - } - if (copy_range == 0 || copy_range > entry->skb->len) + + data_len = ACCESS_ONCE(copy_range); + if (data_len == 0 || data_len > entry->skb->len) data_len = entry->skb->len; - else - data_len = copy_range; size = NLMSG_SPACE(sizeof(*pmsg) + data_len); break; default: *errp = -EINVAL; - read_unlock_bh(&queue_lock); return NULL; } - read_unlock_bh(&queue_lock); - skb = alloc_skb(size, 
GFP_ATOMIC); if (!skb) goto nlmsg_failure; @@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) if (nskb == NULL) return status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (!peer_pid) goto err_out_free_nskb; @@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) __ipq_enqueue_entry(entry); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; err_out_free_nskb: kfree_skb(nskb); err_out_unlock: - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range) { int status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); status = __ipq_set_mode(mode, range); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -441,11 +434,11 @@ __ipq_rcv_skb(struct sk_buff *skb) if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (peer_pid) { if (peer_pid != pid) { - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); RCV_SKB_FAIL(-EBUSY); } } else { @@ -453,7 +446,7 @@ __ipq_rcv_skb(struct sk_buff *skb) peer_pid = pid; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0)); @@ -498,10 +491,10 @@ ipq_rcv_nl_event(struct notifier_block *this, struct netlink_notify *n = ptr; if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } return NOTIFY_DONE; } @@ -528,7 +521,7 @@ static ctl_table ipq_table[] = { #ifdef CONFIG_PROC_FS static int ip6_queue_show(struct seq_file *m, void *v) { - read_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); seq_printf(m, "Peer PID : 
%d\n" @@ -546,7 +539,7 @@ static int ip6_queue_show(struct seq_file *m, void *v) queue_dropped, queue_user_dropped); - read_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return 0; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9d2d68f0e605..5359ef4daac5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -387,9 +387,7 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(skb)->payload_len) + - sizeof(struct ipv6hdr), 1); + ADD_COUNTER(e->counters, skb->len, 1); t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); @@ -899,7 +897,7 @@ get_counters(const struct xt_table_info *t, struct ip6t_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = get_cpu(); /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -909,14 +907,16 @@ get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); - curcpu = smp_processor_id(); - i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, iter->counters.pcnt); ++i; } + local_bh_enable(); + /* Processing counters from other cpus, we can let bottom half enabled, + * (preemption is disabled) + */ for_each_possible_cpu(cpu) { if (cpu == curcpu) @@ -930,7 +930,7 @@ get_counters(const struct xt_table_info *t, } xt_info_wrunlock(cpu); } - local_bh_enable(); + put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -943,7 +943,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). 
*/ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1213,8 +1213,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct ip6t_entry *iter; ret = 0; - counters = vmalloc_node(num_counters * sizeof(struct xt_counters), - numa_node_id()); + counters = vmalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; @@ -1368,7 +1367,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len - size, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index af4ee11f2066..0a07ae7b933f 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -373,6 +373,56 @@ static void dump_packet(const struct nf_loginfo *info, printk("MARK=0x%x ", skb->mark); } +static void dump_mac_header(const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & IP6T_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + printk("MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int len = dev->hard_header_len; + unsigned int i; + + if (dev->type == ARPHRD_SIT && + (p -= ETH_HLEN) < skb->head) + p = NULL; + + if (p != NULL) { + printk("%02x", *p++); + for (i = 1; i < len; i++) + printk(":%02x", 
p[i]); + } + printk(" "); + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr); + } + } else + printk(" "); +} + static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { @@ -400,35 +450,10 @@ ip6t_log_packet(u_int8_t pf, prefix, in ? in->name : "", out ? out->name : ""); - if (in && !out) { - unsigned int len; - /* MAC logging for input chain only. */ - printk("MAC="); - if (skb->dev && (len = skb->dev->hard_header_len) && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - int i; - - if (skb->dev->type == ARPHRD_SIT && - (p -= ETH_HLEN) < skb->head) - p = NULL; - - if (p != NULL) { - for (i = 0; i < len; i++) - printk("%02x%s", p[i], - i == len - 1 ? "" : ":"); - } - printk(" "); - if (skb->dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - printk("TUNNEL=%pI4->%pI4 ", - &iph->saddr, &iph->daddr); - } - } else - printk(" "); - } + /* MAC logging for input path only. 
*/ + if (in && !out) + dump_mac_header(loginfo, skb); dump_packet(loginfo, skb, skb_network_offset(skb), 1); printk("\n"); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 47d227713758..2933396e0281 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -97,9 +97,11 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) fl.fl_ip_dport = otcph.source; security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(net, NULL, &fl); - if (dst == NULL) + if (dst == NULL || dst->error) { + dst_release(dst); return; - if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) + } + if (xfrm_lookup(net, &dst, &fl, NULL, 0)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 9be81776415e..1df3c8b6bf47 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6fb890187de0..13ef5bc05cf5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -114,10 +114,8 @@ static void nf_skb_free(struct sk_buff *skb) } /* Memory Tracking Functions. 
*/ -static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) +static void frag_kfree_skb(struct sk_buff *skb) { - if (work) - *work -= skb->truesize; atomic_sub(skb->truesize, &nf_init_frags.mem); nf_skb_free(skb); kfree_skb(skb); @@ -201,7 +199,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, int offset, end; if (fq->q.last_in & INET_FRAG_COMPLETE) { - pr_debug("Allready completed\n"); + pr_debug("Already completed\n"); goto err; } @@ -271,6 +269,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, * in the chain of fragments so far. We must know where to put * this fragment, right? */ + prev = fq->q.fragments_tail; + if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) { + next = NULL; + goto found; + } prev = NULL; for (next = fq->q.fragments; next != NULL; next = next->next) { if (NFCT_FRAG6_CB(next)->offset >= offset) @@ -278,6 +281,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, prev = next; } +found: /* We found where to put this one. Check for overlap with * preceding fragment, and, if needed, align things so that * any overlaps are eliminated. @@ -335,7 +339,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, fq->q.fragments = next; fq->q.meat -= free_it->len; - frag_kfree_skb(free_it, NULL); + frag_kfree_skb(free_it); } } @@ -343,6 +347,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* Insert this fragment in the chain of fragments. 
*/ skb->next = next; + if (!next) + fq->q.fragments_tail = skb; if (prev) prev->next = skb; else @@ -442,7 +448,6 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &nf_init_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -452,8 +457,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &nf_init_frags.mem); } + atomic_sub(head->truesize, &nf_init_frags.mem); head->next = NULL; head->dev = dev; @@ -467,6 +472,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->csum); fq->q.fragments = NULL; + fq->q.fragments_tail = NULL; /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ fp = skb_shinfo(head)->frag_list; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 566798d69f37..d082eaeefa25 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -174,17 +174,28 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib, const struct snmp_mib *itemlist) { int i; - for (i=0; itemlist[i].name; i++) + + for (i = 0; itemlist[i].name; i++) seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, snmp_fold_field(mib, itemlist[i].entry)); } +static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib, + const struct snmp_mib *itemlist, size_t syncpoff) +{ + int i; + + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, + snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); +} + static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; - snmp6_seq_show_item(seq, (void __percpu **)net->mib.ipv6_statistics, - snmp6_ipstats_list); + 
snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics, + snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4a4dcbe4f8b2..e677937a07fc 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -602,31 +602,33 @@ out: } static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, - struct flowi *fl, struct rt6_info *rt, + struct flowi *fl, struct dst_entry **dstp, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; struct sk_buff *skb; int err; + struct rt6_info *rt = (struct rt6_info *)*dstp; - if (length > rt->u.dst.dev->mtu) { - ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); + if (length > rt->dst.dev->mtu) { + ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, &rt->dst); + *dstp = NULL; skb_put(skb, length); skb_reset_network_header(skb); @@ -641,7 +643,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - rt->u.dst.dev, dst_output); + rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) @@ -725,7 +727,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, { struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, 
*final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct raw6_sock *rp = raw6_sk(sk); @@ -847,13 +849,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); - /* merge ip6_build_xmit from ip6_output */ - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; @@ -892,9 +888,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, goto do_confirm; back_from_confirm: - if (inet->hdrincl) { - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); - } else { + if (inet->hdrincl) + err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags); + else { lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 6d4292ff5854..545c4141b755 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -150,11 +150,8 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) EXPORT_SYMBOL(ip6_frag_match); /* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct netns_frags *nf, - struct sk_buff *skb, int *work) +static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) { - if (work) - *work -= skb->truesize; atomic_sub(skb->truesize, &nf->mem); kfree_skb(skb); } @@ -336,6 +333,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * in the chain of fragments so far. We must know where to put * this fragment, right? 
*/ + prev = fq->q.fragments_tail; + if (!prev || FRAG6_CB(prev)->offset < offset) { + next = NULL; + goto found; + } prev = NULL; for(next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) @@ -343,6 +345,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, prev = next; } +found: /* We found where to put this one. Check for overlap with * preceding fragment, and, if needed, align things so that * any overlaps are eliminated. @@ -392,7 +395,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->q.fragments = next; fq->q.meat -= free_it->len; - frag_kfree_skb(fq->q.net, free_it, NULL); + frag_kfree_skb(fq->q.net, free_it); } } @@ -400,6 +403,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* Insert this fragment in the chain of fragments. */ skb->next = next; + if (!next) + fq->q.fragments_tail = skb; if (prev) prev->next = skb; else @@ -466,6 +471,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oom; fp->next = head->next; + if (!fp->next) + fq->q.fragments_tail = fp; prev->next = fp; skb_morph(head, fq->q.fragments); @@ -524,7 +531,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &fq->q.net->mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -534,8 +540,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &fq->q.net->mem); } + atomic_sub(head->truesize, &fq->q.net->mem); head->next = NULL; head->dev = dev; @@ -553,6 +559,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, IP6_INC_STATS_BH(net, __in6_dev_get(dev), 
IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; + fq->q.fragments_tail = NULL; return 1; out_oversize: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 252d76199c41..8f2d0400cf8a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -126,16 +126,14 @@ static struct dst_ops ip6_dst_blackhole_ops = { }; static struct rt6_info ip6_null_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -ENETUNREACH, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_discard, - .output = ip6_pkt_discard_out, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -ENETUNREACH, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -149,16 +147,14 @@ static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); static struct rt6_info ip6_prohibit_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -EACCES, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_prohibit, - .output = ip6_pkt_prohibit_out, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -EACCES, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_prohibit, + .output = ip6_pkt_prohibit_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -167,16 +163,14 @@ static struct rt6_info ip6_prohibit_entry_template = { }; static struct rt6_info ip6_blk_hole_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -EINVAL, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = dst_discard, - .output = dst_discard, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -EINVAL, + .metrics = { 
[RTAX_HOPLIMIT - 1] = 255, }, + .input = dst_discard, + .output = dst_discard, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -249,7 +243,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (!oif && ipv6_addr_any(saddr)) goto out; - for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { + for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { struct net_device *dev = sprt->rt6i_dev; if (oif) { @@ -407,10 +401,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && rt->rt6i_metric == metric; - rt = rt->u.dst.rt6_next) + rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; - rt = rt->u.dst.rt6_next) + rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match); return match; @@ -432,7 +426,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) if (!match && (strict & RT6_LOOKUP_F_REACHABLE)) { - struct rt6_info *next = rt0->u.dst.rt6_next; + struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ if (!next || next->rt6i_metric != rt0->rt6i_metric) @@ -517,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt->rt6i_expires = jiffies + HZ * lifetime; rt->rt6i_flags |= RTF_EXPIRES; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } return 0; } @@ -555,7 +549,7 @@ restart: rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); BACKTRACK(net, &fl->fl6_src); out: - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); return rt; @@ -643,7 +637,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->u.dst.flags |= DST_HOST; + rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ 
-677,7 +671,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad if (net_ratelimit()) printk(KERN_WARNING "Neighbour table overflow.\n"); - dst_free(&rt->u.dst); + dst_free(&rt->dst); return NULL; } rt->rt6i_nexthop = neigh; @@ -694,7 +688,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->u.dst.flags |= DST_HOST; + rt->dst.flags |= DST_HOST; rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); } return rt; @@ -726,7 +720,7 @@ restart: rt->rt6i_flags & RTF_CACHE) goto out; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) @@ -739,10 +733,10 @@ restart: #endif } - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = nrt ? : net->ipv6.ip6_null_entry; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); if (nrt) { err = ip6_ins_rt(nrt); if (!err) @@ -756,7 +750,7 @@ restart: * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. 
*/ - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto relookup; out: @@ -764,11 +758,11 @@ out: reachable = 0; goto restart_2; } - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); out2: - rt->u.dst.lastuse = jiffies; - rt->u.dst.__use++; + rt->dst.lastuse = jiffies; + rt->dst.__use++; return rt; } @@ -835,15 +829,15 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl struct dst_entry *new = NULL; if (rt) { - new = &rt->u.dst; + new = &rt->dst; atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); - new->dev = ort->u.dst.dev; + memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); rt->rt6i_idev = ort->rt6i_idev; @@ -912,7 +906,7 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { if (rt->rt6i_flags&RTF_CACHE) { - dst_set_expires(&rt->u.dst, 0); + dst_set_expires(&rt->dst, 0); rt->rt6i_flags |= RTF_EXPIRES; } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) rt->rt6i_node->fn_sernum = -1; @@ -986,14 +980,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_dev = dev; rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.output = ip6_output; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.metrics[RTAX_HOPLIMIT-1] = 255; + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ - rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST + rt->dst.flags = 
ipv6_addr_type(addr) & IPV6_ADDR_UNICAST ? DST_HOST : 0; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); @@ -1001,14 +995,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, #endif spin_lock_bh(&icmp6_dst_lock); - rt->u.dst.next = icmp6_dst_gc_list; - icmp6_dst_gc_list = &rt->u.dst; + rt->dst.next = icmp6_dst_gc_list; + icmp6_dst_gc_list = &rt->dst; spin_unlock_bh(&icmp6_dst_lock); fib6_force_start_gc(net); out: - return &rt->u.dst; + return &rt->dst; } int icmp6_dst_gc(void) @@ -1090,11 +1084,11 @@ static int ipv6_get_mtu(struct net_device *dev) int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; - idev = in6_dev_get(dev); - if (idev) { + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) mtu = idev->cnf.mtu6; - in6_dev_put(idev); - } + rcu_read_unlock(); return mtu; } @@ -1103,12 +1097,15 @@ int ip6_dst_hoplimit(struct dst_entry *dst) int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); if (hoplimit < 0) { struct net_device *dev = dst->dev; - struct inet6_dev *idev = in6_dev_get(dev); - if (idev) { + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) hoplimit = idev->cnf.hop_limit; - in6_dev_put(idev); - } else + else hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; + rcu_read_unlock(); } return hoplimit; } @@ -1159,7 +1156,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt->u.dst.obsolete = -1; + rt->dst.obsolete = -1; rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 
jiffies + clock_t_to_jiffies(cfg->fc_expires) : 0; @@ -1171,16 +1168,16 @@ int ip6_route_add(struct fib6_config *cfg) addr_type = ipv6_addr_type(&cfg->fc_dst); if (addr_type & IPV6_ADDR_MULTICAST) - rt->u.dst.input = ip6_mc_input; + rt->dst.input = ip6_mc_input; else - rt->u.dst.input = ip6_forward; + rt->dst.input = ip6_forward; - rt->u.dst.output = ip6_output; + rt->dst.output = ip6_output; ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) - rt->u.dst.flags = DST_HOST; + rt->dst.flags = DST_HOST; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -1208,9 +1205,9 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->u.dst.output = ip6_pkt_discard_out; - rt->u.dst.input = ip6_pkt_discard; - rt->u.dst.error = -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; + rt->dst.error = -ENETUNREACH; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; goto install_route; } @@ -1244,7 +1241,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; if (dev) { if (dev != grt->rt6i_dev) { - dst_release(&grt->u.dst); + dst_release(&grt->dst); goto out; } } else { @@ -1255,7 +1252,7 @@ int ip6_route_add(struct fib6_config *cfg) } if (!(grt->rt6i_flags&RTF_GATEWAY)) err = 0; - dst_release(&grt->u.dst); + dst_release(&grt->dst); if (err) goto out; @@ -1294,18 +1291,18 @@ install_route: goto out; } - rt->u.dst.metrics[type - 1] = nla_get_u32(nla); + rt->dst.metrics[type - 1] = nla_get_u32(nla); } } } - if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!dst_mtu(&rt->u.dst)) - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.dev = dev; + if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + if 
(!dst_mtu(&rt->dst)) + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); + if (!dst_metric(&rt->dst, RTAX_ADVMSS)) + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1319,7 +1316,7 @@ out: if (idev) in6_dev_put(idev); if (rt) - dst_free(&rt->u.dst); + dst_free(&rt->dst); return err; } @@ -1336,7 +1333,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) write_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); - dst_release(&rt->u.dst); + dst_release(&rt->dst); write_unlock_bh(&table->tb6_lock); @@ -1369,7 +1366,7 @@ static int ip6_route_del(struct fib6_config *cfg) &cfg->fc_src, cfg->fc_src_len); if (fn) { - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (cfg->fc_ifindex && (rt->rt6i_dev == NULL || rt->rt6i_dev->ifindex != cfg->fc_ifindex)) @@ -1379,7 +1376,7 @@ static int ip6_route_del(struct fib6_config *cfg) continue; if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); return __ip6_del_rt(rt, &cfg->fc_nlinfo); @@ -1421,7 +1418,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { /* * Current route is on-link; redirect is always invalid. * @@ -1445,7 +1442,7 @@ restart: rt = net->ipv6.ip6_null_entry; BACKTRACK(net, &fl->fl6_src); out: - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); @@ -1513,10 +1510,10 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, * Look, redirects are sent only in response to data packets, * so that this nexthop apparently is reachable. 
--ANK */ - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == rt->u.dst.neighbour) + if (neigh == rt->dst.neighbour) goto out; nrt = ip6_rt_copy(rt); @@ -1529,20 +1526,20 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); nrt->rt6i_dst.plen = 128; - nrt->u.dst.flags |= DST_HOST; + nrt->dst.flags |= DST_HOST; ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ - nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->u.dst)); + nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); + nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->dst)); if (ip6_ins_rt(nrt)) goto out; - netevent.old = &rt->u.dst; - netevent.new = &nrt->u.dst; + netevent.old = &rt->dst; + netevent.new = &nrt->dst; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags&RTF_CACHE) { @@ -1551,7 +1548,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, } out: - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* @@ -1570,7 +1567,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, if (rt == NULL) return; - if (pmtu >= dst_mtu(&rt->u.dst)) + if (pmtu >= dst_mtu(&rt->dst)) goto out; if (pmtu < IPV6_MIN_MTU) { @@ -1588,7 +1585,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, They are sent only in response to data packets, so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); /* Host route. If it is static, it would be better not to override it, but add new one, so that @@ -1596,10 +1593,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, would return automatically. 
*/ if (rt->rt6i_flags & RTF_CACHE) { - rt->u.dst.metrics[RTAX_MTU-1] = pmtu; + rt->dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) - rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); + rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1615,9 +1612,9 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, nrt = rt6_alloc_clone(rt, daddr); if (nrt) { - nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; + nrt->dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) - nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; /* According to RFC 1981, detecting PMTU increase shouldn't be * happened within 5 mins, the recommended timer is 10 mins. @@ -1625,13 +1622,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. 
*/ - dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); } out: - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* @@ -1644,18 +1641,18 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt) { - rt->u.dst.input = ort->u.dst.input; - rt->u.dst.output = ort->u.dst.output; - - memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); - rt->u.dst.error = ort->u.dst.error; - rt->u.dst.dev = ort->u.dst.dev; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + rt->dst.input = ort->dst.input; + rt->dst.output = ort->dst.output; + + memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + rt->dst.error = ort->dst.error; + rt->dst.dev = ort->dst.dev; + if (rt->dst.dev) + dev_hold(rt->dst.dev); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); - rt->u.dst.lastuse = jiffies; + rt->dst.lastuse = jiffies; rt->rt6i_expires = 0; ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); @@ -1689,14 +1686,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net, if (!fn) goto out; - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (rt->rt6i_dev->ifindex != ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) continue; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); break; } out: @@ -1744,14 +1741,14 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d return NULL; write_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { if (dev == rt->rt6i_dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | 
RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) break; } if (rt) - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); write_unlock_bh(&table->tb6_lock); return rt; } @@ -1790,9 +1787,9 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); goto restart; @@ -1930,15 +1927,15 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, dev_hold(net->loopback_dev); in6_dev_hold(idev); - rt->u.dst.flags = DST_HOST; - rt->u.dst.input = ip6_input; - rt->u.dst.output = ip6_output; + rt->dst.flags = DST_HOST; + rt->dst.input = ip6_input; + rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - rt->u.dst.obsolete = -1; + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) @@ -1947,7 +1944,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_LOCAL; neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); if (IS_ERR(neigh)) { - dst_free(&rt->u.dst); + dst_free(&rt->dst); /* We are casting this because that is the return * value type. 
But an errno encoded pointer is the @@ -1962,7 +1959,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); - atomic_set(&rt->u.dst.__refcnt, 1); + atomic_set(&rt->dst.__refcnt, 1); return rt; } @@ -2033,12 +2030,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->rt6i_dev == arg->dev && - !dst_metric_locked(&rt->u.dst, RTAX_MTU) && - (dst_mtu(&rt->u.dst) >= arg->mtu || - (dst_mtu(&rt->u.dst) < arg->mtu && - dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { - rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU) && + (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6))) { + rt->dst.metrics[RTAX_MTU-1] = arg->mtu; + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); } return 0; } @@ -2252,20 +2249,20 @@ static int rt6_fill_node(struct net *net, #endif NLA_PUT_U32(skb, RTA_IIF, iif); } else if (dst) { - struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); + struct inet6_dev *idev = ip6_dst_idev(&rt->dst); struct in6_addr saddr_buf; if (ipv6_dev_get_saddr(net, idev ? 
idev->dev : NULL, dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto nla_put_failure; - if (rt->u.dst.neighbour) - NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); + if (rt->dst.neighbour) + NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key); - if (rt->u.dst.dev) + if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); @@ -2277,8 +2274,8 @@ static int rt6_fill_node(struct net *net, else expires = INT_MAX; - if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, - expires, rt->u.dst.error) < 0) + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, + expires, rt->dst.error) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2364,7 +2361,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, @@ -2416,12 +2413,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { - net->ipv6.ip6_null_entry->u.dst.dev = dev; + net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; + net->ipv6.ip6_prohibit_entry->dst.dev = dev; net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); - net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; + net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); #endif } @@ -2464,8 +2461,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) 
seq_puts(m, "00000000000000000000000000000000"); } seq_printf(m, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, rt->rt6i_flags, + rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), + rt->dst.__use, rt->rt6i_flags, rt->rt6i_dev ? rt->rt6i_dev->name : ""); return 0; } @@ -2646,9 +2643,9 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_null_entry) goto out_ip6_dst_ops; - net->ipv6.ip6_null_entry->u.dst.path = + net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; - net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2656,18 +2653,18 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_prohibit_entry) goto out_ip6_null_entry; - net->ipv6.ip6_prohibit_entry->u.dst.path = + net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; - net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), GFP_KERNEL); if (!net->ipv6.ip6_blk_hole_entry) goto out_ip6_prohibit_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.path = + net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; #endif net->ipv6.sysctl.flush_delay = 0; @@ -2742,12 +2739,12 @@ int __init ip6_route_init(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ - 
init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif ret = fib6_init(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e51e650ea80b..4699cd3c3118 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -249,8 +249,6 @@ failed: return NULL; } -static DEFINE_SPINLOCK(ipip6_prl_lock); - #define for_each_prl_rcu(start) \ for (prl = rcu_dereference(start); \ prl; \ @@ -340,7 +338,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) if (a->addr == htonl(INADDR_ANY)) return -EINVAL; - spin_lock(&ipip6_prl_lock); + ASSERT_RTNL(); for (p = t->prl; p; p = p->next) { if (p->addr == a->addr) { @@ -370,7 +368,6 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) t->prl_count++; rcu_assign_pointer(t->prl, p); out: - spin_unlock(&ipip6_prl_lock); return err; } @@ -397,7 +394,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) struct ip_tunnel_prl_entry *x, **p; int err = 0; - spin_lock(&ipip6_prl_lock); + ASSERT_RTNL(); if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; *p; p = &(*p)->next) { @@ -419,7 +416,6 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) } } out: - spin_unlock(&ipip6_prl_lock); return err; } @@ -716,7 +712,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, stats->tx_carrier_errors++; goto tx_error_icmp; } - tdev = 
rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -725,7 +721,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (df) { - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { stats->collisions++; @@ -784,7 +780,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags = 0; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -833,7 +829,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_IPV6 }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 34d1f0690d7e..09fd34f0dbf2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -27,28 +27,17 @@ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -/* - * This table has to be sorted and terminated with (__u16)-1. - * XXX generate a better table. - * Unresolved Issues: HIPPI with a 64k MSS is not well supported. - * - * Taken directly from ipv4 implementation. - * Should this list be modified for ipv6 use or is it close enough? - * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart - */ +/* Table must be sorted. */ static __u16 const msstab[] = { - 64 - 1, - 256 - 1, - 512 - 1, - 536 - 1, - 1024 - 1, - 1440 - 1, - 1460 - 1, - 4312 - 1, - (__u16)-1 + 64, + 512, + 536, + 1280 - 60, + 1480 - 60, + 1500 - 60, + 4460 - 60, + 9000 - 60, }; -/* The number doesn't include the -1 terminator */ -#define NUM_MSS (ARRAY_SIZE(msstab) - 1) /* * This (misnamed) value is the age of syncookie which is permitted. 
@@ -134,9 +123,11 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) tcp_synq_overflow(sk); - for (mssind = 0; mss > msstab[mssind + 1]; mssind++) - ; - *mssp = msstab[mssind] + 1; + for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) + if (mss >= msstab[mssind]) + break; + + *mssp = msstab[mssind]; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); @@ -154,7 +145,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) th->source, th->dest, seq, jiffies / (HZ * 60), COUNTER_TRIES); - return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; + return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) @@ -173,8 +164,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; + bool ecn_ok; - if (!sysctl_tcp_syncookies || !th->ack) + if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk) || @@ -189,8 +181,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, &hash_location, 0); - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); + if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + goto out; ret = NULL; req = inet6_reqsk_alloc(&tcp6_request_sock_ops); @@ -224,9 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; - ireq->ecn_ok = 0; + ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; @@ -240,17 +231,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) * me if there is a preferred way. 
*/ { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b7c3a100e2c..fe6d40418c0b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -129,7 +129,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct in6_addr *saddr = NULL, *final_p, final; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -250,12 +250,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->inet_sport; - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); security_sk_classify_flow(sk, &fl); @@ -477,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; - struct in6_addr * final_p = NULL, final; + struct in6_addr * final_p, final; struct flowi fl; struct dst_entry *dst; int err = -1; @@ -494,12 +489,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, security_req_classify_flow(req, &fl); opt = np->opt; - if (opt && opt->srcrt) 
{ - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -1167,7 +1157,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) } #ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) + if (!th->syn) sk = cookie_v6_check(sk, skb); #endif return sk; @@ -1279,13 +1269,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); - if (!want_cookie) + if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); - if (want_cookie) { - isn = cookie_v6_init_sequence(sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - } else if (!isn) { + if (!isn) { if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -1298,8 +1285,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!sk->sk_bound_dev_if && ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) treq->iif = inet6_iif(skb); - - isn = tcp_v6_init_sequence(skb); + if (!want_cookie) { + isn = tcp_v6_init_sequence(skb); + } else { + isn = cookie_v6_init_sequence(sk, skb, &req->mss); + req->cookie_ts = tmp_opt.tstamp_ok; + } } tcp_rsk(req)->snt_isn = isn; @@ -1392,18 +1383,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto out_overflow; if (dst == NULL) { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - 
ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; @@ -2156,6 +2142,8 @@ struct proto tcpv6_prot = { .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, .recvmsg = tcp_recvmsg, + .sendmsg = tcp_sendmsg, + .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .hash = tcp_v6_hash, .unhash = inet_unhash, @@ -2174,6 +2162,7 @@ struct proto tcpv6_prot = { .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, + .no_autobind = true, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 87be58673b55..1dd1affdead2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -927,7 +927,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi fl; @@ -1097,14 +1097,9 @@ do_udp_sendmsg: ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl_ip_sport = inet->inet_sport; - /* merge ip6_build_xmit from ip6_output */ - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; + final_p = fl6_update_dst(&fl, opt, &final); + if (final_p) connected = 0; - } if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { fl.oif = np->mcast_oif; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4a0e77e14468..6baeabbbca82 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -124,6 +124,8 @@ _decode_session6(struct 
sk_buff *skb, struct flowi *fl, int reverse) u8 nexthdr = nh[IP6CB(skb)->nhoff]; memset(fl, 0, sizeof(struct flowi)); + fl->mark = skb->mark; + ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 6a1a202710c5..800bc53b7f63 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -527,7 +527,7 @@ static int dev_irnet_close(struct inode * inode, struct file * file) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", file, ap); @@ -564,7 +564,7 @@ dev_irnet_write(struct file * file, size_t count, loff_t * ppos) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", file, ap, count); @@ -588,7 +588,7 @@ dev_irnet_read(struct file * file, size_t count, loff_t * ppos) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", file, ap, count); @@ -609,7 +609,7 @@ static unsigned int dev_irnet_poll(struct file * file, poll_table * wait) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; unsigned int mask; DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", @@ -638,7 +638,7 @@ dev_irnet_ioctl( unsigned int cmd, unsigned long arg) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; int err; int val; void __user *argp = (void __user *)arg; diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 47db1d8a0d92..285761e77d90 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1853,23 +1853,23 @@ static int irttp_seq_show(struct seq_file *seq, void *v) self->remote_credit); seq_printf(seq, 
"send credit: %d\n", self->send_credit); - seq_printf(seq, " tx packets: %ld, ", + seq_printf(seq, " tx packets: %lu, ", self->stats.tx_packets); - seq_printf(seq, "rx packets: %ld, ", + seq_printf(seq, "rx packets: %lu, ", self->stats.rx_packets); - seq_printf(seq, "tx_queue len: %d ", + seq_printf(seq, "tx_queue len: %u ", skb_queue_len(&self->tx_queue)); - seq_printf(seq, "rx_queue len: %d\n", + seq_printf(seq, "rx_queue len: %u\n", skb_queue_len(&self->rx_queue)); seq_printf(seq, " tx_sdu_busy: %s, ", self->tx_sdu_busy? "TRUE":"FALSE"); seq_printf(seq, "rx_sdu_busy: %s\n", self->rx_sdu_busy? "TRUE":"FALSE"); - seq_printf(seq, " max_seg_size: %d, ", + seq_printf(seq, " max_seg_size: %u, ", self->max_seg_size); - seq_printf(seq, "tx_max_sdu_size: %d, ", + seq_printf(seq, "tx_max_sdu_size: %u, ", self->tx_max_sdu_size); - seq_printf(seq, "rx_max_sdu_size: %d\n", + seq_printf(seq, "rx_max_sdu_size: %u\n", self->rx_max_sdu_size); seq_printf(seq, " Used by (%s)\n\n", diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index f28ad2cc8428..499c045d6910 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1463,7 +1463,7 @@ struct iucv_path_pending { u32 res3; u8 ippollfg; u8 res4[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_pending(struct iucv_irq_data *data) { @@ -1524,7 +1524,7 @@ struct iucv_path_complete { u32 res3; u8 ippollfg; u8 res4[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_complete(struct iucv_irq_data *data) { @@ -1554,7 +1554,7 @@ struct iucv_path_severed { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_severed(struct iucv_irq_data *data) { @@ -1590,7 +1590,7 @@ struct iucv_path_quiesced { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_quiesced(struct iucv_irq_data *data) { @@ -1618,7 +1618,7 @@ struct iucv_path_resumed { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void 
iucv_path_resumed(struct iucv_irq_data *data) { @@ -1649,7 +1649,7 @@ struct iucv_message_complete { u32 ipbfln2f; u8 ippollfg; u8 res2[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_message_complete(struct iucv_irq_data *data) { @@ -1694,7 +1694,7 @@ struct iucv_message_pending { u32 ipbfln2f; u8 ippollfg; u8 res2[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_message_pending(struct iucv_irq_data *data) { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0852512d392c..226a0ae3bcfd 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -348,7 +348,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; - sk_dst_set(sk, &rt->u.dst); + sk_dst_set(sk, &rt->dst); write_lock_bh(&l2tp_ip_lock); hlist_del_init(&sk->sk_bind_node); @@ -496,9 +496,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) goto no_route; } - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); } - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); /* Queue the packet to IP for output */ rc = ip_queue_xmit(skb); diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 8a91f6c0bb18..4d6f8653ec88 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -33,6 +33,13 @@ config MAC80211_RC_MINSTREL ---help--- This option enables the 'minstrel' TX rate control algorithm +config MAC80211_RC_MINSTREL_HT + bool "Minstrel 802.11n support" if EMBEDDED + depends on MAC80211_RC_MINSTREL + default y + ---help--- + This option enables the 'minstrel_ht' TX rate control algorithm + choice prompt "Default rate control algorithm" depends on MAC80211_HAS_RC @@ -62,6 +69,7 @@ endchoice config MAC80211_RC_DEFAULT string + default "minstrel_ht" if MAC80211_RC_DEFAULT_MINSTREL && MAC80211_RC_MINSTREL_HT default "minstrel" if 
MAC80211_RC_DEFAULT_MINSTREL default "pid" if MAC80211_RC_DEFAULT_PID default "" diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 84b48ba8a77e..fdb54e61d637 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -51,7 +51,11 @@ rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o rc80211_minstrel-y := rc80211_minstrel.o rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o +rc80211_minstrel_ht-y := rc80211_minstrel_ht.o +rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o + mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) +mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y) ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 6bb9a9a94960..965b272499fd 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -6,39 +6,70 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> - * Copyright 2007-2008, Intel Corporation + * Copyright 2007-2010, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +/** + * DOC: RX A-MPDU aggregation + * + * Aggregation on the RX side requires only implementing the + * @ampdu_action callback that is invoked to start/stop any + * block-ack sessions for RX aggregation. + * + * When RX aggregation is started by the peer, the driver is + * notified via @ampdu_action function, with the + * %IEEE80211_AMPDU_RX_START action, and may reject the request + * in which case a negative response is sent to the peer, if it + * accepts it a positive response is sent. 
+ * + * While the session is active, the device/driver are required + * to de-aggregate frames and pass them up one by one to mac80211, + * which will handle the reorder buffer. + * + * When the aggregation session is stopped again by the peer or + * ourselves, the driver's @ampdu_action function will be called + * with the action %IEEE80211_AMPDU_RX_STOP. In this case, the + * call must not fail. + */ + #include <linux/ieee80211.h> #include <linux/slab.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" -static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason, - bool from_timer) +static void ieee80211_free_tid_rx(struct rcu_head *h) { - struct ieee80211_local *local = sta->local; - struct tid_ampdu_rx *tid_rx; + struct tid_ampdu_rx *tid_rx = + container_of(h, struct tid_ampdu_rx, rcu_head); int i; - spin_lock_bh(&sta->lock); + for (i = 0; i < tid_rx->buf_size; i++) + dev_kfree_skb(tid_rx->reorder_buf[i]); + kfree(tid_rx->reorder_buf); + kfree(tid_rx->reorder_time); + kfree(tid_rx); +} - /* check if TID is in operational state */ - if (!sta->ampdu_mlme.tid_active_rx[tid]) { - spin_unlock_bh(&sta->lock); - return; - } +void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason) +{ + struct ieee80211_local *local = sta->local; + struct tid_ampdu_rx *tid_rx; - sta->ampdu_mlme.tid_active_rx[tid] = false; + lockdep_assert_held(&sta->ampdu_mlme.mtx); tid_rx = sta->ampdu_mlme.tid_rx[tid]; + if (!tid_rx) + return; + + rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL); + #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", sta->sta.addr, tid); @@ -54,32 +85,17 @@ static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, ieee80211_send_delba(sta->sdata, sta->sta.addr, tid, 0, reason); - /* free the reordering buffer */ - for (i = 0; i < tid_rx->buf_size; i++) { - if (tid_rx->reorder_buf[i]) { - /* 
release the reordered frames */ - dev_kfree_skb(tid_rx->reorder_buf[i]); - tid_rx->stored_mpdu_num--; - tid_rx->reorder_buf[i] = NULL; - } - } - - /* free resources */ - kfree(tid_rx->reorder_buf); - kfree(tid_rx->reorder_time); - sta->ampdu_mlme.tid_rx[tid] = NULL; - - spin_unlock_bh(&sta->lock); + del_timer_sync(&tid_rx->session_timer); - if (!from_timer) - del_timer_sync(&tid_rx->session_timer); - kfree(tid_rx); + call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason) { - ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, false); + mutex_lock(&sta->ampdu_mlme.mtx); + ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason); + mutex_unlock(&sta->ampdu_mlme.mtx); } /* @@ -100,8 +116,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); #endif - ___ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_TIMEOUT, true); + set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); + ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); } static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, @@ -212,9 +228,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* examine state machine */ - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); - if (sta->ampdu_mlme.tid_active_rx[tid]) { + if (sta->ampdu_mlme.tid_rx[tid]) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "unexpected AddBA Req from " @@ -225,9 +241,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } /* prepare A-MPDU MLME for Rx aggregation */ - sta->ampdu_mlme.tid_rx[tid] = - kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_rx[tid]) { + tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); + if (!tid_agg_rx) { 
#ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_ERR "allocate rx mlme to tid %d failed\n", @@ -235,14 +250,11 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, #endif goto end; } - /* rx timer */ - sta->ampdu_mlme.tid_rx[tid]->session_timer.function = - sta_rx_agg_session_timer_expired; - sta->ampdu_mlme.tid_rx[tid]->session_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + /* rx timer */ + tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired; + tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&tid_agg_rx->session_timer); /* prepare reordering buffer */ tid_agg_rx->reorder_buf = @@ -257,8 +269,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, #endif kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); - kfree(sta->ampdu_mlme.tid_rx[tid]); - sta->ampdu_mlme.tid_rx[tid] = NULL; + kfree(tid_agg_rx); goto end; } @@ -270,13 +281,12 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, if (ret) { kfree(tid_agg_rx->reorder_buf); + kfree(tid_agg_rx->reorder_time); kfree(tid_agg_rx); - sta->ampdu_mlme.tid_rx[tid] = NULL; goto end; } - /* change state and send addba resp */ - sta->ampdu_mlme.tid_active_rx[tid] = true; + /* update data */ tid_agg_rx->dialog_token = dialog_token; tid_agg_rx->ssn = start_seq_num; tid_agg_rx->head_seq_num = start_seq_num; @@ -284,8 +294,15 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, tid_agg_rx->timeout = timeout; tid_agg_rx->stored_mpdu_num = 0; status = WLAN_STATUS_SUCCESS; + + /* activate it for RX */ + rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); + + if (timeout) + mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout)); + end: - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); end_no_lock: ieee80211_send_addba_resp(sta->sdata, 
sta->sta.addr, tid, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 98258b7341e3..c893f236acea 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -6,7 +6,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> - * Copyright 2007-2009, Intel Corporation + * Copyright 2007-2010, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,28 +21,39 @@ #include "wme.h" /** - * DOC: TX aggregation + * DOC: TX A-MPDU aggregation * * Aggregation on the TX side requires setting the hardware flag - * %IEEE80211_HW_AMPDU_AGGREGATION as well as, if present, the @ampdu_queues - * hardware parameter to the number of hardware AMPDU queues. If there are no - * hardware queues then the driver will (currently) have to do all frame - * buffering. + * %IEEE80211_HW_AMPDU_AGGREGATION. The driver will then be handed + * packets with a flag indicating A-MPDU aggregation. The driver + * or device is responsible for actually aggregating the frames, + * as well as deciding how many and which to aggregate. * - * When TX aggregation is started by some subsystem (usually the rate control - * algorithm would be appropriate) by calling the - * ieee80211_start_tx_ba_session() function, the driver will be notified via - * its @ampdu_action function, with the %IEEE80211_AMPDU_TX_START action. + * When TX aggregation is started by some subsystem (usually the rate + * control algorithm would be appropriate) by calling the + * ieee80211_start_tx_ba_session() function, the driver will be + * notified via its @ampdu_action function, with the + * %IEEE80211_AMPDU_TX_START action. 
* * In response to that, the driver is later required to call the - * ieee80211_start_tx_ba_cb() (or ieee80211_start_tx_ba_cb_irqsafe()) - * function, which will start the aggregation session. + * ieee80211_start_tx_ba_cb_irqsafe() function, which will really + * start the aggregation session after the peer has also responded. + * If the peer responds negatively, the session will be stopped + * again right away. Note that it is possible for the aggregation + * session to be stopped before the driver has indicated that it + * is done setting it up, in which case it must not indicate the + * setup completion. * - * Similarly, when the aggregation session is stopped by - * ieee80211_stop_tx_ba_session(), the driver's @ampdu_action function will - * be called with the action %IEEE80211_AMPDU_TX_STOP. In this case, the - * call must not fail, and the driver must later call ieee80211_stop_tx_ba_cb() - * (or ieee80211_stop_tx_ba_cb_irqsafe()). + * Also note that, since we also need to wait for a response from + * the peer, the driver is notified of the completion of the + * handshake by the %IEEE80211_AMPDU_TX_OPERATIONAL action to the + * @ampdu_action callback. + * + * Similarly, when the aggregation session is stopped by the peer + * or something calling ieee80211_stop_tx_ba_session(), the driver's + * @ampdu_action function will be called with the action + * %IEEE80211_AMPDU_TX_STOP. In this case, the call must not fail, + * and the driver must later call ieee80211_stop_tx_ba_cb_irqsafe(). 
*/ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, @@ -125,25 +136,53 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 ieee80211_tx_skb(sdata, skb); } +static void kfree_tid_tx(struct rcu_head *rcu_head) +{ + struct tid_ampdu_tx *tid_tx = + container_of(rcu_head, struct tid_ampdu_tx, rcu_head); + + kfree(tid_tx); +} + int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator) { struct ieee80211_local *local = sta->local; + struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; int ret; - u8 *state; + + lockdep_assert_held(&sta->ampdu_mlme.mtx); + + if (!tid_tx) + return -ENOENT; + + spin_lock_bh(&sta->lock); + + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { + /* not even started yet! */ + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); + spin_unlock_bh(&sta->lock); + call_rcu(&tid_tx->rcu_head, kfree_tid_tx); + return 0; + } + + spin_unlock_bh(&sta->lock); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; + set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); - if (*state == HT_AGG_STATE_OPERATIONAL) - sta->ampdu_mlme.addba_req_num[tid] = 0; + /* + * After this packets are no longer handed right through + * to the driver but are put onto tid_tx->pending instead, + * with locking to ensure proper access. 
+ */ + clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); - *state = HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + tid_tx->stop_initiator = initiator; ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_STOP, @@ -174,16 +213,14 @@ static void sta_addba_resp_timer_expired(unsigned long data) u16 tid = *(u8 *)data; struct sta_info *sta = container_of((void *)data, struct sta_info, timer_to_tid[tid]); - u8 *state; - - state = &sta->ampdu_mlme.tid_state_tx[tid]; + struct tid_ampdu_tx *tid_tx; /* check if the TID waits for addBA response */ - spin_lock_bh(&sta->lock); - if ((*state & (HT_ADDBA_REQUESTED_MSK | HT_ADDBA_RECEIVED_MSK | - HT_AGG_STATE_REQ_STOP_BA_MSK)) != - HT_ADDBA_REQUESTED_MSK) { - spin_unlock_bh(&sta->lock); + rcu_read_lock(); + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx || + test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { + rcu_read_unlock(); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " "(or no longer) expecting addBA response there\n", @@ -196,8 +233,8 @@ static void sta_addba_resp_timer_expired(unsigned long data) printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); #endif - ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); - spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(&sta->sta, tid); + rcu_read_unlock(); } static inline int ieee80211_ac_from_tid(int tid) @@ -205,14 +242,112 @@ static inline int ieee80211_ac_from_tid(int tid) return ieee802_1d_to_ac[tid & 7]; } +/* + * When multiple aggregation sessions on multiple stations + * are being created/destroyed simultaneously, we need to + * refcount the global queue stop caused by that in order + * to not get into a situation where one of the aggregation + * setup or teardown re-enables queues before the other is + * ready to handle that. 
+ * + * These two functions take care of this issue by keeping + * a global "agg_queue_stop" refcount. + */ +static void __acquires(agg_queue) +ieee80211_stop_queue_agg(struct ieee80211_local *local, int tid) +{ + int queue = ieee80211_ac_from_tid(tid); + + if (atomic_inc_return(&local->agg_queue_stop[queue]) == 1) + ieee80211_stop_queue_by_reason( + &local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __acquire(agg_queue); +} + +static void __releases(agg_queue) +ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid) +{ + int queue = ieee80211_ac_from_tid(tid); + + if (atomic_dec_return(&local->agg_queue_stop[queue]) == 0) + ieee80211_wake_queue_by_reason( + &local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __release(agg_queue); +} + +void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) +{ + struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + u16 start_seq_num; + int ret; + + lockdep_assert_held(&sta->ampdu_mlme.mtx); + + /* + * While we're asking the driver about the aggregation, + * stop the AC queue so that we don't have to worry + * about frames that came in while we were doing that, + * which would require us to put them to the AC pending + * afterwards which just makes the code more complex. 
+ */ + ieee80211_stop_queue_agg(local, tid); + + clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); + + /* + * make sure no packets are being processed to get + * valid starting sequence number + */ + synchronize_net(); + + start_seq_num = sta->tid_seq[tid] >> 4; + + ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, + &sta->sta, tid, &start_seq_num); + if (ret) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - HW unavailable for" + " tid %d\n", tid); +#endif + spin_lock_bh(&sta->lock); + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); + spin_unlock_bh(&sta->lock); + + ieee80211_wake_queue_agg(local, tid); + call_rcu(&tid_tx->rcu_head, kfree_tid_tx); + return; + } + + /* we can take packets again now */ + ieee80211_wake_queue_agg(local, tid); + + /* activate the timer for the recipient's addBA response */ + mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); +#endif + + spin_lock_bh(&sta->lock); + sta->ampdu_mlme.addba_req_num[tid]++; + spin_unlock_bh(&sta->lock); + + /* send AddBA request */ + ieee80211_send_addba_request(sdata, sta->sta.addr, tid, + tid_tx->dialog_token, start_seq_num, + 0x40, 5000); +} + int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - u8 *state; + struct tid_ampdu_tx *tid_tx; int ret = 0; - u16 start_seq_num; trace_api_start_tx_ba_session(pubsta, tid); @@ -239,24 +374,15 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; - if (test_sta_flags(sta, WLAN_STA_DISASSOC)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Disassociation is in progress. 
" - "Denying BA session request\n"); -#endif - return -EINVAL; - } - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Suspend in progress. " + printk(KERN_DEBUG "BA sessions blocked. " "Denying BA session request\n"); #endif return -EINVAL; } spin_lock_bh(&sta->lock); - spin_lock(&local->ampdu_lock); /* we have tried too many times, receiver does not want A-MPDU */ if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { @@ -264,9 +390,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) goto err_unlock_sta; } - state = &sta->ampdu_mlme.tid_state_tx[tid]; + tid_tx = sta->ampdu_mlme.tid_tx[tid]; /* check if the TID is not in aggregation flow already */ - if (*state != HT_AGG_STATE_IDLE) { + if (tid_tx) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - session is not " "idle on tid %u\n", tid); @@ -275,96 +401,37 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) goto err_unlock_sta; } - /* - * While we're asking the driver about the aggregation, - * stop the AC queue so that we don't have to worry - * about frames that came in while we were doing that, - * which would require us to put them to the AC pending - * afterwards which just makes the code more complex. 
- */ - ieee80211_stop_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - /* prepare A-MPDU MLME for Tx aggregation */ - sta->ampdu_mlme.tid_tx[tid] = - kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_tx[tid]) { + tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); + if (!tid_tx) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_ERR "allocate tx mlme to tid %d failed\n", tid); #endif ret = -ENOMEM; - goto err_wake_queue; + goto err_unlock_sta; } - skb_queue_head_init(&sta->ampdu_mlme.tid_tx[tid]->pending); + skb_queue_head_init(&tid_tx->pending); + __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); /* Tx timer */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = - sta_addba_resp_timer_expired; - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - - /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the - * call back right away, it must see that the flow has begun */ - *state |= HT_ADDBA_REQUESTED_MSK; - - start_seq_num = sta->tid_seq[tid] >> 4; - - ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, - pubsta, tid, &start_seq_num); + tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired; + tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&tid_tx->addba_resp_timer); - if (ret) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - HW unavailable for" - " tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - *state = HT_AGG_STATE_IDLE; - goto err_free; - } - - /* Driver vetoed or OKed, but we can take packets again now */ - ieee80211_wake_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - - spin_unlock(&local->ampdu_lock); - - /* prepare tid data */ + /* assign a dialog token */ 
sta->ampdu_mlme.dialog_token_allocator++; - sta->ampdu_mlme.tid_tx[tid]->dialog_token = - sta->ampdu_mlme.dialog_token_allocator; - sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator; - spin_unlock_bh(&sta->lock); + /* finally, assign it to the array */ + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); - /* send AddBA request */ - ieee80211_send_addba_request(sdata, pubsta->addr, tid, - sta->ampdu_mlme.tid_tx[tid]->dialog_token, - sta->ampdu_mlme.tid_tx[tid]->ssn, - 0x40, 5000); - sta->ampdu_mlme.addba_req_num[tid]++; - /* activate the timer for the recipient's addBA response */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = - jiffies + ADDBA_RESP_INTERVAL; - add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); -#endif - return 0; - - err_free: - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; - err_wake_queue: - ieee80211_wake_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + + /* this flow continues off the work */ err_unlock_sta: - spin_unlock(&local->ampdu_lock); spin_unlock_bh(&sta->lock); return ret; } @@ -372,69 +439,65 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_session); /* * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish and holding - * local->ampdu_lock across both calls. 
+ * requires a call to ieee80211_agg_splice_finish later */ -static void ieee80211_agg_splice_packets(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_local *local, + struct tid_ampdu_tx *tid_tx, u16 tid) { + int queue = ieee80211_ac_from_tid(tid); unsigned long flags; - u16 queue = ieee80211_ac_from_tid(tid); - - ieee80211_stop_queue_by_reason( - &local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - if (!(sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK)) - return; + ieee80211_stop_queue_agg(local, tid); - if (WARN(!sta->ampdu_mlme.tid_tx[tid], - "TID %d gone but expected when splicing aggregates from" - "the pending queue\n", tid)) + if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" + " from the pending queue\n", tid)) return; - if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) { + if (!skb_queue_empty(&tid_tx->pending)) { spin_lock_irqsave(&local->queue_stop_reason_lock, flags); /* copy over remaining packets */ - skb_queue_splice_tail_init( - &sta->ampdu_mlme.tid_tx[tid]->pending, - &local->pending[queue]); + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } } -static void ieee80211_agg_splice_finish(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) { - u16 queue = ieee80211_ac_from_tid(tid); - - ieee80211_wake_queue_by_reason( - &local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + ieee80211_wake_queue_agg(local, tid); } -/* caller must hold sta->lock */ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { + lockdep_assert_held(&sta->ampdu_mlme.mtx); + #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid); #endif - 
spin_lock(&local->ampdu_lock); - ieee80211_agg_splice_packets(local, sta, tid); - /* - * NB: we rely on sta->lock being taken in the TX - * processing here when adding to the pending queue, - * otherwise we could only change the state of the - * session to OPERATIONAL _here_. - */ - ieee80211_agg_splice_finish(local, sta, tid); - spin_unlock(&local->ampdu_lock); - drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, &sta->sta, tid, NULL); + + /* + * synchronize with TX path, while splicing the TX path + * should block so it won't put more packets onto pending. + */ + spin_lock_bh(&sta->lock); + + ieee80211_agg_splice_packets(local, sta->ampdu_mlme.tid_tx[tid], tid); + /* + * Now mark as operational. This will be visible + * in the TX path, and lets it go lock-free in + * the common case. + */ + set_bit(HT_AGG_STATE_OPERATIONAL, &sta->ampdu_mlme.tid_tx[tid]->state); + ieee80211_agg_splice_finish(local, tid); + + spin_unlock_bh(&sta->lock); } void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) @@ -442,7 +505,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u8 *state; + struct tid_ampdu_tx *tid_tx; trace_api_start_tx_ba_cb(sdata, ra, tid); @@ -454,42 +517,36 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) return; } - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, ra); if (!sta) { - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: %pM\n", ra); #endif return; } - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; - if (WARN_ON(!(*state & HT_ADDBA_REQUESTED_MSK))) { + if (WARN_ON(!tid_tx)) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "addBA was 
not requested yet, state is %d\n", - *state); + printk(KERN_DEBUG "addBA was not requested!\n"); #endif - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); - return; + goto unlock; } - if (WARN_ON(*state & HT_ADDBA_DRV_READY_MSK)) - goto out; - - *state |= HT_ADDBA_DRV_READY_MSK; + if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) + goto unlock; - if (*state == HT_AGG_STATE_OPERATIONAL) + if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); - out: - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); + unlock: + mutex_unlock(&sta->ampdu_mlme.mtx); + mutex_unlock(&local->sta_mtx); } -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) @@ -510,44 +567,36 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; - ra_tid->vif = vif; - skb->pkt_type = IEEE80211_ADDBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_START; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &sdata->work); } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator) { - u8 *state; int ret; - /* check if the TID is in aggregation */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); - - if (*state != HT_AGG_STATE_OPERATIONAL) { - ret = -ENOENT; - goto unlock; - } + mutex_lock(&sta->ampdu_mlme.mtx); ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); - unlock: - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); + return ret; } -int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, - enum ieee80211_back_parties initiator) +int ieee80211_stop_tx_ba_session(struct ieee80211_sta 
*pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + struct tid_ampdu_tx *tid_tx; + int ret = 0; - trace_api_stop_tx_ba_session(pubsta, tid, initiator); + trace_api_stop_tx_ba_session(pubsta, tid); if (!local->ops->ampdu_action) return -EINVAL; @@ -555,7 +604,26 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (tid >= STA_TID_NUM) return -EINVAL; - return __ieee80211_stop_tx_ba_session(sta, tid, initiator); + spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx) { + ret = -ENOENT; + goto unlock; + } + + if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { + /* already in progress stopping it */ + ret = 0; + goto unlock; + } + + set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + + unlock: + spin_unlock_bh(&sta->lock); + return ret; } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); @@ -564,7 +632,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u8 *state; + struct tid_ampdu_tx *tid_tx; trace_api_stop_tx_ba_cb(sdata, ra, tid); @@ -581,51 +649,56 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - rcu_read_lock(); + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, ra); if (!sta) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: %pM\n", ra); #endif - rcu_read_unlock(); - return; + goto unlock; } - state = &sta->ampdu_mlme.tid_state_tx[tid]; - /* NOTE: no need to use sta->lock in this state check, as - * ieee80211_stop_tx_ba_session will let only one stop call to - * pass through per sta/tid - */ - if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { + 
mutex_lock(&sta->ampdu_mlme.mtx); + spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); #endif - rcu_read_unlock(); - return; + goto unlock_sta; } - if (*state & HT_AGG_STATE_INITIATOR_MSK) + if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - spin_lock_bh(&sta->lock); - spin_lock(&local->ampdu_lock); + /* + * When we get here, the TX path will not be lockless any more wrt. + * aggregation, since the OPERATIONAL bit has long been cleared. + * Thus it will block on getting the lock, if it occurs. So if we + * stop the queue now, we will not get any more packets, and any + * that might be being processed will wait for us here, thereby + * guaranteeing that no packets go to the tid_tx pending queue any + * more. + */ - ieee80211_agg_splice_packets(local, sta, tid); + ieee80211_agg_splice_packets(local, tid_tx, tid); - *state = HT_AGG_STATE_IDLE; - /* from now on packets are no longer put onto sta->pending */ - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; + /* future packets must not find the tid_tx struct any more */ + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); - ieee80211_agg_splice_finish(local, sta, tid); + ieee80211_agg_splice_finish(local, tid); - spin_unlock(&local->ampdu_lock); - spin_unlock_bh(&sta->lock); + call_rcu(&tid_tx->rcu_head, kfree_tid_tx); - rcu_read_unlock(); + unlock_sta: + spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); + unlock: + mutex_unlock(&local->sta_mtx); } -EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) @@ -646,11 +719,10 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, ra_tid = (struct ieee80211_ra_tid *) &skb->cb; 
memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; - ra_tid->vif = vif; - skb->pkt_type = IEEE80211_DELBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_STOP; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &sdata->work); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); @@ -660,40 +732,40 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len) { + struct tid_ampdu_tx *tid_tx; u16 capab, tid; - u8 *state; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - state = &sta->ampdu_mlme.tid_state_tx[tid]; - - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + if (!tid_tx) goto out; - if (mgmt->u.action.u.addba_resp.dialog_token != - sta->ampdu_mlme.tid_tx[tid]->dialog_token) { + if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ +#endif goto out; } - del_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); + del_timer(&tid_tx->addba_resp_timer); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ +#endif if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { - u8 curstate = *state; - - *state |= HT_ADDBA_RECEIVED_MSK; + if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, + &tid_tx->state)) { + /* ignore duplicate response */ + goto out; + } - if (*state != curstate && *state == HT_AGG_STATE_OPERATIONAL) + if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); sta->ampdu_mlme.addba_req_num[tid] = 0; @@ -702,5 +774,5 @@ void 
ieee80211_process_addba_resp(struct ieee80211_local *local, } out: - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c7000a6ca379..29ac8e1a509e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -120,6 +120,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_key *key; int err; + if (!netif_running(dev)) + return -ENETDOWN; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (params->cipher) { @@ -140,17 +143,22 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } + /* reject WEP and TKIP keys if WEP failed to initialize */ + if ((alg == ALG_WEP || alg == ALG_TKIP) && + IS_ERR(sdata->local->wep_tx_tfm)) + return -EINVAL; + key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key, params->seq_len, params->seq); if (!key) return -ENOMEM; - rcu_read_lock(); + mutex_lock(&sdata->local->sta_mtx); if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); if (!sta) { - ieee80211_key_free(key); + ieee80211_key_free(sdata->local, key); err = -ENOENT; goto out_unlock; } @@ -160,7 +168,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, err = 0; out_unlock: - rcu_read_unlock(); + mutex_unlock(&sdata->local->sta_mtx); return err; } @@ -174,7 +182,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - rcu_read_lock(); + mutex_lock(&sdata->local->sta_mtx); if (mac_addr) { ret = -ENOENT; @@ -184,7 +192,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; if (sta->key) { - ieee80211_key_free(sta->key); + ieee80211_key_free(sdata->local, sta->key); WARN_ON(sta->key); ret = 0; } @@ -197,12 +205,12 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; } - ieee80211_key_free(sdata->keys[key_idx]); + ieee80211_key_free(sdata->local, 
sdata->keys[key_idx]); WARN_ON(sdata->keys[key_idx]); ret = 0; out_unlock: - rcu_read_unlock(); + mutex_unlock(&sdata->local->sta_mtx); return ret; } @@ -305,15 +313,10 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx) { - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ieee80211_set_default_key(sdata, key_idx); - rcu_read_unlock(); - return 0; } @@ -321,15 +324,10 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx) { - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ieee80211_set_default_mgmt_key(sdata, key_idx); - rcu_read_unlock(); - return 0; } @@ -415,9 +413,6 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (!local->ops->get_survey) - return -EOPNOTSUPP; - return drv_get_survey(local, idx, survey); } @@ -600,7 +595,7 @@ struct iapp_layer2_update { u8 ssap; /* 0 */ u8 control; u8 xid_info[3]; -} __attribute__ ((packed)); +} __packed; static void ieee80211_send_layer2_update(struct sta_info *sta) { @@ -632,7 +627,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) skb->dev = sta->sdata->dev; skb->protocol = eth_type_trans(skb, sta->sdata->dev); memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); + netif_rx_ni(skb); } static void sta_apply_parameters(struct ieee80211_local *local, @@ -1154,10 +1149,6 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, return -EINVAL; } - /* enable WMM or activate new settings */ - local->hw.conf.flags |= IEEE80211_CONF_QOS; - drv_config(local, IEEE80211_CONF_CHANGE_QOS); - return 0; } @@ -1331,28 +1322,28 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, 
u32 changed) } static int ieee80211_set_tx_power(struct wiphy *wiphy, - enum tx_power_setting type, int dbm) + enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_channel *chan = local->hw.conf.channel; u32 changes = 0; switch (type) { - case TX_POWER_AUTOMATIC: + case NL80211_TX_POWER_AUTOMATIC: local->user_power_level = -1; break; - case TX_POWER_LIMITED: - if (dbm < 0) - return -EINVAL; - local->user_power_level = dbm; + case NL80211_TX_POWER_LIMITED: + if (mbm < 0 || (mbm % 100)) + return -EOPNOTSUPP; + local->user_power_level = MBM_TO_DBM(mbm); break; - case TX_POWER_FIXED: - if (dbm < 0) - return -EINVAL; + case NL80211_TX_POWER_FIXED: + if (mbm < 0 || (mbm % 100)) + return -EOPNOTSUPP; /* TODO: move to cfg80211 when it knows the channel */ - if (dbm > chan->max_power) + if (MBM_TO_DBM(mbm) > chan->max_power) return -EINVAL; - local->user_power_level = dbm; + local->user_power_level = MBM_TO_DBM(mbm); break; } @@ -1448,7 +1439,6 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_conf *conf = &local->hw.conf; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; @@ -1457,11 +1447,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, return -EOPNOTSUPP; if (enabled == sdata->u.mgd.powersave && - timeout == conf->dynamic_ps_forced_timeout) + timeout == local->dynamic_ps_forced_timeout) return 0; sdata->u.mgd.powersave = enabled; - conf->dynamic_ps_forced_timeout = timeout; + local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ mutex_lock(&sdata->u.mgd.mtx); @@ -1554,10 +1544,58 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, struct 
ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { - return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan, - channel_type, buf, len, cookie); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct sta_info *sta; + const struct ieee80211_mgmt *mgmt = (void *)buf; + u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + /* Check that we are on the requested channel for transmission */ + if (chan != local->tmp_channel && + chan != local->oper_channel) + return -EBUSY; + if (channel_type_valid && + (channel_type != local->tmp_channel_type && + channel_type != local->_oper_channel_type)) + return -EBUSY; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_ADHOC: + if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) + break; + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->da); + rcu_read_unlock(); + if (!sta) + return -ENOLINK; + break; + case NL80211_IFTYPE_STATION: + if (!(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED)) + flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + break; + default: + return -EOPNOTSUPP; + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); + if (!skb) + return -ENOMEM; + skb_reserve(skb, local->hw.extra_tx_headroom); + + memcpy(skb_put(skb, len), buf, len); + + IEEE80211_SKB_CB(skb)->flags = flags; + + skb->dev = sdata->dev; + ieee80211_tx_skb(sdata, skb); + + *cookie = (unsigned long) skb; + return 0; } struct cfg80211_ops mac80211_config_ops = { diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 637929b65ccc..a694c593ff6a 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -307,9 +307,6 @@ static const struct file_operations queues_ops = { /* statistics stuff */ -#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) 
\ - DEBUGFS_READONLY_FILE(stats_ ##name, buflen, fmt, ##value) - static ssize_t format_devstat_counter(struct ieee80211_local *local, char __user *userbuf, size_t count, loff_t *ppos, @@ -351,75 +348,16 @@ static const struct file_operations stats_ ##name## _ops = { \ .open = mac80211_open_file_generic, \ }; -#define DEBUGFS_STATS_ADD(name) \ +#define DEBUGFS_STATS_ADD(name, field) \ + debugfs_create_u32(#name, 0400, statsd, (u32 *) &field); +#define DEBUGFS_DEVSTATS_ADD(name) \ debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops); -DEBUGFS_STATS_FILE(transmitted_fragment_count, 20, "%u", - local->dot11TransmittedFragmentCount); -DEBUGFS_STATS_FILE(multicast_transmitted_frame_count, 20, "%u", - local->dot11MulticastTransmittedFrameCount); -DEBUGFS_STATS_FILE(failed_count, 20, "%u", - local->dot11FailedCount); -DEBUGFS_STATS_FILE(retry_count, 20, "%u", - local->dot11RetryCount); -DEBUGFS_STATS_FILE(multiple_retry_count, 20, "%u", - local->dot11MultipleRetryCount); -DEBUGFS_STATS_FILE(frame_duplicate_count, 20, "%u", - local->dot11FrameDuplicateCount); -DEBUGFS_STATS_FILE(received_fragment_count, 20, "%u", - local->dot11ReceivedFragmentCount); -DEBUGFS_STATS_FILE(multicast_received_frame_count, 20, "%u", - local->dot11MulticastReceivedFrameCount); -DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u", - local->dot11TransmittedFrameCount); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS -DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u", - local->tx_handlers_drop); -DEBUGFS_STATS_FILE(tx_handlers_queued, 20, "%u", - local->tx_handlers_queued); -DEBUGFS_STATS_FILE(tx_handlers_drop_unencrypted, 20, "%u", - local->tx_handlers_drop_unencrypted); -DEBUGFS_STATS_FILE(tx_handlers_drop_fragment, 20, "%u", - local->tx_handlers_drop_fragment); -DEBUGFS_STATS_FILE(tx_handlers_drop_wep, 20, "%u", - local->tx_handlers_drop_wep); -DEBUGFS_STATS_FILE(tx_handlers_drop_not_assoc, 20, "%u", - local->tx_handlers_drop_not_assoc); -DEBUGFS_STATS_FILE(tx_handlers_drop_unauth_port, 20, 
"%u", - local->tx_handlers_drop_unauth_port); -DEBUGFS_STATS_FILE(rx_handlers_drop, 20, "%u", - local->rx_handlers_drop); -DEBUGFS_STATS_FILE(rx_handlers_queued, 20, "%u", - local->rx_handlers_queued); -DEBUGFS_STATS_FILE(rx_handlers_drop_nullfunc, 20, "%u", - local->rx_handlers_drop_nullfunc); -DEBUGFS_STATS_FILE(rx_handlers_drop_defrag, 20, "%u", - local->rx_handlers_drop_defrag); -DEBUGFS_STATS_FILE(rx_handlers_drop_short, 20, "%u", - local->rx_handlers_drop_short); -DEBUGFS_STATS_FILE(rx_handlers_drop_passive_scan, 20, "%u", - local->rx_handlers_drop_passive_scan); -DEBUGFS_STATS_FILE(tx_expand_skb_head, 20, "%u", - local->tx_expand_skb_head); -DEBUGFS_STATS_FILE(tx_expand_skb_head_cloned, 20, "%u", - local->tx_expand_skb_head_cloned); -DEBUGFS_STATS_FILE(rx_expand_skb_head, 20, "%u", - local->rx_expand_skb_head); -DEBUGFS_STATS_FILE(rx_expand_skb_head2, 20, "%u", - local->rx_expand_skb_head2); -DEBUGFS_STATS_FILE(rx_handlers_fragments, 20, "%u", - local->rx_handlers_fragments); -DEBUGFS_STATS_FILE(tx_status_drop, 20, "%u", - local->tx_status_drop); - -#endif - DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount); DEBUGFS_DEVSTATS_FILE(dot11RTSFailureCount); DEBUGFS_DEVSTATS_FILE(dot11FCSErrorCount); DEBUGFS_DEVSTATS_FILE(dot11RTSSuccessCount); - void debugfs_hw_add(struct ieee80211_local *local) { struct dentry *phyd = local->hw.wiphy->debugfsdir; @@ -448,38 +386,60 @@ void debugfs_hw_add(struct ieee80211_local *local) if (!statsd) return; - DEBUGFS_STATS_ADD(transmitted_fragment_count); - DEBUGFS_STATS_ADD(multicast_transmitted_frame_count); - DEBUGFS_STATS_ADD(failed_count); - DEBUGFS_STATS_ADD(retry_count); - DEBUGFS_STATS_ADD(multiple_retry_count); - DEBUGFS_STATS_ADD(frame_duplicate_count); - DEBUGFS_STATS_ADD(received_fragment_count); - DEBUGFS_STATS_ADD(multicast_received_frame_count); - DEBUGFS_STATS_ADD(transmitted_frame_count); + DEBUGFS_STATS_ADD(transmitted_fragment_count, + local->dot11TransmittedFragmentCount); + 
DEBUGFS_STATS_ADD(multicast_transmitted_frame_count, + local->dot11MulticastTransmittedFrameCount); + DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount); + DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount); + DEBUGFS_STATS_ADD(multiple_retry_count, + local->dot11MultipleRetryCount); + DEBUGFS_STATS_ADD(frame_duplicate_count, + local->dot11FrameDuplicateCount); + DEBUGFS_STATS_ADD(received_fragment_count, + local->dot11ReceivedFragmentCount); + DEBUGFS_STATS_ADD(multicast_received_frame_count, + local->dot11MulticastReceivedFrameCount); + DEBUGFS_STATS_ADD(transmitted_frame_count, + local->dot11TransmittedFrameCount); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_STATS_ADD(tx_handlers_drop); - DEBUGFS_STATS_ADD(tx_handlers_queued); - DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted); - DEBUGFS_STATS_ADD(tx_handlers_drop_fragment); - DEBUGFS_STATS_ADD(tx_handlers_drop_wep); - DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc); - DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port); - DEBUGFS_STATS_ADD(rx_handlers_drop); - DEBUGFS_STATS_ADD(rx_handlers_queued); - DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc); - DEBUGFS_STATS_ADD(rx_handlers_drop_defrag); - DEBUGFS_STATS_ADD(rx_handlers_drop_short); - DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan); - DEBUGFS_STATS_ADD(tx_expand_skb_head); - DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned); - DEBUGFS_STATS_ADD(rx_expand_skb_head); - DEBUGFS_STATS_ADD(rx_expand_skb_head2); - DEBUGFS_STATS_ADD(rx_handlers_fragments); - DEBUGFS_STATS_ADD(tx_status_drop); + DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop); + DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued); + DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted, + local->tx_handlers_drop_unencrypted); + DEBUGFS_STATS_ADD(tx_handlers_drop_fragment, + local->tx_handlers_drop_fragment); + DEBUGFS_STATS_ADD(tx_handlers_drop_wep, + local->tx_handlers_drop_wep); + DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc, + local->tx_handlers_drop_not_assoc); + 
DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port, + local->tx_handlers_drop_unauth_port); + DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop); + DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued); + DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc, + local->rx_handlers_drop_nullfunc); + DEBUGFS_STATS_ADD(rx_handlers_drop_defrag, + local->rx_handlers_drop_defrag); + DEBUGFS_STATS_ADD(rx_handlers_drop_short, + local->rx_handlers_drop_short); + DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan, + local->rx_handlers_drop_passive_scan); + DEBUGFS_STATS_ADD(tx_expand_skb_head, + local->tx_expand_skb_head); + DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned, + local->tx_expand_skb_head_cloned); + DEBUGFS_STATS_ADD(rx_expand_skb_head, + local->rx_expand_skb_head); + DEBUGFS_STATS_ADD(rx_expand_skb_head2, + local->rx_expand_skb_head2); + DEBUGFS_STATS_ADD(rx_handlers_fragments, + local->rx_handlers_fragments); + DEBUGFS_STATS_ADD(tx_status_drop, + local->tx_status_drop); #endif - DEBUGFS_STATS_ADD(dot11ACKFailureCount); - DEBUGFS_STATS_ADD(dot11RTSFailureCount); - DEBUGFS_STATS_ADD(dot11FCSErrorCount); - DEBUGFS_STATS_ADD(dot11RTSSuccessCount); + DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount); + DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); + DEBUGFS_DEVSTATS_ADD(dot11FCSErrorCount); + DEBUGFS_DEVSTATS_ADD(dot11RTSSuccessCount); } diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 97c9e46e859e..fa5e76e658ef 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -143,7 +143,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, len = p - buf; break; case ALG_CCMP: - for (i = 0; i < NUM_RX_DATA_QUEUES; i++) { + for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { rpn = key->u.ccmp.rx_pn[i]; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index e763f1529ddb..76839d4dfaac 100644 --- a/net/mac80211/debugfs_sta.c +++ 
b/net/mac80211/debugfs_sta.c @@ -30,7 +30,6 @@ static ssize_t sta_ ##name## _read(struct file *file, \ } #define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n") #define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n") -#define STA_READ_LU(name, field) STA_READ(name, 20, field, "%lu\n") #define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n") #define STA_OPS(name) \ @@ -52,19 +51,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_FILE(aid, sta.aid, D); STA_FILE(dev, sdata->name, S); -STA_FILE(rx_packets, rx_packets, LU); -STA_FILE(tx_packets, tx_packets, LU); -STA_FILE(rx_bytes, rx_bytes, LU); -STA_FILE(tx_bytes, tx_bytes, LU); -STA_FILE(rx_duplicates, num_duplicates, LU); -STA_FILE(rx_fragments, rx_fragments, LU); -STA_FILE(rx_dropped, rx_dropped, LU); -STA_FILE(tx_fragments, tx_fragments, LU); -STA_FILE(tx_filtered, tx_filtered_count, LU); -STA_FILE(tx_retry_failed, tx_retry_failed, LU); -STA_FILE(tx_retry_count, tx_retry_count, LU); STA_FILE(last_signal, last_signal, D); -STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -134,28 +121,25 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); p += scnprintf(p, sizeof(buf) + buf - p, - "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n"); + "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); for (i = 0; i < STA_TID_NUM; i++) { p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", - sta->ampdu_mlme.tid_active_rx[i]); + !!sta->ampdu_mlme.tid_rx[i]); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - sta->ampdu_mlme.tid_active_rx[i] ? + sta->ampdu_mlme.tid_rx[i] ? 
sta->ampdu_mlme.tid_rx[i]->dialog_token : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", - sta->ampdu_mlme.tid_active_rx[i] ? + sta->ampdu_mlme.tid_rx[i] ? sta->ampdu_mlme.tid_rx[i]->ssn : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", - sta->ampdu_mlme.tid_state_tx[i]); + !!sta->ampdu_mlme.tid_tx[i]); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - sta->ampdu_mlme.tid_state_tx[i] ? + sta->ampdu_mlme.tid_tx[i] ? sta->ampdu_mlme.tid_tx[i]->dialog_token : 0); - p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", - sta->ampdu_mlme.tid_state_tx[i] ? - sta->ampdu_mlme.tid_tx[i]->ssn : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d", - sta->ampdu_mlme.tid_state_tx[i] ? + sta->ampdu_mlme.tid_tx[i] ? skb_queue_len(&sta->ampdu_mlme.tid_tx[i]->pending) : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\n"); } @@ -210,8 +194,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu if (start) ret = ieee80211_start_tx_ba_session(&sta->sta, tid); else - ret = ieee80211_stop_tx_ba_session(&sta->sta, tid, - WLAN_BACK_RECIPIENT); + ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); } else { __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3); ret = 0; @@ -307,6 +290,13 @@ STA_OPS(ht_capa); debugfs_create_file(#name, 0400, \ sta->debugfs.dir, sta, &sta_ ##name## _ops); +#define DEBUGFS_ADD_COUNTER(name, field) \ + if (sizeof(sta->field) == sizeof(u32)) \ + debugfs_create_u32(#name, 0400, sta->debugfs.dir, \ + (u32 *) &sta->field); \ + else \ + debugfs_create_u64(#name, 0400, sta->debugfs.dir, \ + (u64 *) &sta->field); void ieee80211_sta_debugfs_add(struct sta_info *sta) { @@ -339,20 +329,21 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); DEBUGFS_ADD(dev); - DEBUGFS_ADD(rx_packets); - DEBUGFS_ADD(tx_packets); - DEBUGFS_ADD(rx_bytes); - DEBUGFS_ADD(tx_bytes); - DEBUGFS_ADD(rx_duplicates); - DEBUGFS_ADD(rx_fragments); - DEBUGFS_ADD(rx_dropped); - 
DEBUGFS_ADD(tx_fragments); - DEBUGFS_ADD(tx_filtered); - DEBUGFS_ADD(tx_retry_failed); - DEBUGFS_ADD(tx_retry_count); DEBUGFS_ADD(last_signal); - DEBUGFS_ADD(wep_weak_iv_count); DEBUGFS_ADD(ht_capa); + + DEBUGFS_ADD_COUNTER(rx_packets, rx_packets); + DEBUGFS_ADD_COUNTER(tx_packets, tx_packets); + DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes); + DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes); + DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates); + DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments); + DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped); + DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments); + DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count); + DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed); + DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count); + DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count); } void ieee80211_sta_debugfs_remove(struct sta_info *sta) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 9c1da0809160..14123dce544b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -16,10 +16,11 @@ static inline int drv_start(struct ieee80211_local *local) might_sleep(); + trace_drv_start(local); local->started = true; smp_mb(); ret = local->ops->start(&local->hw); - trace_drv_start(local, ret); + trace_drv_return_int(local, ret); return ret; } @@ -27,8 +28,9 @@ static inline void drv_stop(struct ieee80211_local *local) { might_sleep(); - local->ops->stop(&local->hw); trace_drv_stop(local); + local->ops->stop(&local->hw); + trace_drv_return_void(local); /* sync away all work on the tasklet before clearing started */ tasklet_disable(&local->tasklet); @@ -46,8 +48,9 @@ static inline int drv_add_interface(struct ieee80211_local *local, might_sleep(); + trace_drv_add_interface(local, vif_to_sdata(vif)); ret = local->ops->add_interface(&local->hw, vif); - trace_drv_add_interface(local, vif_to_sdata(vif), ret); + trace_drv_return_int(local, ret); return ret; } @@ -56,8 +59,9 @@ static inline void drv_remove_interface(struct 
ieee80211_local *local, { might_sleep(); - local->ops->remove_interface(&local->hw, vif); trace_drv_remove_interface(local, vif_to_sdata(vif)); + local->ops->remove_interface(&local->hw, vif); + trace_drv_return_void(local); } static inline int drv_config(struct ieee80211_local *local, u32 changed) @@ -66,8 +70,9 @@ static inline int drv_config(struct ieee80211_local *local, u32 changed) might_sleep(); + trace_drv_config(local, changed); ret = local->ops->config(&local->hw, changed); - trace_drv_config(local, changed, ret); + trace_drv_return_int(local, ret); return ret; } @@ -78,9 +83,10 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, { might_sleep(); + trace_drv_bss_info_changed(local, sdata, info, changed); if (local->ops->bss_info_changed) local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed); - trace_drv_bss_info_changed(local, sdata, info, changed); + trace_drv_return_void(local); } static inline u64 drv_prepare_multicast(struct ieee80211_local *local, @@ -88,10 +94,12 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, { u64 ret = 0; + trace_drv_prepare_multicast(local, mc_list->count); + if (local->ops->prepare_multicast) ret = local->ops->prepare_multicast(&local->hw, mc_list); - trace_drv_prepare_multicast(local, mc_list->count, ret); + trace_drv_return_u64(local, ret); return ret; } @@ -103,19 +111,21 @@ static inline void drv_configure_filter(struct ieee80211_local *local, { might_sleep(); - local->ops->configure_filter(&local->hw, changed_flags, total_flags, - multicast); trace_drv_configure_filter(local, changed_flags, total_flags, multicast); + local->ops->configure_filter(&local->hw, changed_flags, total_flags, + multicast); + trace_drv_return_void(local); } static inline int drv_set_tim(struct ieee80211_local *local, struct ieee80211_sta *sta, bool set) { int ret = 0; + trace_drv_set_tim(local, sta, set); if (local->ops->set_tim) ret = local->ops->set_tim(&local->hw, sta, set); - 
trace_drv_set_tim(local, sta, set, ret); + trace_drv_return_int(local, ret); return ret; } @@ -129,8 +139,9 @@ static inline int drv_set_key(struct ieee80211_local *local, might_sleep(); + trace_drv_set_key(local, cmd, sdata, sta, key); ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); - trace_drv_set_key(local, cmd, sdata, sta, key, ret); + trace_drv_return_int(local, ret); return ret; } @@ -145,10 +156,11 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, if (sta) ista = &sta->sta; + trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); if (local->ops->update_tkip_key) local->ops->update_tkip_key(&local->hw, &sdata->vif, conf, ista, iv32, phase1key); - trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); + trace_drv_return_void(local); } static inline int drv_hw_scan(struct ieee80211_local *local, @@ -159,8 +171,9 @@ static inline int drv_hw_scan(struct ieee80211_local *local, might_sleep(); + trace_drv_hw_scan(local, sdata, req); ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); - trace_drv_hw_scan(local, sdata, req, ret); + trace_drv_return_int(local, ret); return ret; } @@ -168,18 +181,20 @@ static inline void drv_sw_scan_start(struct ieee80211_local *local) { might_sleep(); + trace_drv_sw_scan_start(local); if (local->ops->sw_scan_start) local->ops->sw_scan_start(&local->hw); - trace_drv_sw_scan_start(local); + trace_drv_return_void(local); } static inline void drv_sw_scan_complete(struct ieee80211_local *local) { might_sleep(); + trace_drv_sw_scan_complete(local); if (local->ops->sw_scan_complete) local->ops->sw_scan_complete(&local->hw); - trace_drv_sw_scan_complete(local); + trace_drv_return_void(local); } static inline int drv_get_stats(struct ieee80211_local *local, @@ -211,9 +226,10 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, might_sleep(); + trace_drv_set_rts_threshold(local, value); if (local->ops->set_rts_threshold) ret = local->ops->set_rts_threshold(&local->hw, 
value); - trace_drv_set_rts_threshold(local, value, ret); + trace_drv_return_int(local, ret); return ret; } @@ -223,12 +239,13 @@ static inline int drv_set_coverage_class(struct ieee80211_local *local, int ret = 0; might_sleep(); + trace_drv_set_coverage_class(local, value); if (local->ops->set_coverage_class) local->ops->set_coverage_class(&local->hw, value); else ret = -EOPNOTSUPP; - trace_drv_set_coverage_class(local, value, ret); + trace_drv_return_int(local, ret); return ret; } @@ -237,9 +254,10 @@ static inline void drv_sta_notify(struct ieee80211_local *local, enum sta_notify_cmd cmd, struct ieee80211_sta *sta) { + trace_drv_sta_notify(local, sdata, cmd, sta); if (local->ops->sta_notify) local->ops->sta_notify(&local->hw, &sdata->vif, cmd, sta); - trace_drv_sta_notify(local, sdata, cmd, sta); + trace_drv_return_void(local); } static inline int drv_sta_add(struct ieee80211_local *local, @@ -250,13 +268,11 @@ static inline int drv_sta_add(struct ieee80211_local *local, might_sleep(); + trace_drv_sta_add(local, sdata, sta); if (local->ops->sta_add) ret = local->ops->sta_add(&local->hw, &sdata->vif, sta); - else if (local->ops->sta_notify) - local->ops->sta_notify(&local->hw, &sdata->vif, - STA_NOTIFY_ADD, sta); - trace_drv_sta_add(local, sdata, sta, ret); + trace_drv_return_int(local, ret); return ret; } @@ -267,13 +283,11 @@ static inline void drv_sta_remove(struct ieee80211_local *local, { might_sleep(); + trace_drv_sta_remove(local, sdata, sta); if (local->ops->sta_remove) local->ops->sta_remove(&local->hw, &sdata->vif, sta); - else if (local->ops->sta_notify) - local->ops->sta_notify(&local->hw, &sdata->vif, - STA_NOTIFY_REMOVE, sta); - trace_drv_sta_remove(local, sdata, sta); + trace_drv_return_void(local); } static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, @@ -283,9 +297,10 @@ static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, might_sleep(); + trace_drv_conf_tx(local, queue, params); if (local->ops->conf_tx) ret 
= local->ops->conf_tx(&local->hw, queue, params); - trace_drv_conf_tx(local, queue, params, ret); + trace_drv_return_int(local, ret); return ret; } @@ -295,9 +310,10 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local) might_sleep(); + trace_drv_get_tsf(local); if (local->ops->get_tsf) ret = local->ops->get_tsf(&local->hw); - trace_drv_get_tsf(local, ret); + trace_drv_return_u64(local, ret); return ret; } @@ -305,18 +321,20 @@ static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf) { might_sleep(); + trace_drv_set_tsf(local, tsf); if (local->ops->set_tsf) local->ops->set_tsf(&local->hw, tsf); - trace_drv_set_tsf(local, tsf); + trace_drv_return_void(local); } static inline void drv_reset_tsf(struct ieee80211_local *local) { might_sleep(); + trace_drv_reset_tsf(local); if (local->ops->reset_tsf) local->ops->reset_tsf(&local->hw); - trace_drv_reset_tsf(local); + trace_drv_return_void(local); } static inline int drv_tx_last_beacon(struct ieee80211_local *local) @@ -325,9 +343,10 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local) might_sleep(); + trace_drv_tx_last_beacon(local); if (local->ops->tx_last_beacon) ret = local->ops->tx_last_beacon(&local->hw); - trace_drv_tx_last_beacon(local, ret); + trace_drv_return_int(local, ret); return ret; } @@ -338,10 +357,17 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, u16 *ssn) { int ret = -EOPNOTSUPP; + + might_sleep(); + + trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn); + if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, sta, tid, ssn); - trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, ret); + + trace_drv_return_int(local, ret); + return ret; } @@ -349,9 +375,14 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, struct survey_info *survey) { int ret = -EOPNOTSUPP; + + trace_drv_get_survey(local, idx, survey); + if (local->ops->get_survey) ret = 
local->ops->get_survey(&local->hw, idx, survey); - /* trace_drv_get_survey(local, idx, survey, ret); */ + + trace_drv_return_int(local, ret); + return ret; } @@ -370,6 +401,7 @@ static inline void drv_flush(struct ieee80211_local *local, bool drop) trace_drv_flush(local, drop); if (local->ops->flush) local->ops->flush(&local->hw, drop); + trace_drv_return_void(local); } static inline void drv_channel_switch(struct ieee80211_local *local, @@ -377,9 +409,9 @@ static inline void drv_channel_switch(struct ieee80211_local *local, { might_sleep(); - local->ops->channel_switch(&local->hw, ch_switch); - trace_drv_channel_switch(local, ch_switch); + local->ops->channel_switch(&local->hw, ch_switch); + trace_drv_return_void(local); } #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6a9b2342a9c2..5d5d2a974668 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -36,20 +36,58 @@ static inline void trace_ ## name(proto) {} * Tracing for driver callbacks. 
*/ -TRACE_EVENT(drv_start, - TP_PROTO(struct ieee80211_local *local, int ret), +TRACE_EVENT(drv_return_void, + TP_PROTO(struct ieee80211_local *local), + TP_ARGS(local), + TP_STRUCT__entry( + LOCAL_ENTRY + ), + TP_fast_assign( + LOCAL_ASSIGN; + ), + TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) +); +TRACE_EVENT(drv_return_int, + TP_PROTO(struct ieee80211_local *local, int ret), TP_ARGS(local, ret), - TP_STRUCT__entry( LOCAL_ENTRY __field(int, ret) ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + ), + TP_printk(LOCAL_PR_FMT " - %d", LOCAL_PR_ARG, __entry->ret) +); +TRACE_EVENT(drv_return_u64, + TP_PROTO(struct ieee80211_local *local, u64 ret), + TP_ARGS(local, ret), + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u64, ret) + ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; ), + TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret) +); + +TRACE_EVENT(drv_start, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), TP_printk( LOCAL_PR_FMT, LOCAL_PR_ARG @@ -76,28 +114,25 @@ TRACE_EVENT(drv_stop, TRACE_EVENT(drv_add_interface, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - int ret), + struct ieee80211_sub_if_data *sdata), - TP_ARGS(local, sdata, ret), + TP_ARGS(local, sdata), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __array(char, addr, 6) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; memcpy(__entry->addr, sdata->vif.addr, 6); - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " addr:%pM ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT " addr:%pM", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr ) ); @@ -126,15 +161,13 @@ TRACE_EVENT(drv_remove_interface, TRACE_EVENT(drv_config, TP_PROTO(struct ieee80211_local *local, - u32 changed, - int ret), + u32 changed), - TP_ARGS(local, changed, ret), + TP_ARGS(local, changed), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, changed) - 
__field(int, ret) __field(u32, flags) __field(int, power_level) __field(int, dynamic_ps_timeout) @@ -150,7 +183,6 @@ TRACE_EVENT(drv_config, TP_fast_assign( LOCAL_ASSIGN; __entry->changed = changed; - __entry->ret = ret; __entry->flags = local->hw.conf.flags; __entry->power_level = local->hw.conf.power_level; __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; @@ -164,8 +196,8 @@ TRACE_EVENT(drv_config, ), TP_printk( - LOCAL_PR_FMT " ch:%#x freq:%d ret:%d", - LOCAL_PR_ARG, __entry->changed, __entry->center_freq, __entry->ret + LOCAL_PR_FMT " ch:%#x freq:%d", + LOCAL_PR_ARG, __entry->changed, __entry->center_freq ) ); @@ -220,26 +252,23 @@ TRACE_EVENT(drv_bss_info_changed, ); TRACE_EVENT(drv_prepare_multicast, - TP_PROTO(struct ieee80211_local *local, int mc_count, u64 ret), + TP_PROTO(struct ieee80211_local *local, int mc_count), - TP_ARGS(local, mc_count, ret), + TP_ARGS(local, mc_count), TP_STRUCT__entry( LOCAL_ENTRY __field(int, mc_count) - __field(u64, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->mc_count = mc_count; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " prepare mc (%d): %llx", - LOCAL_PR_ARG, __entry->mc_count, - (unsigned long long) __entry->ret + LOCAL_PR_FMT " prepare mc (%d)", + LOCAL_PR_ARG, __entry->mc_count ) ); @@ -273,27 +302,25 @@ TRACE_EVENT(drv_configure_filter, TRACE_EVENT(drv_set_tim, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sta *sta, bool set, int ret), + struct ieee80211_sta *sta, bool set), - TP_ARGS(local, sta, set, ret), + TP_ARGS(local, sta, set), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(bool, set) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->set = set; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT STA_PR_FMT " set:%d ret:%d", - LOCAL_PR_ARG, STA_PR_FMT, __entry->set, __entry->ret + LOCAL_PR_FMT STA_PR_FMT " set:%d", + LOCAL_PR_ARG, STA_PR_FMT, __entry->set ) ); @@ -301,9 +328,9 @@ TRACE_EVENT(drv_set_key, TP_PROTO(struct ieee80211_local *local, enum 
set_key_cmd cmd, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, - struct ieee80211_key_conf *key, int ret), + struct ieee80211_key_conf *key), - TP_ARGS(local, cmd, sdata, sta, key, ret), + TP_ARGS(local, cmd, sdata, sta, key), TP_STRUCT__entry( LOCAL_ENTRY @@ -313,7 +340,6 @@ TRACE_EVENT(drv_set_key, __field(u8, hw_key_idx) __field(u8, flags) __field(s8, keyidx) - __field(int, ret) ), TP_fast_assign( @@ -324,12 +350,11 @@ TRACE_EVENT(drv_set_key, __entry->flags = key->flags; __entry->keyidx = key->keyidx; __entry->hw_key_idx = key->hw_key_idx; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG ) ); @@ -364,25 +389,23 @@ TRACE_EVENT(drv_update_tkip_key, TRACE_EVENT(drv_hw_scan, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - struct cfg80211_scan_request *req, int ret), + struct cfg80211_scan_request *req), - TP_ARGS(local, sdata, req, ret), + TP_ARGS(local, sdata, req), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " ret:%d", - LOCAL_PR_ARG,VIF_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT, + LOCAL_PR_ARG,VIF_PR_ARG ) ); @@ -479,48 +502,44 @@ TRACE_EVENT(drv_get_tkip_seq, ); TRACE_EVENT(drv_set_rts_threshold, - TP_PROTO(struct ieee80211_local *local, u32 value, int ret), + TP_PROTO(struct ieee80211_local *local, u32 value), - TP_ARGS(local, value, ret), + TP_ARGS(local, value), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, value) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; __entry->value = value; ), TP_printk( - LOCAL_PR_FMT " value:%d ret:%d", - LOCAL_PR_ARG, __entry->value, __entry->ret + LOCAL_PR_FMT " value:%d", + LOCAL_PR_ARG, __entry->value ) ); TRACE_EVENT(drv_set_coverage_class, - TP_PROTO(struct 
ieee80211_local *local, u8 value, int ret), + TP_PROTO(struct ieee80211_local *local, u8 value), - TP_ARGS(local, value, ret), + TP_ARGS(local, value), TP_STRUCT__entry( LOCAL_ENTRY __field(u8, value) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; __entry->value = value; ), TP_printk( - LOCAL_PR_FMT " value:%d ret:%d", - LOCAL_PR_ARG, __entry->value, __entry->ret + LOCAL_PR_FMT " value:%d", + LOCAL_PR_ARG, __entry->value ) ); @@ -555,27 +574,25 @@ TRACE_EVENT(drv_sta_notify, TRACE_EVENT(drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, int ret), + struct ieee80211_sta *sta), - TP_ARGS(local, sdata, sta, ret), + TP_ARGS(local, sdata, sta), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG ) ); @@ -606,10 +623,9 @@ TRACE_EVENT(drv_sta_remove, TRACE_EVENT(drv_conf_tx, TP_PROTO(struct ieee80211_local *local, u16 queue, - const struct ieee80211_tx_queue_params *params, - int ret), + const struct ieee80211_tx_queue_params *params), - TP_ARGS(local, queue, params, ret), + TP_ARGS(local, queue, params), TP_STRUCT__entry( LOCAL_ENTRY @@ -618,13 +634,11 @@ TRACE_EVENT(drv_conf_tx, __field(u16, cw_min) __field(u16, cw_max) __field(u8, aifs) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->queue = queue; - __entry->ret = ret; __entry->txop = params->txop; __entry->cw_max = params->cw_max; __entry->cw_min = params->cw_min; @@ -632,29 +646,27 @@ TRACE_EVENT(drv_conf_tx, ), TP_printk( - LOCAL_PR_FMT " queue:%d ret:%d", - LOCAL_PR_ARG, __entry->queue, __entry->ret + LOCAL_PR_FMT " queue:%d", + LOCAL_PR_ARG, __entry->queue ) ); TRACE_EVENT(drv_get_tsf, - TP_PROTO(struct ieee80211_local 
*local, u64 ret), + TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local, ret), + TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY - __field(u64, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " ret:%llu", - LOCAL_PR_ARG, (unsigned long long)__entry->ret + LOCAL_PR_FMT, + LOCAL_PR_ARG ) ); @@ -698,23 +710,21 @@ TRACE_EVENT(drv_reset_tsf, ); TRACE_EVENT(drv_tx_last_beacon, - TP_PROTO(struct ieee80211_local *local, int ret), + TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local, ret), + TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " ret:%d", - LOCAL_PR_ARG, __entry->ret + LOCAL_PR_FMT, + LOCAL_PR_ARG ) ); @@ -723,9 +733,9 @@ TRACE_EVENT(drv_ampdu_action, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn, int ret), + u16 *ssn), - TP_ARGS(local, sdata, action, sta, tid, ssn, ret), + TP_ARGS(local, sdata, action, sta, tid, ssn), TP_STRUCT__entry( LOCAL_ENTRY @@ -733,7 +743,6 @@ TRACE_EVENT(drv_ampdu_action, __field(u32, action) __field(u16, tid) __field(u16, ssn) - __field(int, ret) VIF_ENTRY ), @@ -741,15 +750,36 @@ TRACE_EVENT(drv_ampdu_action, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->ret = ret; __entry->action = action; __entry->tid = tid; __entry->ssn = ssn ? 
*ssn : 0; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid + ) +); + +TRACE_EVENT(drv_get_survey, + TP_PROTO(struct ieee80211_local *local, int idx, + struct survey_info *survey), + + TP_ARGS(local, idx, survey), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, idx) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->idx = idx; + ), + + TP_printk( + LOCAL_PR_FMT " idx:%d", + LOCAL_PR_ARG, __entry->idx ) ); @@ -851,25 +881,23 @@ TRACE_EVENT(api_start_tx_ba_cb, ); TRACE_EVENT(api_stop_tx_ba_session, - TP_PROTO(struct ieee80211_sta *sta, u16 tid, u16 initiator), + TP_PROTO(struct ieee80211_sta *sta, u16 tid), - TP_ARGS(sta, tid, initiator), + TP_ARGS(sta, tid), TP_STRUCT__entry( STA_ENTRY __field(u16, tid) - __field(u16, initiator) ), TP_fast_assign( STA_ASSIGN; __entry->tid = tid; - __entry->initiator = initiator; ), TP_printk( - STA_PR_FMT " tid:%d initiator:%d", - STA_PR_ARG, __entry->tid, __entry->initiator + STA_PR_FMT " tid:%d", + STA_PR_ARG, __entry->tid ) ); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 2ab106a0a491..9d101fb33861 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -6,7 +6,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. 
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> - * Copyright 2007-2008, Intel Corporation + * Copyright 2007-2010, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -29,7 +29,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, memset(ht_cap, 0, sizeof(*ht_cap)); - if (!ht_cap_ie) + if (!ht_cap_ie || !sband->ht_cap.ht_supported) return; ht_cap->ht_supported = true; @@ -105,6 +105,8 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) { int i; + cancel_work_sync(&sta->ampdu_mlme.work); + for (i = 0; i < STA_TID_NUM; i++) { __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR); __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, @@ -112,6 +114,43 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) } } +void ieee80211_ba_session_work(struct work_struct *work) +{ + struct sta_info *sta = + container_of(work, struct sta_info, ampdu_mlme.work); + struct tid_ampdu_tx *tid_tx; + int tid; + + /* + * When this flag is set, new sessions should be + * blocked, and existing sessions will be torn + * down by the code that set the flag, so this + * need not run. 
+ */ + if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) + return; + + mutex_lock(&sta->ampdu_mlme.mtx); + for (tid = 0; tid < STA_TID_NUM; tid++) { + if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) + ___ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_TIMEOUT); + + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + if (!tid_tx) + continue; + + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) + ieee80211_tx_ba_session_handle_start(sta, tid); + else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, + &tid_tx->state)) + ___ieee80211_stop_tx_ba_session(sta, tid, + WLAN_BACK_INITIATOR); + } + mutex_unlock(&sta->ampdu_mlme.mtx); +} + void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code) @@ -176,13 +215,8 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, if (initiator == WLAN_BACK_INITIATOR) __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0); - else { /* WLAN_BACK_RECIPIENT */ - spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK) - ___ieee80211_stop_tx_ba_session(sta, tid, - WLAN_BACK_RECIPIENT); - spin_unlock_bh(&sta->lock); - } + else + __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT); } int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index b2cc1fda6cfd..c691780725a7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -43,6 +43,8 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, { u16 auth_alg, auth_transaction, status_code; + lockdep_assert_held(&sdata->u.ibss.mtx); + if (len < 24 + 6) return; @@ -78,6 +80,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 bss_change; u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; + lockdep_assert_held(&ifibss->mtx); + /* Reset own TSF to allow time synchronization work. 
*/ drv_reset_tsf(local); @@ -172,11 +176,13 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, rcu_assign_pointer(ifibss->presp, skb); sdata->vif.bss_conf.beacon_int = beacon_int; + sdata->vif.bss_conf.basic_rates = basic_rates; bss_change = BSS_CHANGED_BEACON_INT; bss_change |= ieee80211_reset_erp_info(sdata); bss_change |= BSS_CHANGED_BSSID; bss_change |= BSS_CHANGED_BEACON; bss_change |= BSS_CHANGED_BEACON_ENABLED; + bss_change |= BSS_CHANGED_BASIC_RATES; bss_change |= BSS_CHANGED_IBSS; sdata->vif.bss_conf.ibss_joined = true; ieee80211_bss_info_change_notify(sdata, bss_change); @@ -203,6 +209,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, int i, j; u16 beacon_int = cbss->beacon_interval; + lockdep_assert_held(&sdata->u.ibss.mtx); + if (beacon_int < 10) beacon_int = 10; @@ -447,6 +455,8 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) int active = 0; struct sta_info *sta; + lockdep_assert_held(&sdata->u.ibss.mtx); + rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { @@ -471,6 +481,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + lockdep_assert_held(&ifibss->mtx); + mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); @@ -503,6 +515,8 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) u16 capability; int i; + lockdep_assert_held(&ifibss->mtx); + if (ifibss->fixed_bssid) { memcpy(bssid, ifibss->bssid, ETH_ALEN); } else { @@ -529,7 +543,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) sdata->drop_unencrypted = 0; __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, - ifibss->channel, 3, /* first two are basic */ + ifibss->channel, ifibss->basic_rates, capability, 0); } @@ -547,6 +561,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) int 
active_ibss; u16 capability; + lockdep_assert_held(&ifibss->mtx); + active_ibss = ieee80211_sta_active_ibss(sdata); #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", @@ -635,6 +651,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *resp; u8 *pos, *end; + lockdep_assert_held(&ifibss->mtx); + if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || len < 24 + 2 || !ifibss->presp) return; @@ -727,8 +745,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); } -static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; @@ -738,6 +756,8 @@ static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); + mutex_lock(&sdata->u.ibss.mtx); + switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); @@ -755,35 +775,22 @@ static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; } - kfree_skb(skb); + mutex_unlock(&sdata->u.ibss.mtx); } -static void ieee80211_ibss_work(struct work_struct *work) +void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.ibss.work); - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_ibss *ifibss; - struct sk_buff *skb; - - if (WARN_ON(local->suspended)) - return; - - if (!ieee80211_sdata_running(sdata)) - return; - - if (local->scanning) - return; - - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_ADHOC)) - return; - ifibss = &sdata->u.ibss; + struct 
ieee80211_if_ibss *ifibss = &sdata->u.ibss; - while ((skb = skb_dequeue(&ifibss->skb_queue))) - ieee80211_ibss_rx_queued_mgmt(sdata, skb); + mutex_lock(&ifibss->mtx); - if (!test_and_clear_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request)) - return; + /* + * Work could be scheduled after scan or similar + * when we aren't even joined (or trying) with a + * network. + */ + if (!ifibss->ssid_len) + goto out; switch (ifibss->state) { case IEEE80211_IBSS_MLME_SEARCH: @@ -796,6 +803,9 @@ static void ieee80211_ibss_work(struct work_struct *work) WARN_ON(1); break; } + + out: + mutex_unlock(&ifibss->mtx); } static void ieee80211_ibss_timer(unsigned long data) @@ -810,8 +820,7 @@ static void ieee80211_ibss_timer(unsigned long data) return; } - set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); - ieee80211_queue_work(&local->hw, &ifibss->work); + ieee80211_queue_work(&local->hw, &sdata->work); } #ifdef CONFIG_PM @@ -819,7 +828,6 @@ void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - cancel_work_sync(&ifibss->work); if (del_timer_sync(&ifibss->timer)) ifibss->timer_running = true; } @@ -839,10 +847,9 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - INIT_WORK(&ifibss->work, ieee80211_ibss_work); setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); - skb_queue_head_init(&ifibss->skb_queue); + mutex_init(&ifibss->mtx); } /* scan finished notification */ @@ -856,45 +863,28 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) continue; if (sdata->vif.type != NL80211_IFTYPE_ADHOC) continue; - if (!sdata->u.ibss.ssid_len) - continue; sdata->u.ibss.last_scan_completed = jiffies; - mod_timer(&sdata->u.ibss.timer, 0); + ieee80211_queue_work(&local->hw, &sdata->work); } mutex_unlock(&local->iflist_mtx); } -ieee80211_rx_result -ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff 
*skb) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: - case IEEE80211_STYPE_PROBE_REQ: - case IEEE80211_STYPE_AUTH: - skb_queue_tail(&sdata->u.ibss.skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->u.ibss.work); - return RX_QUEUED; - } - - return RX_DROP_MONITOR; -} - int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params) { struct sk_buff *skb; + skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + + 36 /* bitrates */ + + 34 /* SSID */ + + 3 /* DS params */ + + 4 /* IBSS params */ + + params->ie_len); + if (!skb) + return -ENOMEM; + + mutex_lock(&sdata->u.ibss.mtx); + if (params->bssid) { memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); sdata->u.ibss.fixed_bssid = true; @@ -902,6 +892,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.fixed_bssid = false; sdata->u.ibss.privacy = params->privacy; + sdata->u.ibss.basic_rates = params->basic_rates; sdata->vif.bss_conf.beacon_int = params->beacon_interval; @@ -922,34 +913,18 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.ie_len = params->ie_len; } - skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + - 36 /* bitrates */ + - 34 /* SSID */ + - 3 /* DS params */ + - 4 /* IBSS params */ + - params->ie_len); - if (!skb) - return -ENOMEM; - sdata->u.ibss.skb = skb; sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH; sdata->u.ibss.ibss_join_req = jiffies; memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); - - /* - * The ssid_len setting below is used to see whether - * we are active, and we need all other settings - * before that may get visible. 
- */ - mb(); - sdata->u.ibss.ssid_len = params->ssid_len; ieee80211_recalc_idle(sdata->local); - set_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); - ieee80211_queue_work(&sdata->local->hw, &sdata->u.ibss.work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); + + mutex_unlock(&sdata->u.ibss.mtx); return 0; } @@ -957,11 +932,33 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) { struct sk_buff *skb; + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + struct cfg80211_bss *cbss; + u16 capability; + int active_ibss; - del_timer_sync(&sdata->u.ibss.timer); - clear_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); - cancel_work_sync(&sdata->u.ibss.work); - clear_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); + mutex_lock(&sdata->u.ibss.mtx); + + active_ibss = ieee80211_sta_active_ibss(sdata); + + if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { + capability = WLAN_CAPABILITY_IBSS; + + if (ifibss->privacy) + capability |= WLAN_CAPABILITY_PRIVACY; + + cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->channel, + ifibss->bssid, ifibss->ssid, + ifibss->ssid_len, WLAN_CAPABILITY_IBSS | + WLAN_CAPABILITY_PRIVACY, + capability); + + if (cbss) { + cfg80211_unlink_bss(local->hw.wiphy, cbss); + cfg80211_put_bss(cbss); + } + } sta_info_flush(sdata->local, sdata); @@ -975,10 +972,14 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) synchronize_rcu(); kfree_skb(skb); - skb_queue_purge(&sdata->u.ibss.skb_queue); + skb_queue_purge(&sdata->skb_queue); memset(sdata->u.ibss.bssid, 0, ETH_ALEN); sdata->u.ibss.ssid_len = 0; + del_timer_sync(&sdata->u.ibss.timer); + + mutex_unlock(&sdata->u.ibss.mtx); + ieee80211_recalc_idle(sdata->local); return 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1a9e2da37a93..65e0ed6c2975 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h 
@@ -238,6 +238,7 @@ enum ieee80211_work_type { IEEE80211_WORK_ABORT, IEEE80211_WORK_DIRECT_PROBE, IEEE80211_WORK_AUTH, + IEEE80211_WORK_ASSOC_BEACON_WAIT, IEEE80211_WORK_ASSOC, IEEE80211_WORK_REMAIN_ON_CHANNEL, }; @@ -325,7 +326,6 @@ struct ieee80211_if_managed { struct timer_list conn_mon_timer; struct timer_list bcn_mon_timer; struct timer_list chswitch_timer; - struct work_struct work; struct work_struct monitor_work; struct work_struct chswitch_work; struct work_struct beacon_connection_loss_work; @@ -340,8 +340,6 @@ struct ieee80211_if_managed { u16 aid; - struct sk_buff_head skb_queue; - unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ enum ieee80211_smps_mode req_smps, /* requested smps mode */ @@ -380,19 +378,15 @@ struct ieee80211_if_managed { int last_cqm_event_signal; }; -enum ieee80211_ibss_request { - IEEE80211_IBSS_REQ_RUN = 0, -}; - struct ieee80211_if_ibss { struct timer_list timer; - struct work_struct work; - struct sk_buff_head skb_queue; + struct mutex mtx; - unsigned long request; unsigned long last_scan_completed; + u32 basic_rates; + bool timer_running; bool fixed_bssid; @@ -416,11 +410,9 @@ struct ieee80211_if_ibss { }; struct ieee80211_if_mesh { - struct work_struct work; struct timer_list housekeeping_timer; struct timer_list mesh_path_timer; struct timer_list mesh_path_root_timer; - struct sk_buff_head skb_queue; unsigned long timers_running; @@ -517,6 +509,11 @@ struct ieee80211_sub_if_data { u16 sequence_number; + struct work_struct work; + struct sk_buff_head skb_queue; + + bool arp_filter_state; + /* * AP this belongs to: self in AP mode and * corresponding AP in VLAN mode, NULL for @@ -569,11 +566,15 @@ ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata, #endif } +enum sdata_queue_type { + IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, + IEEE80211_SDATA_QUEUE_AGG_START = 1, + IEEE80211_SDATA_QUEUE_AGG_STOP = 2, +}; + enum { IEEE80211_RX_MSG = 1, 
IEEE80211_TX_STATUS_MSG = 2, - IEEE80211_DELBA_MSG = 3, - IEEE80211_ADDBA_MSG = 4, }; enum queue_stop_reason { @@ -724,13 +725,7 @@ struct ieee80211_local { struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; - /* - * This lock is used to prevent concurrent A-MPDU - * session start/stop processing, this thus also - * synchronises the ->ampdu_action() callback to - * drivers and limits it to one at a time. - */ - spinlock_t ampdu_lock; + atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES]; /* number of interfaces with corresponding IFF_ flags */ atomic_t iff_allmultis, iff_promiscs; @@ -746,10 +741,10 @@ struct ieee80211_local { struct mutex iflist_mtx; /* - * Key lock, protects sdata's key_list and sta_info's + * Key mutex, protects sdata's key_list and sta_info's * key pointers (write access, they're RCU.) */ - spinlock_t key_lock; + struct mutex key_mtx; /* Scanning and BSS list */ @@ -851,6 +846,15 @@ struct ieee80211_local { struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; struct notifier_block network_latency_notifier; + struct notifier_block ifa_notifier; + + /* + * The dynamic ps timeout configured from user space via WEXT - + * this will override whatever chosen by mac80211 internally. 
+ */ + int dynamic_ps_forced_timeout; + int dynamic_ps_user_timeout; + bool disable_dynamic_ps; int user_power_level; /* in dBm */ int power_constr_level; /* in dBm */ @@ -874,9 +878,8 @@ IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) return netdev_priv(dev); } -/* this struct represents 802.11n's RA/TID combination along with our vif */ +/* this struct represents 802.11n's RA/TID combination */ struct ieee80211_ra_tid { - struct ieee80211_vif *vif; u8 ra[ETH_ALEN]; u16 tid; }; @@ -985,29 +988,25 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_disassoc_request *req, void *cookie); -int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - const u8 *buf, size_t len, u64 *cookie); -ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb); void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); +int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_sw_ie *sw_elem, struct ieee80211_bss *bss, u64 timestamp); void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); +void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); +void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); -ieee80211_rx_result -ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct 
sk_buff *skb); struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, u8 *bssid, u8 *addr, u32 supp_rates, gfp_t gfp); @@ -1016,6 +1015,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata); +void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata); +void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); + +/* mesh code */ +void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata); +void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); /* scan/BSS handling */ void ieee80211_scan_work(struct work_struct *work); @@ -1084,7 +1091,7 @@ struct ieee80211_tx_status_rtap_hdr { u8 padding_for_rate; __le16 tx_flags; u8 data_retries; -} __attribute__ ((packed)); +} __packed; /* HT */ @@ -1099,6 +1106,8 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid); +void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta); @@ -1118,6 +1127,10 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); +void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); +void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); +void ieee80211_ba_session_work(struct work_struct *work); +void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); /* Spectrum management */ void 
ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 50deb017fd6e..ebbe264e2b0b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -249,6 +249,8 @@ static int ieee80211_open(struct net_device *dev) local->fif_other_bss++; ieee80211_configure_filter(local); + + netif_carrier_on(dev); break; default: res = drv_add_interface(local, &sdata->vif); @@ -268,7 +270,6 @@ static int ieee80211_open(struct net_device *dev) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_enable_keys(sdata); if (sdata->vif.type == NL80211_IFTYPE_STATION) netif_carrier_off(dev); @@ -321,15 +322,6 @@ static int ieee80211_open(struct net_device *dev) ieee80211_recalc_ps(local, -1); - /* - * ieee80211_sta_work is disabled while network interface - * is down. Therefore, some configuration changes may not - * yet be effective. Trigger execution of ieee80211_sta_work - * to fix this. - */ - if (sdata->vif.type == NL80211_IFTYPE_STATION) - ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); - netif_tx_start_all_queues(dev); return 0; @@ -349,7 +341,6 @@ static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct sta_info *sta; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; @@ -366,18 +357,6 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_work_purge(sdata); /* - * Now delete all active aggregation sessions. - */ - rcu_read_lock(); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sta->sdata == sdata) - ieee80211_sta_tear_down_BA_sessions(sta); - } - - rcu_read_unlock(); - - /* * Remove all stations associated with this interface. 
* * This must be done before calling ops->remove_interface() @@ -483,27 +462,14 @@ static int ieee80211_stop(struct net_device *dev) * whether the interface is running, which, at this point, * it no longer is. */ - cancel_work_sync(&sdata->u.mgd.work); cancel_work_sync(&sdata->u.mgd.chswitch_work); cancel_work_sync(&sdata->u.mgd.monitor_work); cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work); - /* - * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. - */ - synchronize_rcu(); - skb_queue_purge(&sdata->u.mgd.skb_queue); /* fall through */ case NL80211_IFTYPE_ADHOC: - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) del_timer_sync(&sdata->u.ibss.timer); - cancel_work_sync(&sdata->u.ibss.work); - synchronize_rcu(); - skb_queue_purge(&sdata->u.ibss.skb_queue); - } /* fall through */ case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -518,6 +484,16 @@ static int ieee80211_stop(struct net_device *dev) } /* fall through */ default: + flush_work(&sdata->work); + /* + * When we get here, the interface is marked down. + * Call synchronize_rcu() to wait for the RX path + * should it be using the interface and enqueuing + * frames at this very time on another CPU. 
+ */ + synchronize_rcu(); + skb_queue_purge(&sdata->skb_queue); + if (local->scan_sdata == sdata) ieee80211_scan_cancel(local); @@ -531,8 +507,8 @@ static int ieee80211_stop(struct net_device *dev) BSS_CHANGED_BEACON_ENABLED); } - /* disable all keys for as long as this netdev is down */ - ieee80211_disable_keys(sdata); + /* free all remaining keys, there shouldn't be any */ + ieee80211_free_keys(sdata); drv_remove_interface(local, &sdata->vif); } @@ -727,6 +703,136 @@ static void ieee80211_if_setup(struct net_device *dev) dev->destructor = free_netdev; } +static void ieee80211_iface_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, work); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct sta_info *sta; + struct ieee80211_ra_tid *ra_tid; + + if (!ieee80211_sdata_running(sdata)) + return; + + if (local->scanning) + return; + + /* + * ieee80211_queue_work() should have picked up most cases, + * here we'll pick the rest. 
+ */ + if (WARN(local->suspended, + "interface work scheduled while going to suspend\n")) + return; + + /* first process frames */ + while ((skb = skb_dequeue(&sdata->skb_queue))) { + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_START) { + ra_tid = (void *)&skb->cb; + ieee80211_start_tx_ba_cb(&sdata->vif, ra_tid->ra, + ra_tid->tid); + } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_STOP) { + ra_tid = (void *)&skb->cb; + ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra, + ra_tid->tid); + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_BACK) { + int len = skb->len; + + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, mgmt->sa); + if (sta) { + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + ieee80211_process_addba_request( + local, sta, mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + ieee80211_process_addba_resp(local, sta, + mgmt, len); + break; + case WLAN_ACTION_DELBA: + ieee80211_process_delba(sdata, sta, + mgmt, len); + break; + default: + WARN_ON(1); + break; + } + } + mutex_unlock(&local->sta_mtx); + } else if (ieee80211_is_data_qos(mgmt->frame_control)) { + struct ieee80211_hdr *hdr = (void *)mgmt; + /* + * So the frame isn't mgmt, but frame_control + * is at the right place anyway, of course, so + * the if statement is correct. + * + * Warn if we have other data frame types here, + * they must not get here. + */ + WARN_ON(hdr->frame_control & + cpu_to_le16(IEEE80211_STYPE_NULLFUNC)); + WARN_ON(!(hdr->seq_ctrl & + cpu_to_le16(IEEE80211_SCTL_FRAG))); + /* + * This was a fragment of a frame, received while + * a block-ack session was active. That cannot be + * right, so terminate the session. 
+ */ + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, mgmt->sa); + if (sta) { + u16 tid = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_TID_MASK; + + __ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); + } + mutex_unlock(&local->sta_mtx); + } else switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + ieee80211_sta_rx_queued_mgmt(sdata, skb); + break; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_rx_queued_mgmt(sdata, skb); + break; + case NL80211_IFTYPE_MESH_POINT: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + ieee80211_mesh_rx_queued_mgmt(sdata, skb); + break; + default: + WARN(1, "frame for unexpected interface type"); + break; + } + + kfree_skb(skb); + } + + /* then other type-dependent work */ + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + ieee80211_sta_work(sdata); + break; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_work(sdata); + break; + case NL80211_IFTYPE_MESH_POINT: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + ieee80211_mesh_work(sdata); + break; + default: + break; + } +} + + /* * Helper function to initialise an interface to a specific type. 
*/ @@ -744,6 +850,9 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* only monitor differs */ sdata->dev->type = ARPHRD_ETHER; + skb_queue_head_init(&sdata->skb_queue); + INIT_WORK(&sdata->work, ieee80211_iface_work); + switch (type) { case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps_bc_buf); @@ -969,6 +1078,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; sdata->dev = ndev; +#ifdef CONFIG_INET + sdata->arp_filter_state = true; +#endif for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index e8f6e3b252d8..1b9d87ed143a 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -36,80 +36,20 @@ * There is currently no way of knowing this except by looking into * debugfs. * - * All key operations are protected internally so you can call them at - * any time. + * All key operations are protected internally. * * Within mac80211, key references are, just as STA structure references, * protected by RCU. Note, however, that some things are unprotected, * namely the key->sta dereferences within the hardware acceleration - * functions. This means that sta_info_destroy() must flush the key todo - * list. - * - * All the direct key list manipulation functions must not sleep because - * they can operate on STA info structs that are protected by RCU. + * functions. This means that sta_info_destroy() must remove the key + * which waits for an RCU grace period. 
*/ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; -/* key mutex: used to synchronise todo runners */ -static DEFINE_MUTEX(key_mutex); -static DEFINE_SPINLOCK(todo_lock); -static LIST_HEAD(todo_list); - -static void key_todo(struct work_struct *work) -{ - ieee80211_key_todo(); -} - -static DECLARE_WORK(todo_work, key_todo); - -/** - * add_todo - add todo item for a key - * - * @key: key to add to do item for - * @flag: todo flag(s) - * - * Must be called with IRQs or softirqs disabled. - */ -static void add_todo(struct ieee80211_key *key, u32 flag) -{ - if (!key) - return; - - spin_lock(&todo_lock); - key->flags |= flag; - /* - * Remove again if already on the list so that we move it to the end. - */ - if (!list_empty(&key->todo)) - list_del(&key->todo); - list_add_tail(&key->todo, &todo_list); - schedule_work(&todo_work); - spin_unlock(&todo_lock); -} - -/** - * ieee80211_key_lock - lock the mac80211 key operation lock - * - * This locks the (global) mac80211 key operation lock, all - * key operations must be done under this lock. 
- */ -static void ieee80211_key_lock(void) -{ - mutex_lock(&key_mutex); -} - -/** - * ieee80211_key_unlock - unlock the mac80211 key operation lock - */ -static void ieee80211_key_unlock(void) -{ - mutex_unlock(&key_mutex); -} - -static void assert_key_lock(void) +static void assert_key_lock(struct ieee80211_local *local) { - WARN_ON(!mutex_is_locked(&key_mutex)); + WARN_ON(!mutex_is_locked(&local->key_mtx)); } static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) @@ -126,12 +66,13 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sta *sta; int ret; - assert_key_lock(); might_sleep(); if (!key->local->ops->set_key) return; + assert_key_lock(key->local); + sta = get_sta_for_key(key); sdata = key->sdata; @@ -142,11 +83,8 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); - if (!ret) { - spin_lock_bh(&todo_lock); + if (!ret) key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock_bh(&todo_lock); - } if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) printk(KERN_ERR "mac80211-%s: failed to set key " @@ -161,18 +99,15 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) struct ieee80211_sta *sta; int ret; - assert_key_lock(); might_sleep(); if (!key || !key->local->ops->set_key) return; - spin_lock_bh(&todo_lock); - if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { - spin_unlock_bh(&todo_lock); + assert_key_lock(key->local); + + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) return; - } - spin_unlock_bh(&todo_lock); sta = get_sta_for_key(key); sdata = key->sdata; @@ -191,9 +126,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) wiphy_name(key->local->hw.wiphy), key->conf.keyidx, sta ? 
sta->addr : bcast_addr, ret); - spin_lock_bh(&todo_lock); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock_bh(&todo_lock); } static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, @@ -201,22 +134,24 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, { struct ieee80211_key *key = NULL; + assert_key_lock(sdata->local); + if (idx >= 0 && idx < NUM_DEFAULT_KEYS) key = sdata->keys[idx]; rcu_assign_pointer(sdata->default_key, key); - if (key) - add_todo(key, KEY_FLAG_TODO_DEFKEY); + if (key) { + ieee80211_debugfs_key_remove_default(key->sdata); + ieee80211_debugfs_key_add_default(key->sdata); + } } void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) { - unsigned long flags; - - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); __ieee80211_set_default_key(sdata, idx); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } static void @@ -224,24 +159,26 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) { struct ieee80211_key *key = NULL; + assert_key_lock(sdata->local); + if (idx >= NUM_DEFAULT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) key = sdata->keys[idx]; rcu_assign_pointer(sdata->default_mgmt_key, key); - if (key) - add_todo(key, KEY_FLAG_TODO_DEFMGMTKEY); + if (key) { + ieee80211_debugfs_key_remove_mgmt_default(key->sdata); + ieee80211_debugfs_key_add_mgmt_default(key->sdata); + } } void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) { - unsigned long flags; - - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); __ieee80211_set_default_mgmt_key(sdata, idx); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } @@ -336,7 +273,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.iv_len = CCMP_HDR_LEN; 
key->conf.icv_len = CCMP_MIC_LEN; if (seq) { - for (i = 0; i < NUM_RX_DATA_QUEUES; i++) + for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) for (j = 0; j < CCMP_PN_LEN; j++) key->u.ccmp.rx_pn[i][j] = seq[CCMP_PN_LEN - j - 1]; @@ -352,7 +289,6 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); - INIT_LIST_HEAD(&key->todo); if (alg == ALG_CCMP) { /* @@ -382,12 +318,29 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, return key; } +static void __ieee80211_key_destroy(struct ieee80211_key *key) +{ + if (!key) + return; + + if (key->local) + ieee80211_key_disable_hw_accel(key); + + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + if (key->conf.alg == ALG_AES_CMAC) + ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + if (key->local) + ieee80211_debugfs_key_remove(key); + + kfree(key); +} + void ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_key *old_key; - unsigned long flags; int idx; BUG_ON(!sdata); @@ -431,7 +384,7 @@ void ieee80211_key_link(struct ieee80211_key *key, } } - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); if (sta) old_key = sta->key; @@ -439,15 +392,13 @@ void ieee80211_key_link(struct ieee80211_key *key, old_key = sdata->keys[idx]; __ieee80211_key_replace(sdata, sta, old_key, key); + __ieee80211_key_destroy(old_key); - /* free old key later */ - add_todo(old_key, KEY_FLAG_TODO_DELETE); + ieee80211_debugfs_key_add(key); - add_todo(key, KEY_FLAG_TODO_ADD_DEBUGFS); - if (ieee80211_sdata_running(sdata)) - add_todo(key, KEY_FLAG_TODO_HWACCEL_ADD); + ieee80211_key_enable_hw_accel(key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } static void __ieee80211_key_free(struct ieee80211_key *key) @@ -458,170 +409,62 @@ static void 
__ieee80211_key_free(struct ieee80211_key *key) if (key->sdata) __ieee80211_key_replace(key->sdata, key->sta, key, NULL); - - add_todo(key, KEY_FLAG_TODO_DELETE); + __ieee80211_key_destroy(key); } -void ieee80211_key_free(struct ieee80211_key *key) +void ieee80211_key_free(struct ieee80211_local *local, + struct ieee80211_key *key) { - unsigned long flags; - if (!key) return; - if (!key->sdata) { - /* The key has not been linked yet, simply free it - * and don't Oops */ - if (key->conf.alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - kfree(key); - return; - } - - spin_lock_irqsave(&key->sdata->local->key_lock, flags); + mutex_lock(&local->key_mtx); __ieee80211_key_free(key); - spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); + mutex_unlock(&local->key_mtx); } -/* - * To be safe against concurrent manipulations of the list (which shouldn't - * actually happen) we need to hold the spinlock. But under the spinlock we - * can't actually do much, so we defer processing to the todo list. Then run - * the todo list to be sure the operation and possibly previously pending - * operations are completed. 
- */ -static void ieee80211_todo_for_each_key(struct ieee80211_sub_if_data *sdata, - u32 todo_flags) +void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; - unsigned long flags; - - might_sleep(); - - spin_lock_irqsave(&sdata->local->key_lock, flags); - list_for_each_entry(key, &sdata->key_list, list) - add_todo(key, todo_flags); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - ieee80211_key_todo(); -} - -void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) -{ ASSERT_RTNL(); if (WARN_ON(!ieee80211_sdata_running(sdata))) return; - ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_ADD); -} + mutex_lock(&sdata->local->key_mtx); -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) -{ - ASSERT_RTNL(); - - ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_REMOVE); -} - -static void __ieee80211_key_destroy(struct ieee80211_key *key) -{ - if (!key) - return; - - ieee80211_key_disable_hw_accel(key); - - if (key->conf.alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.alg == ALG_AES_CMAC) - ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); - ieee80211_debugfs_key_remove(key); + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_enable_hw_accel(key); - kfree(key); + mutex_unlock(&sdata->local->key_mtx); } -static void __ieee80211_key_todo(void) +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; - bool work_done; - u32 todoflags; - /* - * NB: sta_info_destroy relies on this! 
- */ - synchronize_rcu(); - - spin_lock_bh(&todo_lock); - while (!list_empty(&todo_list)) { - key = list_first_entry(&todo_list, struct ieee80211_key, todo); - list_del_init(&key->todo); - todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS | - KEY_FLAG_TODO_DEFKEY | - KEY_FLAG_TODO_DEFMGMTKEY | - KEY_FLAG_TODO_HWACCEL_ADD | - KEY_FLAG_TODO_HWACCEL_REMOVE | - KEY_FLAG_TODO_DELETE); - key->flags &= ~todoflags; - spin_unlock_bh(&todo_lock); - - work_done = false; - - if (todoflags & KEY_FLAG_TODO_ADD_DEBUGFS) { - ieee80211_debugfs_key_add(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DEFKEY) { - ieee80211_debugfs_key_remove_default(key->sdata); - ieee80211_debugfs_key_add_default(key->sdata); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DEFMGMTKEY) { - ieee80211_debugfs_key_remove_mgmt_default(key->sdata); - ieee80211_debugfs_key_add_mgmt_default(key->sdata); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) { - ieee80211_key_enable_hw_accel(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_HWACCEL_REMOVE) { - ieee80211_key_disable_hw_accel(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DELETE) { - __ieee80211_key_destroy(key); - work_done = true; - } + ASSERT_RTNL(); - WARN_ON(!work_done); + mutex_lock(&sdata->local->key_mtx); - spin_lock_bh(&todo_lock); - } - spin_unlock_bh(&todo_lock); -} + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_disable_hw_accel(key); -void ieee80211_key_todo(void) -{ - ieee80211_key_lock(); - __ieee80211_key_todo(); - ieee80211_key_unlock(); + mutex_unlock(&sdata->local->key_mtx); } void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key, *tmp; - unsigned long flags; - ieee80211_key_lock(); + mutex_lock(&sdata->local->key_mtx); ieee80211_debugfs_key_remove_default(sdata); ieee80211_debugfs_key_remove_mgmt_default(sdata); - spin_lock_irqsave(&sdata->local->key_lock, flags); list_for_each_entry_safe(key, tmp, 
&sdata->key_list, list) __ieee80211_key_free(key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - - __ieee80211_key_todo(); - ieee80211_key_unlock(); + mutex_unlock(&sdata->local->key_mtx); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index bdc2968c2bbe..b665bbb7a471 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -38,25 +38,9 @@ struct sta_info; * * @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present * in the hardware for TX crypto hardware acceleration. - * @KEY_FLAG_TODO_DELETE: Key is marked for deletion and will, after an - * RCU grace period, no longer be reachable other than from the - * todo list. - * @KEY_FLAG_TODO_HWACCEL_ADD: Key needs to be added to hardware acceleration. - * @KEY_FLAG_TODO_HWACCEL_REMOVE: Key needs to be removed from hardware - * acceleration. - * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated. - * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs. - * @KEY_FLAG_TODO_DEFMGMTKEY: Key is default management key and debugfs needs - * to be updated. */ enum ieee80211_internal_key_flags { KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), - KEY_FLAG_TODO_DELETE = BIT(1), - KEY_FLAG_TODO_HWACCEL_ADD = BIT(2), - KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3), - KEY_FLAG_TODO_DEFKEY = BIT(4), - KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5), - KEY_FLAG_TODO_DEFMGMTKEY = BIT(6), }; enum ieee80211_internal_tkip_state { @@ -79,10 +63,8 @@ struct ieee80211_key { /* for sdata list */ struct list_head list; - /* for todo list */ - struct list_head todo; - /* protected by todo lock! */ + /* protected by key mutex */ unsigned int flags; union { @@ -95,7 +77,13 @@ struct ieee80211_key { } tkip; struct { u8 tx_pn[6]; - u8 rx_pn[NUM_RX_DATA_QUEUES][6]; + /* + * Last received packet number. The first + * NUM_RX_DATA_QUEUES counters are used with Data + * frames and the last counter is used with Robust + * Management frames. 
+ */ + u8 rx_pn[NUM_RX_DATA_QUEUES + 1][6]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCCMPReplays */ /* scratch buffers for virt_to_page() (crypto API) */ @@ -147,7 +135,8 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, void ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta); -void ieee80211_key_free(struct ieee80211_key *key); +void ieee80211_key_free(struct ieee80211_local *local, + struct ieee80211_key *key); void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); @@ -155,6 +144,4 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); -void ieee80211_key_todo(void); - #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 22a384dfab65..7cc4f913a431 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -20,6 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/bitmap.h> #include <linux/pm_qos_params.h> +#include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> @@ -106,12 +107,15 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) if (scan_chan) { chan = scan_chan; channel_type = NL80211_CHAN_NO_HT; + local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; } else if (local->tmp_channel) { chan = scan_chan = local->tmp_channel; channel_type = local->tmp_channel_type; + local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; } else { chan = local->oper_channel; channel_type = local->_oper_channel_type; + local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; } if (chan != local->hw.conf.channel || @@ -259,7 +263,6 @@ static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; struct sk_buff 
*skb; - struct ieee80211_ra_tid *ra_tid; while ((skb = skb_dequeue(&local->skb_queue)) || (skb = skb_dequeue(&local->skb_queue_unreliable))) { @@ -274,18 +277,6 @@ static void ieee80211_tasklet_handler(unsigned long data) skb->pkt_type = 0; ieee80211_tx_status(local_to_hw(local), skb); break; - case IEEE80211_DELBA_MSG: - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_stop_tx_ba_cb(ra_tid->vif, ra_tid->ra, - ra_tid->tid); - dev_kfree_skb(skb); - break; - case IEEE80211_ADDBA_MSG: - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_start_tx_ba_cb(ra_tid->vif, ra_tid->ra, - ra_tid->tid); - dev_kfree_skb(skb); - break ; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); @@ -329,6 +320,76 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) mutex_unlock(&local->iflist_mtx); } +#ifdef CONFIG_INET +static int ieee80211_ifa_changed(struct notifier_block *nb, + unsigned long data, void *arg) +{ + struct in_ifaddr *ifa = arg; + struct ieee80211_local *local = + container_of(nb, struct ieee80211_local, + ifa_notifier); + struct net_device *ndev = ifa->ifa_dev->dev; + struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct in_device *idev; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_bss_conf *bss_conf; + struct ieee80211_if_managed *ifmgd; + int c = 0; + + if (!netif_running(ndev)) + return NOTIFY_DONE; + + /* Make sure it's our interface that got changed */ + if (!wdev) + return NOTIFY_DONE; + + if (wdev->wiphy != local->hw.wiphy) + return NOTIFY_DONE; + + sdata = IEEE80211_DEV_TO_SUB_IF(ndev); + bss_conf = &sdata->vif.bss_conf; + + /* ARP filtering is only supported in managed mode */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return NOTIFY_DONE; + + idev = sdata->dev->ip_ptr; + if (!idev) + return NOTIFY_DONE; + + ifmgd = &sdata->u.mgd; + mutex_lock(&ifmgd->mtx); + + /* Copy the addresses to the bss_conf list */ + ifa = idev->ifa_list; + while (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN && ifa) 
{ + bss_conf->arp_addr_list[c] = ifa->ifa_address; + ifa = ifa->ifa_next; + c++; + } + + /* If not all addresses fit the list, disable filtering */ + if (ifa) { + sdata->arp_filter_state = false; + c = 0; + } else { + sdata->arp_filter_state = true; + } + bss_conf->arp_addr_cnt = c; + + /* Configure driver only if associated */ + if (ifmgd->associated) { + bss_conf->arp_filter_enabled = sdata->arp_filter_state; + ieee80211_bss_info_change_notify(sdata, + BSS_CHANGED_ARP_FILTER); + } + + mutex_unlock(&ifmgd->mtx); + + return NOTIFY_DONE; +} +#endif + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -396,7 +457,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mutex_init(&local->iflist_mtx); mutex_init(&local->scan_mtx); - spin_lock_init(&local->key_lock); + mutex_init(&local->key_mtx); spin_lock_init(&local->filter_lock); spin_lock_init(&local->queue_stop_reason_lock); @@ -419,8 +480,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, sta_info_init(local); - for (i = 0; i < IEEE80211_MAX_QUEUES; i++) + for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_head_init(&local->pending[i]); + atomic_set(&local->agg_queue_stop[i], 0); + } tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, (unsigned long)local); @@ -431,8 +494,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); - spin_lock_init(&local->ampdu_lock); - return local_to_hw(local); } EXPORT_SYMBOL(ieee80211_alloc_hw); @@ -572,18 +633,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.conf.listen_interval = local->hw.max_listen_interval; - local->hw.conf.dynamic_ps_forced_timeout = -1; + local->dynamic_ps_forced_timeout = -1; result = sta_info_start(local); if (result < 0) goto fail_sta_info; result = ieee80211_wep_init(local); - if (result < 0) { + if (result < 0) printk(KERN_DEBUG "%s: Failed to 
initialize wep: %d\n", wiphy_name(local->hw.wiphy), result); - goto fail_wep; - } rtnl_lock(); @@ -612,21 +671,30 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - if (result) { rtnl_lock(); goto fail_pm_qos; } +#ifdef CONFIG_INET + local->ifa_notifier.notifier_call = ieee80211_ifa_changed; + result = register_inetaddr_notifier(&local->ifa_notifier); + if (result) + goto fail_ifa; +#endif + return 0; + fail_ifa: + pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, + &local->network_latency_notifier); + rtnl_lock(); fail_pm_qos: ieee80211_led_exit(local); ieee80211_remove_interfaces(local); fail_rate: rtnl_unlock(); ieee80211_wep_free(local); - fail_wep: sta_info_stop(local); fail_sta_info: destroy_workqueue(local->workqueue); @@ -647,6 +715,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); +#ifdef CONFIG_INET + unregister_inetaddr_notifier(&local->ifa_notifier); +#endif rtnl_lock(); @@ -704,6 +775,10 @@ static int __init ieee80211_init(void) if (ret) return ret; + ret = rc80211_minstrel_ht_init(); + if (ret) + goto err_minstrel; + ret = rc80211_pid_init(); if (ret) goto err_pid; @@ -716,6 +791,8 @@ static int __init ieee80211_init(void) err_netdev: rc80211_pid_exit(); err_pid: + rc80211_minstrel_ht_exit(); + err_minstrel: rc80211_minstrel_exit(); return ret; @@ -724,6 +801,7 @@ static int __init ieee80211_init(void) static void __exit ieee80211_exit(void) { rc80211_pid_exit(); + rc80211_minstrel_ht_exit(); rc80211_minstrel_exit(); /* diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index bde81031727a..c8a4f19ed13b 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -54,7 +54,7 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, 
&sdata->work); } /** @@ -345,7 +345,7 @@ static void ieee80211_mesh_path_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } static void ieee80211_mesh_path_root_timer(unsigned long data) @@ -362,7 +362,7 @@ static void ieee80211_mesh_path_root_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) @@ -484,9 +484,6 @@ void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - /* might restart the timer but that doesn't matter */ - cancel_work_sync(&ifmsh->work); - /* use atomic bitops in case both timers fire at the same time */ if (del_timer_sync(&ifmsh->housekeeping_timer)) @@ -518,7 +515,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); ieee80211_mesh_root_setup(ifmsh); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED | @@ -536,16 +533,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) * whether the interface is running, which, at this point, * it no longer is. */ - cancel_work_sync(&sdata->u.mesh.work); - - /* - * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. 
- */ - rcu_barrier(); /* Wait for RX path and call_rcu()'s */ - skb_queue_purge(&sdata->u.mesh.skb_queue); + cancel_work_sync(&sdata->work); } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, @@ -608,8 +596,8 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, } } -static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; struct ieee80211_if_mesh *ifmsh; @@ -632,26 +620,11 @@ static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; } - - kfree_skb(skb); } -static void ieee80211_mesh_work(struct work_struct *work) +void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.mesh.work); - struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct sk_buff *skb; - - if (!ieee80211_sdata_running(sdata)) - return; - - if (local->scanning) - return; - - while ((skb = skb_dequeue(&ifmsh->skb_queue))) - ieee80211_mesh_rx_queued_mgmt(sdata, skb); if (ifmsh->preq_queue_len && time_after(jiffies, @@ -678,7 +651,7 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) if (ieee80211_vif_is_mesh(&sdata->vif)) - ieee80211_queue_work(&local->hw, &sdata->u.mesh.work); + ieee80211_queue_work(&local->hw, &sdata->work); rcu_read_unlock(); } @@ -686,11 +659,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - INIT_WORK(&ifmsh->work, ieee80211_mesh_work); setup_timer(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, (unsigned long) sdata); - 
skb_queue_head_init(&sdata->u.mesh.skb_queue); ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T; ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T; @@ -731,29 +702,3 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) INIT_LIST_HEAD(&ifmsh->preq_queue.list); spin_lock_init(&ifmsh->mesh_preq_queue_lock); } - -ieee80211_rx_result -ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_ACTION: - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: - skb_queue_tail(&ifmsh->skb_queue, skb); - ieee80211_queue_work(&local->hw, &ifmsh->work); - return RX_QUEUED; - } - - return RX_CONTINUE; -} diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index c88087f1cd0f..ebd3f1d9d889 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -237,8 +237,6 @@ void ieee80211s_update_metric(struct ieee80211_local *local, struct sta_info *stainfo, struct sk_buff *skb); void ieee80211s_stop(void); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); -ieee80211_rx_result -ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 0705018d8d1e..829e08a657d0 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -805,14 +805,14 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) spin_unlock(&ifmsh->mesh_preq_queue_lock); if (time_after(jiffies, 
ifmsh->last_preq + min_preq_int_jiff(sdata))) - ieee80211_queue_work(&sdata->local->hw, &ifmsh->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); else if (time_before(jiffies, ifmsh->last_preq)) { /* avoid long wait if did not send preqs for a long time * and jiffies wrapped around */ ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1; - ieee80211_queue_work(&sdata->local->hw, &ifmsh->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } else mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq + min_preq_int_jiff(sdata)); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 181ffd6efd81..349e466cf08b 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -315,7 +315,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) read_unlock(&pathtbl_resize_lock); if (grow) { set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } return 0; @@ -425,7 +425,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) read_unlock(&pathtbl_resize_lock); if (grow) { set_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } return 0; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 3cd5f7b5d693..ea13a80a476c 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -65,7 +65,6 @@ void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); } static inline @@ -73,7 +72,6 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); - ieee80211_bss_info_change_notify(sdata, 
BSS_CHANGED_BEACON); } /** @@ -115,7 +113,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, } /** - * mesh_plink_deactivate - deactivate mesh peer link + * __mesh_plink_deactivate - deactivate mesh peer link * * @sta: mesh peer link to deactivate * @@ -123,18 +121,23 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, * * Locking: the caller must hold sta->lock */ -static void __mesh_plink_deactivate(struct sta_info *sta) +static bool __mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; + bool deactivated = false; - if (sta->plink_state == PLINK_ESTAB) + if (sta->plink_state == PLINK_ESTAB) { mesh_plink_dec_estab_count(sdata); + deactivated = true; + } sta->plink_state = PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); + + return deactivated; } /** - * __mesh_plink_deactivate - deactivate mesh peer link + * mesh_plink_deactivate - deactivate mesh peer link * * @sta: mesh peer link to deactivate * @@ -142,9 +145,15 @@ static void __mesh_plink_deactivate(struct sta_info *sta) */ void mesh_plink_deactivate(struct sta_info *sta) { + struct ieee80211_sub_if_data *sdata = sta->sdata; + bool deactivated; + spin_lock_bh(&sta->lock); - __mesh_plink_deactivate(sta); + deactivated = __mesh_plink_deactivate(sta); spin_unlock_bh(&sta->lock); + + if (deactivated) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, @@ -381,10 +390,16 @@ int mesh_plink_open(struct sta_info *sta) void mesh_plink_block(struct sta_info *sta) { + struct ieee80211_sub_if_data *sdata = sta->sdata; + bool deactivated; + spin_lock_bh(&sta->lock); - __mesh_plink_deactivate(sta); + deactivated = __mesh_plink_deactivate(sta); sta->plink_state = PLINK_BLOCKED; spin_unlock_bh(&sta->lock); + + if (deactivated) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); } @@ -397,6 +412,7 @@ void mesh_rx_plink_frame(struct 
ieee80211_sub_if_data *sdata, struct ieee80211_m enum plink_event event; enum plink_frame_type ftype; size_t baselen; + bool deactivated; u8 ie_len; u8 *baseaddr; __le16 plid, llid, reason; @@ -651,8 +667,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case CNF_ACPT: del_timer(&sta->plink_timer); sta->plink_state = PLINK_ESTAB; - mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); + mesh_plink_inc_estab_count(sdata); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); mpl_dbg("Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); break; @@ -684,8 +701,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case OPN_ACPT: del_timer(&sta->plink_timer); sta->plink_state = PLINK_ESTAB; - mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); + mesh_plink_inc_estab_count(sdata); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); mpl_dbg("Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, @@ -702,11 +720,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case CLS_ACPT: reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; - __mesh_plink_deactivate(sta); + deactivated = __mesh_plink_deactivate(sta); sta->plink_state = PLINK_HOLDING; llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); + if (deactivated) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f803f8b72a93..b6c163ac22da 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -478,6 +478,39 @@ static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, } } +void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + 
struct ieee80211_local *local = sdata->local; + struct ieee80211_conf *conf = &local->hw.conf; + + WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || + !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || + (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); + + local->disable_dynamic_ps = false; + conf->dynamic_ps_timeout = local->dynamic_ps_user_timeout; +} +EXPORT_SYMBOL(ieee80211_enable_dyn_ps); + +void ieee80211_disable_dyn_ps(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_conf *conf = &local->hw.conf; + + WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || + !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || + (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); + + local->disable_dynamic_ps = true; + conf->dynamic_ps_timeout = 0; + del_timer_sync(&local->dynamic_ps_timer); + ieee80211_queue_work(&local->hw, + &local->dynamic_ps_enable_work); +} +EXPORT_SYMBOL(ieee80211_disable_dyn_ps); + /* powersave */ static void ieee80211_enable_ps(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) @@ -553,6 +586,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) found->u.mgd.associated->beacon_ies && !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL))) { + struct ieee80211_conf *conf = &local->hw.conf; s32 beaconint_us; if (latency < 0) @@ -561,25 +595,24 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) beaconint_us = ieee80211_tu_to_usec( found->vif.bss_conf.beacon_int); - timeout = local->hw.conf.dynamic_ps_forced_timeout; + timeout = local->dynamic_ps_forced_timeout; if (timeout < 0) { /* + * Go to full PSM if the user configures a very low + * latency requirement. * The 2 second value is there for compatibility until * the PM_QOS_NETWORK_LATENCY is configured with real * values. 
*/ - if (latency == 2000000000) - timeout = 100; - else if (latency <= 50000) - timeout = 300; - else if (latency <= 100000) - timeout = 100; - else if (latency <= 500000) - timeout = 50; - else + if (latency > 1900000000 && latency != 2000000000) timeout = 0; + else + timeout = 100; } - local->hw.conf.dynamic_ps_timeout = timeout; + local->dynamic_ps_user_timeout = timeout; + if (!local->disable_dynamic_ps) + conf->dynamic_ps_timeout = + local->dynamic_ps_user_timeout; if (beaconint_us > latency) { local->ps_sdata = NULL; @@ -665,10 +698,11 @@ void ieee80211_dynamic_ps_timer(unsigned long data) /* MLME */ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, - struct ieee80211_if_managed *ifmgd, + struct ieee80211_sub_if_data *sdata, u8 *wmm_param, size_t wmm_param_len) { struct ieee80211_tx_queue_params params; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t left; int count; u8 *pos, uapsd_queues = 0; @@ -757,8 +791,8 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, } /* enable WMM or activate new settings */ - local->hw.conf.flags |= IEEE80211_CONF_QOS; - drv_config(local, IEEE80211_CONF_CHANGE_QOS); + sdata->vif.bss_conf.qos = true; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS); } static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, @@ -806,11 +840,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, { struct ieee80211_bss *bss = (void *)cbss->priv; struct ieee80211_local *local = sdata->local; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; bss_info_changed |= BSS_CHANGED_ASSOC; /* set timing information */ - sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; - sdata->vif.bss_conf.timestamp = cbss->tsf; + bss_conf->beacon_int = cbss->beacon_interval; + bss_conf->timestamp = cbss->tsf; bss_info_changed |= BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, @@ -835,7 +870,12 @@ static void 
ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - sdata->vif.bss_conf.assoc = 1; + if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) + bss_conf->dtim_period = bss->dtim_period; + else + bss_conf->dtim_period = 0; + + bss_conf->assoc = 1; /* * For now just always ask the driver to update the basic rateset * when we have associated, we aren't checking whether it actually @@ -848,9 +888,15 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, /* Tell the driver to monitor connection quality (if supported) */ if ((local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI) && - sdata->vif.bss_conf.cqm_rssi_thold) + bss_conf->cqm_rssi_thold) bss_info_changed |= BSS_CHANGED_CQM; + /* Enable ARP filtering */ + if (bss_conf->arp_filter_enabled != sdata->arp_filter_state) { + bss_conf->arp_filter_enabled = sdata->arp_filter_state; + bss_info_changed |= BSS_CHANGED_ARP_FILTER; + } + ieee80211_bss_info_change_notify(sdata, bss_info_changed); mutex_lock(&local->iflist_mtx); @@ -898,13 +944,13 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, bssid); if (sta) { - set_sta_flags(sta, WLAN_STA_DISASSOC); + set_sta_flags(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta); } - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); changed |= ieee80211_reset_erp_info(sdata); @@ -932,6 +978,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_hw_config(local, config_changed); + /* Disable ARP filtering */ + if (sdata->vif.bss_conf.arp_filter_enabled) { + sdata->vif.bss_conf.arp_filter_enabled = false; + changed |= BSS_CHANGED_ARP_FILTER; + } + /* The BSSID (not really interesting) and HT changed */ changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT; ieee80211_bss_info_change_notify(sdata, changed); @@ -1279,7 +1331,7 
@@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, } if (elems.wmm_param) - ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, + ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, elems.wmm_param_len); else ieee80211_set_wmm_default(sdata); @@ -1551,7 +1603,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); - ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, + ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, elems.wmm_param_len); } @@ -1633,35 +1685,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, changed); } -ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: - case IEEE80211_STYPE_DEAUTH: - case IEEE80211_STYPE_DISASSOC: - case IEEE80211_STYPE_ACTION: - skb_queue_tail(&sdata->u.mgd.skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); - return RX_QUEUED; - } - - return RX_DROP_MONITOR; -} - -static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_rx_status *rx_status; @@ -1693,44 +1718,6 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; case IEEE80211_STYPE_ACTION: switch (mgmt->u.action.category) { - case WLAN_CATEGORY_BACK: { - struct ieee80211_local *local = sdata->local; - int len = skb->len; - struct sta_info *sta; - 
- rcu_read_lock(); - sta = sta_info_get(sdata, mgmt->sa); - if (!sta) { - rcu_read_unlock(); - break; - } - - local_bh_disable(); - - switch (mgmt->u.action.u.addba_req.action_code) { - case WLAN_ACTION_ADDBA_REQ: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_req))) - break; - ieee80211_process_addba_request(local, sta, mgmt, len); - break; - case WLAN_ACTION_ADDBA_RESP: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_resp))) - break; - ieee80211_process_addba_resp(local, sta, mgmt, len); - break; - case WLAN_ACTION_DELBA: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.delba))) - break; - ieee80211_process_delba(sdata, sta, mgmt, len); - break; - } - local_bh_enable(); - rcu_read_unlock(); - break; - } case WLAN_CATEGORY_SPECTRUM_MGMT: ieee80211_sta_process_chanswitch(sdata, &mgmt->u.action.u.chan_switch.sw_elem, @@ -1754,7 +1741,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, default: WARN(1, "unexpected: %d", rma); } - goto out; + return; } mutex_unlock(&ifmgd->mtx); @@ -1769,7 +1756,8 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, if (wk->sdata != sdata) continue; - if (wk->type != IEEE80211_WORK_ASSOC) + if (wk->type != IEEE80211_WORK_ASSOC && + wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) continue; if (memcmp(mgmt->bssid, wk->filter_ta, ETH_ALEN)) @@ -1799,8 +1787,6 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); } - out: - kfree_skb(skb); } static void ieee80211_sta_timer(unsigned long data) @@ -1815,39 +1801,13 @@ static void ieee80211_sta_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmgd->work); + ieee80211_queue_work(&local->hw, &sdata->work); } -static void ieee80211_sta_work(struct work_struct *work) +void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata 
= - container_of(work, struct ieee80211_sub_if_data, u.mgd.work); struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd; - struct sk_buff *skb; - - if (!ieee80211_sdata_running(sdata)) - return; - - if (local->scanning) - return; - - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) - return; - - /* - * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the the rest. - */ - if (WARN(local->suspended, "STA MLME work scheduled while " - "going to suspend\n")) - return; - - ifmgd = &sdata->u.mgd; - - /* first process frames to avoid timing out while a frame is pending */ - while ((skb = skb_dequeue(&ifmgd->skb_queue))) - ieee80211_sta_rx_queued_mgmt(sdata, skb); + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; /* then process the rest of the work */ mutex_lock(&ifmgd->mtx); @@ -1942,8 +1902,7 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); /* and do all the other regular work too */ - ieee80211_queue_work(&sdata->local->hw, - &sdata->u.mgd.work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } } @@ -1958,7 +1917,6 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) * time -- the code here is properly synchronised. 
*/ - cancel_work_sync(&ifmgd->work); cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -1990,7 +1948,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd; ifmgd = &sdata->u.mgd; - INIT_WORK(&ifmgd->work, ieee80211_sta_work); INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, @@ -2003,7 +1960,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) (unsigned long) sdata); setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, (unsigned long) sdata); - skb_queue_head_init(&ifmgd->skb_queue); ifmgd->flags = 0; @@ -2081,6 +2037,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, auth_alg = WLAN_AUTH_OPEN; break; case NL80211_AUTHTYPE_SHARED_KEY: + if (IS_ERR(sdata->local->wep_tx_tfm)) + return -EOPNOTSUPP; auth_alg = WLAN_AUTH_SHARED_KEY; break; case NL80211_AUTHTYPE_FT: @@ -2134,6 +2092,8 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt; + struct ieee80211_rx_status *rx_status; + struct ieee802_11_elems elems; u16 status; if (!skb) { @@ -2141,6 +2101,19 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, return WORK_DONE_DESTROY; } + if (wk->type == IEEE80211_WORK_ASSOC_BEACON_WAIT) { + mutex_lock(&wk->sdata->u.mgd.mtx); + rx_status = (void *) skb->cb; + ieee802_11_parse_elems(skb->data + 24 + 12, skb->len - 24 - 12, &elems); + ieee80211_rx_bss_info(wk->sdata, (void *)skb->data, skb->len, rx_status, + &elems, true); + mutex_unlock(&wk->sdata->u.mgd.mtx); + + wk->type = IEEE80211_WORK_ASSOC; + /* not really done yet */ + return WORK_DONE_REQUEUE; + } + mgmt = (void *)skb->data; status = le16_to_cpu(mgmt->u.assoc_resp.status_code); @@ -2153,6 +2126,7 @@ static enum 
work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, wk->filter_ta); return WORK_DONE_DESTROY; } + mutex_unlock(&wk->sdata->u.mgd.mtx); } @@ -2253,10 +2227,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (req->prev_bssid) memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN); - wk->type = IEEE80211_WORK_ASSOC; wk->chan = req->bss->channel; wk->sdata = sdata; wk->done = ieee80211_assoc_done; + if (!bss->dtim_period && + sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) + wk->type = IEEE80211_WORK_ASSOC_BEACON_WAIT; + else + wk->type = IEEE80211_WORK_ASSOC; if (req->use_mfp) { ifmgd->mfp = IEEE80211_MFP_REQUIRED; @@ -2282,14 +2260,16 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_work *wk; - const u8 *bssid = req->bss->bssid; + u8 bssid[ETH_ALEN]; + bool assoc_bss = false; mutex_lock(&ifmgd->mtx); + memcpy(bssid, req->bss->bssid, ETH_ALEN); if (ifmgd->associated == req->bss) { - bssid = req->bss->bssid; - ieee80211_set_disassoc(sdata, true); + ieee80211_set_disassoc(sdata, false); mutex_unlock(&ifmgd->mtx); + assoc_bss = true; } else { bool not_auth_yet = false; @@ -2302,7 +2282,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, if (wk->type != IEEE80211_WORK_DIRECT_PROBE && wk->type != IEEE80211_WORK_AUTH && - wk->type != IEEE80211_WORK_ASSOC) + wk->type != IEEE80211_WORK_ASSOC && + wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) continue; if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN)) @@ -2335,6 +2316,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, cookie, !req->local_state_change); + if (assoc_bss) + sta_info_destroy_addr(sdata, bssid); ieee80211_recalc_idle(sdata->local); @@ -2379,41 +2362,6 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return 0; 
} -int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - const u8 *buf, size_t len, u64 *cookie) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct sk_buff *skb; - - /* Check that we are on the requested channel for transmission */ - if ((chan != local->tmp_channel || - channel_type != local->tmp_channel_type) && - (chan != local->oper_channel || - channel_type != local->_oper_channel_type)) - return -EBUSY; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); - if (!skb) - return -ENOMEM; - skb_reserve(skb, local->hw.extra_tx_headroom); - - memcpy(skb_put(skb, len), buf, len); - - if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) - IEEE80211_SKB_CB(skb)->flags |= - IEEE80211_TX_INTFL_DONT_ENCRYPT; - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX | - IEEE80211_TX_CTL_REQ_TX_STATUS; - skb->dev = sdata->dev; - ieee80211_tx_skb(sdata, skb); - - *cookie = (unsigned long) skb; - return 0; -} - void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 75202b295a4e..d287fde0431d 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -40,22 +40,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) list_for_each_entry(sdata, &local->interfaces, list) ieee80211_disable_keys(sdata); - /* Tear down aggregation sessions */ - - rcu_read_lock(); - - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { + /* tear down aggregation sessions and remove STAs */ + mutex_lock(&local->sta_mtx); + list_for_each_entry(sta, &local->sta_list, list) { + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { set_sta_flags(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta); } - } - rcu_read_unlock(); - - /* remove STAs */ - 
mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) { if (sta->uploaded) { sdata = sta->sdata; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -72,6 +64,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { + cancel_work_sync(&sdata->work); + switch(sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_quiesce(sdata); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 065a96190e32..168427b0ffdc 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -147,5 +147,18 @@ static inline void rc80211_minstrel_exit(void) } #endif +#ifdef CONFIG_MAC80211_RC_MINSTREL_HT +extern int rc80211_minstrel_ht_init(void); +extern void rc80211_minstrel_ht_exit(void); +#else +static inline int rc80211_minstrel_ht_init(void) +{ + return 0; +} +static inline void rc80211_minstrel_ht_exit(void) +{ +} +#endif + #endif /* IEEE80211_RATE_H */ diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index f65ce6dcc8e2..778c604d7939 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -67,7 +67,6 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) for (i = rix; i >= 0; i--) if (mi->r[i].rix == rix) break; - WARN_ON(i < 0); return i; } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c new file mode 100644 index 000000000000..c5b465904e3b --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -0,0 +1,827 @@ +/* + * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/debugfs.h> +#include <linux/random.h> +#include <linux/ieee80211.h> +#include <net/mac80211.h> +#include "rate.h" +#include "rc80211_minstrel.h" +#include "rc80211_minstrel_ht.h" + +#define AVG_PKT_SIZE 1200 +#define SAMPLE_COLUMNS 10 +#define EWMA_LEVEL 75 + +/* Number of bits for an average sized packet */ +#define MCS_NBITS (AVG_PKT_SIZE << 3) + +/* Number of symbols for a packet with (bps) bits per symbol */ +#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) + +/* Transmission time for a packet containing (syms) symbols */ +#define MCS_SYMBOL_TIME(sgi, syms) \ + (sgi ? \ + ((syms) * 18 + 4) / 5 : /* syms * 3.6 us */ \ + (syms) << 2 /* syms * 4 us */ \ + ) + +/* Transmit duration for the raw data part of an average sized packet */ +#define MCS_DURATION(streams, sgi, bps) MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps))) + +/* MCS rate information for an MCS group */ +#define MCS_GROUP(_streams, _sgi, _ht40) { \ + .streams = _streams, \ + .flags = \ + (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ + (_ht40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \ + .duration = { \ + MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) \ + } \ +} + +/* + * To enable sufficiently targeted rate sampling, MCS rates are divided into + * groups, based on the number of streams and flags (HT40, SGI) that they + * use. 
+ */ +const struct mcs_group minstrel_mcs_groups[] = { + MCS_GROUP(1, 0, 0), + MCS_GROUP(2, 0, 0), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 0, 0), +#endif + + MCS_GROUP(1, 1, 0), + MCS_GROUP(2, 1, 0), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 1, 0), +#endif + + MCS_GROUP(1, 0, 1), + MCS_GROUP(2, 0, 1), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 0, 1), +#endif + + MCS_GROUP(1, 1, 1), + MCS_GROUP(2, 1, 1), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 1, 1), +#endif +}; + +static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES]; + +/* + * Perform EWMA (Exponentially Weighted Moving Average) calculation + */ +static int +minstrel_ewma(int old, int new, int weight) +{ + return (new * (100 - weight) + old * weight) / 100; +} + +/* + * Look up an MCS group index based on mac80211 rate information + */ +static int +minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) +{ + int streams = (rate->idx / MCS_GROUP_RATES) + 1; + u32 flags = IEEE80211_TX_RC_SHORT_GI | IEEE80211_TX_RC_40_MHZ_WIDTH; + int i; + + for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { + if (minstrel_mcs_groups[i].streams != streams) + continue; + if (minstrel_mcs_groups[i].flags != (rate->flags & flags)) + continue; + + return i; + } + + WARN_ON(1); + return 0; +} + +static inline struct minstrel_rate_stats * +minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) +{ + return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES]; +} + + +/* + * Recalculate success probabilities and counters for a rate using EWMA + */ +static void +minstrel_calc_rate_ewma(struct minstrel_priv *mp, struct minstrel_rate_stats *mr) +{ + if (unlikely(mr->attempts > 0)) { + mr->sample_skipped = 0; + mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts); + if (!mr->att_hist) + mr->probability = mr->cur_prob; + else + mr->probability = minstrel_ewma(mr->probability, + mr->cur_prob, EWMA_LEVEL); + mr->att_hist += mr->attempts; + mr->succ_hist += mr->success; + } else { + 
mr->sample_skipped++; + } + mr->last_success = mr->success; + mr->last_attempts = mr->attempts; + mr->success = 0; + mr->attempts = 0; +} + +/* + * Calculate throughput based on the average A-MPDU length, taking into account + * the expected number of retransmissions and their expected length + */ +static void +minstrel_ht_calc_tp(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + int group, int rate) +{ + struct minstrel_rate_stats *mr; + unsigned int usecs; + + mr = &mi->groups[group].rates[rate]; + + if (mr->probability < MINSTREL_FRAC(1, 10)) { + mr->cur_tp = 0; + return; + } + + usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + usecs += minstrel_mcs_groups[group].duration[rate]; + mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability); +} + +/* + * Update rate statistics and select new primary rates + * + * Rules for rate selection: + * - max_prob_rate must use only one stream, as a tradeoff between delivery + * probability and throughput during strong fluctuations + * - as long as the max prob rate has a probability of more than 3/4, pick + * higher throughput rates, even if the probablity is a bit lower + */ +static void +minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +{ + struct minstrel_mcs_group_data *mg; + struct minstrel_rate_stats *mr; + int cur_prob, cur_prob_tp, cur_tp, cur_tp2; + int group, i, index; + + if (mi->ampdu_packets > 0) { + mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, + MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL); + mi->ampdu_len = 0; + mi->ampdu_packets = 0; + } + + mi->sample_slow = 0; + mi->sample_count = 0; + mi->max_tp_rate = 0; + mi->max_tp_rate2 = 0; + mi->max_prob_rate = 0; + + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + cur_prob = 0; + cur_prob_tp = 0; + cur_tp = 0; + cur_tp2 = 0; + + mg = &mi->groups[group]; + if (!mg->supported) + continue; + + mg->max_tp_rate = 0; + mg->max_tp_rate2 = 0; + mg->max_prob_rate = 0; + 
mi->sample_count++; + + for (i = 0; i < MCS_GROUP_RATES; i++) { + if (!(mg->supported & BIT(i))) + continue; + + mr = &mg->rates[i]; + mr->retry_updated = false; + index = MCS_GROUP_RATES * group + i; + minstrel_calc_rate_ewma(mp, mr); + minstrel_ht_calc_tp(mp, mi, group, i); + + if (!mr->cur_tp) + continue; + + /* ignore the lowest rate of each single-stream group */ + if (!i && minstrel_mcs_groups[group].streams == 1) + continue; + + if ((mr->cur_tp > cur_prob_tp && mr->probability > + MINSTREL_FRAC(3, 4)) || mr->probability > cur_prob) { + mg->max_prob_rate = index; + cur_prob = mr->probability; + cur_prob_tp = mr->cur_tp; + } + + if (mr->cur_tp > cur_tp) { + swap(index, mg->max_tp_rate); + cur_tp = mr->cur_tp; + mr = minstrel_get_ratestats(mi, index); + } + + if (index >= mg->max_tp_rate) + continue; + + if (mr->cur_tp > cur_tp2) { + mg->max_tp_rate2 = index; + cur_tp2 = mr->cur_tp; + } + } + } + + /* try to sample up to half of the availble rates during each interval */ + mi->sample_count *= 4; + + cur_prob = 0; + cur_prob_tp = 0; + cur_tp = 0; + cur_tp2 = 0; + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + mg = &mi->groups[group]; + if (!mg->supported) + continue; + + mr = minstrel_get_ratestats(mi, mg->max_prob_rate); + if (cur_prob_tp < mr->cur_tp && + minstrel_mcs_groups[group].streams == 1) { + mi->max_prob_rate = mg->max_prob_rate; + cur_prob = mr->cur_prob; + cur_prob_tp = mr->cur_tp; + } + + mr = minstrel_get_ratestats(mi, mg->max_tp_rate); + if (cur_tp < mr->cur_tp) { + mi->max_tp_rate = mg->max_tp_rate; + cur_tp = mr->cur_tp; + } + + mr = minstrel_get_ratestats(mi, mg->max_tp_rate2); + if (cur_tp2 < mr->cur_tp) { + mi->max_tp_rate2 = mg->max_tp_rate2; + cur_tp2 = mr->cur_tp; + } + } + + mi->stats_update = jiffies; +} + +static bool +minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) +{ + if (!rate->count) + return false; + + if (rate->idx < 0) + return false; + + return !!(rate->flags & IEEE80211_TX_RC_MCS); +} + +static 
void +minstrel_next_sample_idx(struct minstrel_ht_sta *mi) +{ + struct minstrel_mcs_group_data *mg; + + for (;;) { + mi->sample_group++; + mi->sample_group %= ARRAY_SIZE(minstrel_mcs_groups); + mg = &mi->groups[mi->sample_group]; + + if (!mg->supported) + continue; + + if (++mg->index >= MCS_GROUP_RATES) { + mg->index = 0; + if (++mg->column >= ARRAY_SIZE(sample_table)) + mg->column = 0; + } + break; + } +} + +static void +minstrel_downgrade_rate(struct minstrel_ht_sta *mi, unsigned int *idx, + bool primary) +{ + int group, orig_group; + + orig_group = group = *idx / MCS_GROUP_RATES; + while (group > 0) { + group--; + + if (!mi->groups[group].supported) + continue; + + if (minstrel_mcs_groups[group].streams > + minstrel_mcs_groups[orig_group].streams) + continue; + + if (primary) + *idx = mi->groups[group].max_tp_rate; + else + *idx = mi->groups[group].max_tp_rate2; + break; + } +} + +static void +minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + u16 tid; + + if (unlikely(!ieee80211_is_data_qos(hdr->frame_control))) + return; + + if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE))) + return; + + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + if (likely(sta->ampdu_mlme.tid_tx[tid])) + return; + + ieee80211_start_tx_ba_session(pubsta, tid); +} + +static void +minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_rate *ar = info->status.rates; + struct minstrel_rate_stats *rate, *rate2; + struct minstrel_priv *mp = priv; + bool last = false; + int group; + int i = 0; + + if (!msp->is_ht) + return 
mac80211_minstrel.tx_status(priv, sband, sta, &msp->legacy, skb); + + /* This packet was aggregated but doesn't carry status info */ + if ((info->flags & IEEE80211_TX_CTL_AMPDU) && + !(info->flags & IEEE80211_TX_STAT_AMPDU)) + return; + + if (!info->status.ampdu_len) { + info->status.ampdu_ack_len = 1; + info->status.ampdu_len = 1; + } + + mi->ampdu_packets++; + mi->ampdu_len += info->status.ampdu_len; + + if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { + mi->sample_wait = 4 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); + mi->sample_tries = 3; + mi->sample_count--; + } + + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) { + mi->sample_packets += info->status.ampdu_len; + minstrel_next_sample_idx(mi); + } + + for (i = 0; !last; i++) { + last = (i == IEEE80211_TX_MAX_RATES - 1) || + !minstrel_ht_txstat_valid(&ar[i + 1]); + + if (!minstrel_ht_txstat_valid(&ar[i])) + break; + + group = minstrel_ht_get_group_idx(&ar[i]); + rate = &mi->groups[group].rates[ar[i].idx % 8]; + + if (last && (info->flags & IEEE80211_TX_STAT_ACK)) + rate->success += info->status.ampdu_ack_len; + + rate->attempts += ar[i].count * info->status.ampdu_len; + } + + /* + * check for sudden death of spatial multiplexing, + * downgrade to a lower number of streams if necessary. 
+ */ + rate = minstrel_get_ratestats(mi, mi->max_tp_rate); + if (rate->attempts > 30 && + MINSTREL_FRAC(rate->success, rate->attempts) < + MINSTREL_FRAC(20, 100)) + minstrel_downgrade_rate(mi, &mi->max_tp_rate, true); + + rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2); + if (rate2->attempts > 30 && + MINSTREL_FRAC(rate2->success, rate2->attempts) < + MINSTREL_FRAC(20, 100)) + minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false); + + if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { + minstrel_ht_update_stats(mp, mi); + minstrel_aggr_check(mp, sta, skb); + } +} + +static void +minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + int index) +{ + struct minstrel_rate_stats *mr; + const struct mcs_group *group; + unsigned int tx_time, tx_time_rtscts, tx_time_data; + unsigned int cw = mp->cw_min; + unsigned int t_slot = 9; /* FIXME */ + unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); + + mr = minstrel_get_ratestats(mi, index); + if (mr->probability < MINSTREL_FRAC(1, 10)) { + mr->retry_count = 1; + mr->retry_count_rtscts = 1; + return; + } + + mr->retry_count = 2; + mr->retry_count_rtscts = 2; + mr->retry_updated = true; + + group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len; + tx_time = 2 * (t_slot + mi->overhead + tx_time_data); + tx_time_rtscts = 2 * (t_slot + mi->overhead_rtscts + tx_time_data); + do { + cw = (cw << 1) | 1; + cw = min(cw, mp->cw_max); + tx_time += cw + t_slot + mi->overhead; + tx_time_rtscts += cw + t_slot + mi->overhead_rtscts; + if (tx_time_rtscts < mp->segment_size) + mr->retry_count_rtscts++; + } while ((tx_time < mp->segment_size) && + (++mr->retry_count < mp->max_retry)); +} + + +static void +minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_tx_rate *rate, int index, + struct ieee80211_tx_rate_control *txrc, + bool sample, bool rtscts) +{ + const 
struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + struct minstrel_rate_stats *mr; + + mr = minstrel_get_ratestats(mi, index); + if (!mr->retry_updated) + minstrel_calc_retransmit(mp, mi, index); + + if (mr->probability < MINSTREL_FRAC(20, 100)) + rate->count = 2; + else if (rtscts) + rate->count = mr->retry_count_rtscts; + else + rate->count = mr->retry_count; + + rate->flags = IEEE80211_TX_RC_MCS | group->flags; + if (txrc->short_preamble) + rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + if (txrc->rts || rtscts) + rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; + rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; +} + +static inline int +minstrel_get_duration(int index) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + return group->duration[index % MCS_GROUP_RATES]; +} + +static int +minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +{ + struct minstrel_rate_stats *mr; + struct minstrel_mcs_group_data *mg; + int sample_idx = 0; + + if (mi->sample_wait > 0) { + mi->sample_wait--; + return -1; + } + + if (!mi->sample_tries) + return -1; + + mi->sample_tries--; + mg = &mi->groups[mi->sample_group]; + sample_idx = sample_table[mg->column][mg->index]; + mr = &mg->rates[sample_idx]; + sample_idx += mi->sample_group * MCS_GROUP_RATES; + + /* + * When not using MRR, do not sample if the probability is already + * higher than 95% to avoid wasting airtime + */ + if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) + goto next; + + /* + * Make sure that lower rates get sampled only occasionally, + * if the link is working perfectly. 
+ */ + if (minstrel_get_duration(sample_idx) > + minstrel_get_duration(mi->max_tp_rate)) { + if (mr->sample_skipped < 10) + goto next; + + if (mi->sample_slow++ > 2) + goto next; + } + + return sample_idx; + +next: + minstrel_next_sample_idx(mi); + return -1; +} + +static void +minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_tx_rate_control *txrc) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); + struct ieee80211_tx_rate *ar = info->status.rates; + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_priv *mp = priv; + int sample_idx; + + if (rate_control_send_low(sta, priv_sta, txrc)) + return; + + if (!msp->is_ht) + return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); + + info->flags |= mi->tx_flags; + sample_idx = minstrel_get_sample_rate(mp, mi); + if (sample_idx >= 0) { + minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, + txrc, true, false); + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, + txrc, false, true); + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } else { + minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, + txrc, false, false); + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, + txrc, false, true); + } + minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, true); + + ar[3].count = 0; + ar[3].idx = -1; + + mi->total_packets++; + + /* wraparound */ + if (mi->total_packets == ~0) { + mi->total_packets = 0; + mi->sample_packets = 0; + } +} + +static void +minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + enum nl80211_channel_type oper_chan_type) +{ + struct minstrel_priv *mp = priv; + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; + struct ieee80211_local *local = hw_to_local(mp->hw); + u16 sta_cap = sta->ht_cap.cap; + int 
ack_dur; + int stbc; + int i; + + /* fall back to the old minstrel for legacy stations */ + if (!sta->ht_cap.ht_supported) { + msp->is_ht = false; + memset(&msp->legacy, 0, sizeof(msp->legacy)); + msp->legacy.r = msp->ratelist; + msp->legacy.sample_table = msp->sample_table; + return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy); + } + + BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != + MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); + + msp->is_ht = true; + memset(mi, 0, sizeof(*mi)); + mi->stats_update = jiffies; + + ack_dur = ieee80211_frame_duration(local, 10, 60, 1, 1); + mi->overhead = ieee80211_frame_duration(local, 0, 60, 1, 1) + ack_dur; + mi->overhead_rtscts = mi->overhead + 2 * ack_dur; + + mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); + + /* When using MRR, sample more on the first attempt, without delay */ + if (mp->has_mrr) { + mi->sample_count = 16; + mi->sample_wait = 0; + } else { + mi->sample_count = 8; + mi->sample_wait = 8; + } + mi->sample_tries = 4; + + stbc = (sta_cap & IEEE80211_HT_CAP_RX_STBC) >> + IEEE80211_HT_CAP_RX_STBC_SHIFT; + mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT; + + if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING) + mi->tx_flags |= IEEE80211_TX_CTL_LDPC; + + if (oper_chan_type != NL80211_CHAN_HT40MINUS && + oper_chan_type != NL80211_CHAN_HT40PLUS) + sta_cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { + u16 req = 0; + + mi->groups[i].supported = 0; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + req |= IEEE80211_HT_CAP_SGI_40; + else + req |= IEEE80211_HT_CAP_SGI_20; + } + + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + req |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + if ((sta_cap & req) != req) + continue; + + mi->groups[i].supported = + mcs->rx_mask[minstrel_mcs_groups[i].streams - 1]; + } +} + +static void +minstrel_ht_rate_init(void *priv, struct 
ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_priv *mp = priv; + + minstrel_ht_update_caps(priv, sband, sta, priv_sta, mp->hw->conf.channel_type); +} + +static void +minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + u32 changed, enum nl80211_channel_type oper_chan_type) +{ + minstrel_ht_update_caps(priv, sband, sta, priv_sta, oper_chan_type); +} + +static void * +minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) +{ + struct ieee80211_supported_band *sband; + struct minstrel_ht_sta_priv *msp; + struct minstrel_priv *mp = priv; + struct ieee80211_hw *hw = mp->hw; + int max_rates = 0; + int i; + + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = hw->wiphy->bands[i]; + if (sband && sband->n_bitrates > max_rates) + max_rates = sband->n_bitrates; + } + + msp = kzalloc(sizeof(struct minstrel_ht_sta), gfp); + if (!msp) + return NULL; + + msp->ratelist = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp); + if (!msp->ratelist) + goto error; + + msp->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp); + if (!msp->sample_table) + goto error1; + + return msp; + +error1: + kfree(msp->ratelist); +error: + kfree(msp); + return NULL; +} + +static void +minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + kfree(msp->sample_table); + kfree(msp->ratelist); + kfree(msp); +} + +static void * +minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +{ + return mac80211_minstrel.alloc(hw, debugfsdir); +} + +static void +minstrel_ht_free(void *priv) +{ + mac80211_minstrel.free(priv); +} + +static struct rate_control_ops mac80211_minstrel_ht = { + .name = "minstrel_ht", + .tx_status = minstrel_ht_tx_status, + .get_rate = minstrel_ht_get_rate, + .rate_init = minstrel_ht_rate_init, + .rate_update = minstrel_ht_rate_update, + .alloc_sta = 
minstrel_ht_alloc_sta, + .free_sta = minstrel_ht_free_sta, + .alloc = minstrel_ht_alloc, + .free = minstrel_ht_free, +#ifdef CONFIG_MAC80211_DEBUGFS + .add_sta_debugfs = minstrel_ht_add_sta_debugfs, + .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs, +#endif +}; + + +static void +init_sample_table(void) +{ + int col, i, new_idx; + u8 rnd[MCS_GROUP_RATES]; + + memset(sample_table, 0xff, sizeof(sample_table)); + for (col = 0; col < SAMPLE_COLUMNS; col++) { + for (i = 0; i < MCS_GROUP_RATES; i++) { + get_random_bytes(rnd, sizeof(rnd)); + new_idx = (i + rnd[i]) % MCS_GROUP_RATES; + + while (sample_table[col][new_idx] != 0xff) + new_idx = (new_idx + 1) % MCS_GROUP_RATES; + + sample_table[col][new_idx] = i; + } + } +} + +int __init +rc80211_minstrel_ht_init(void) +{ + init_sample_table(); + return ieee80211_rate_control_register(&mac80211_minstrel_ht); +} + +void +rc80211_minstrel_ht_exit(void) +{ + ieee80211_rate_control_unregister(&mac80211_minstrel_ht); +} diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h new file mode 100644 index 000000000000..462d2b227ed5 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __RC_MINSTREL_HT_H +#define __RC_MINSTREL_HT_H + +/* + * The number of streams can be changed to 2 to reduce code + * size and memory footprint. 
+ */ +#define MINSTREL_MAX_STREAMS 3 +#define MINSTREL_STREAM_GROUPS 4 + +/* scaled fraction values */ +#define MINSTREL_SCALE 16 +#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) +#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) + +#define MCS_GROUP_RATES 8 + +struct mcs_group { + u32 flags; + unsigned int streams; + unsigned int duration[MCS_GROUP_RATES]; +}; + +extern const struct mcs_group minstrel_mcs_groups[]; + +struct minstrel_rate_stats { + /* current / last sampling period attempts/success counters */ + unsigned int attempts, last_attempts; + unsigned int success, last_success; + + /* total attempts/success counters */ + u64 att_hist, succ_hist; + + /* current throughput */ + unsigned int cur_tp; + + /* packet delivery probabilities */ + unsigned int cur_prob, probability; + + /* maximum retry counts */ + unsigned int retry_count; + unsigned int retry_count_rtscts; + + bool retry_updated; + u8 sample_skipped; +}; + +struct minstrel_mcs_group_data { + u8 index; + u8 column; + + /* bitfield of supported MCS rates of this group */ + u8 supported; + + /* selected primary rates */ + unsigned int max_tp_rate; + unsigned int max_tp_rate2; + unsigned int max_prob_rate; + + /* MCS rate statistics */ + struct minstrel_rate_stats rates[MCS_GROUP_RATES]; +}; + +struct minstrel_ht_sta { + /* ampdu length (average, per sampling interval) */ + unsigned int ampdu_len; + unsigned int ampdu_packets; + + /* ampdu length (EWMA) */ + unsigned int avg_ampdu_len; + + /* best throughput rate */ + unsigned int max_tp_rate; + + /* second best throughput rate */ + unsigned int max_tp_rate2; + + /* best probability rate */ + unsigned int max_prob_rate; + + /* time of last status update */ + unsigned long stats_update; + + /* overhead time in usec for each frame */ + unsigned int overhead; + unsigned int overhead_rtscts; + + unsigned int total_packets; + unsigned int sample_packets; + + /* tx flags to add for frames for this sta */ + u32 tx_flags; + + u8 
sample_wait; + u8 sample_tries; + u8 sample_count; + u8 sample_slow; + + /* current MCS group to be sampled */ + u8 sample_group; + + /* MCS rate group info and statistics */ + struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS]; +}; + +struct minstrel_ht_sta_priv { + union { + struct minstrel_ht_sta ht; + struct minstrel_sta_info legacy; + }; +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *dbg_stats; +#endif + void *ratelist; + void *sample_table; + bool is_ht; +}; + +void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); +void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta); + +#endif diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c new file mode 100644 index 000000000000..4a5a4b3e7799 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/debugfs.h> +#include <linux/ieee80211.h> +#include <net/mac80211.h> +#include "rc80211_minstrel.h" +#include "rc80211_minstrel_ht.h" + +static int +minstrel_ht_stats_open(struct inode *inode, struct file *file) +{ + struct minstrel_ht_sta_priv *msp = inode->i_private; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_debugfs_info *ms; + unsigned int i, j, tp, prob, eprob; + char *p; + int ret; + + if (!msp->is_ht) { + inode->i_private = &msp->legacy; + ret = minstrel_stats_open(inode, file); + inode->i_private = msp; + return ret; + } + + ms = kmalloc(sizeof(*ms) + 8192, GFP_KERNEL); + if (!ms) + return -ENOMEM; + + file->private_data = ms; + p = ms->buf; + p += sprintf(p, "type rate throughput ewma prob this prob " + "this succ/attempt success attempts\n"); + for (i = 0; i < MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; i++) { + char htmode = '2'; + char gimode = 'L'; + + if (!mi->groups[i].supported) + continue; + + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + htmode = '4'; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) + gimode = 'S'; + + for (j = 0; j < MCS_GROUP_RATES; j++) { + struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; + int idx = i * MCS_GROUP_RATES + j; + + if (!(mi->groups[i].supported & BIT(j))) + continue; + + p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); + + *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; + *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (idx == mi->max_prob_rate) ? 
'P' : ' '; + p += sprintf(p, "MCS%-2u", (minstrel_mcs_groups[i].streams - 1) * + MCS_GROUP_RATES + j); + + tp = mr->cur_tp / 10; + prob = MINSTREL_TRUNC(mr->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mr->probability * 1000); + + p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " + "%3u(%3u) %8llu %8llu\n", + tp / 10, tp % 10, + eprob / 10, eprob % 10, + prob / 10, prob % 10, + mr->last_success, + mr->last_attempts, + (unsigned long long)mr->succ_hist, + (unsigned long long)mr->att_hist); + } + } + p += sprintf(p, "\nTotal packet count:: ideal %d " + "lookaround %d\n", + max(0, (int) mi->total_packets - (int) mi->sample_packets), + mi->sample_packets); + p += sprintf(p, "Average A-MPDU length: %d.%d\n", + MINSTREL_TRUNC(mi->avg_ampdu_len), + MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); + ms->len = p - ms->buf; + + return 0; +} + +static const struct file_operations minstrel_ht_stat_fops = { + .owner = THIS_MODULE, + .open = minstrel_ht_stats_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, +}; + +void +minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp, + &minstrel_ht_stat_fops); +} + +void +minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + debugfs_remove(msp->dbg_stats); +} diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index be9abc2e6348..fa0f37e4afe4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -293,7 +293,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) { skb2->dev = prev_dev; - netif_rx(skb2); + netif_receive_skb(skb2); } } @@ -304,7 +304,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (prev_dev) { skb->dev = prev_dev; - netif_rx(skb); + netif_receive_skb(skb); } else dev_kfree_skb(skb); @@ -719,16 +719,13 
@@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; - spin_lock(&sta->lock); - - if (!sta->ampdu_mlme.tid_active_rx[tid]) - goto dont_reorder_unlock; - - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); + if (!tid_agg_rx) + goto dont_reorder; /* qos null data frames are excluded */ if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) - goto dont_reorder_unlock; + goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ @@ -740,20 +737,22 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { - spin_unlock(&sta->lock); - __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_REQUIRE_SETUP); - dev_kfree_skb(skb); + skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&rx->sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &rx->sdata->work); return; } - if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) { - spin_unlock(&sta->lock); + /* + * No locking needed -- we will only ever process one + * RX packet at a time, and thus own tid_agg_rx. All + * other code manipulating it needs to (and does) make + * sure that we cannot get to it any more before doing + * anything with it. 
+ */ + if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) return; - } - dont_reorder_unlock: - spin_unlock(&sta->lock); dont_reorder: __skb_queue_tail(frames, skb); } @@ -825,6 +824,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) ieee80211_rx_result result = RX_DROP_UNUSABLE; struct ieee80211_key *stakey = NULL; int mmie_keyidx = -1; + __le16 fc; /* * Key selection 101 @@ -866,13 +866,15 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (rx->sta) stakey = rcu_dereference(rx->sta->key); - if (!ieee80211_has_protected(hdr->frame_control)) + fc = hdr->frame_control; + + if (!ieee80211_has_protected(fc)) mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); if (!is_multicast_ether_addr(hdr->addr1) && stakey) { rx->key = stakey; /* Skip decryption if the frame is not protected. */ - if (!ieee80211_has_protected(hdr->frame_control)) + if (!ieee80211_has_protected(fc)) return RX_CONTINUE; } else if (mmie_keyidx >= 0) { /* Broadcast/multicast robust management frame / BIP */ @@ -884,7 +886,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) return RX_DROP_MONITOR; /* unexpected BIP keyidx */ rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); - } else if (!ieee80211_has_protected(hdr->frame_control)) { + } else if (!ieee80211_has_protected(fc)) { /* * The frame was not protected, so skip decryption. However, we * need to set rx->key if there is a key that could have been @@ -892,7 +894,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * have been expected. 
*/ struct ieee80211_key *key = NULL; - if (ieee80211_is_mgmt(hdr->frame_control) && + if (ieee80211_is_mgmt(fc) && is_multicast_ether_addr(hdr->addr1) && (key = rcu_dereference(rx->sdata->default_mgmt_key))) rx->key = key; @@ -914,7 +916,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; - hdrlen = ieee80211_hdrlen(hdr->frame_control); + hdrlen = ieee80211_hdrlen(fc); if (rx->skb->len < 8 + hdrlen) return RX_DROP_UNUSABLE; /* TODO: count this? */ @@ -947,19 +949,17 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; - - hdr = (struct ieee80211_hdr *)rx->skb->data; - - /* Check for weak IVs if possible */ - if (rx->sta && rx->key->conf.alg == ALG_WEP && - ieee80211_is_data(hdr->frame_control) && - (!(status->flag & RX_FLAG_IV_STRIPPED) || - !(status->flag & RX_FLAG_DECRYPTED)) && - ieee80211_wep_is_weak_iv(rx->skb, rx->key)) - rx->sta->wep_weak_iv_count++; + /* the hdr variable is invalid now! */ switch (rx->key->conf.alg) { case ALG_WEP: + /* Check for weak IVs if possible */ + if (rx->sta && ieee80211_is_data(fc) && + (!(status->flag & RX_FLAG_IV_STRIPPED) || + !(status->flag & RX_FLAG_DECRYPTED)) && + ieee80211_wep_is_weak_iv(rx->skb, rx->key)) + rx->sta->wep_weak_iv_count++; + result = ieee80211_crypto_wep_decrypt(rx); break; case ALG_TKIP: @@ -1267,11 +1267,13 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) rx->queue, &(rx->skb)); if (rx->key && rx->key->conf.alg == ALG_CCMP && ieee80211_has_protected(fc)) { + int queue = ieee80211_is_mgmt(fc) ? + NUM_RX_DATA_QUEUES : rx->queue; /* Store CCMP PN so that we can verify that the next * fragment has a sequential PN value. 
*/ entry->ccmp = 1; memcpy(entry->last_pn, - rx->key->u.ccmp.rx_pn[rx->queue], + rx->key->u.ccmp.rx_pn[queue], CCMP_PN_LEN); } return RX_QUEUED; @@ -1291,6 +1293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (entry->ccmp) { int i; u8 pn[CCMP_PN_LEN], *rpn; + int queue; if (!rx->key || rx->key->conf.alg != ALG_CCMP) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, CCMP_PN_LEN); @@ -1299,7 +1302,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (pn[i]) break; } - rpn = rx->key->u.ccmp.rx_pn[rx->queue]; + queue = ieee80211_is_mgmt(fc) ? + NUM_RX_DATA_QUEUES : rx->queue; + rpn = rx->key->u.ccmp.rx_pn[queue]; if (memcmp(pn, rpn, CCMP_PN_LEN)) return RX_DROP_UNUSABLE; memcpy(entry->last_pn, pn, CCMP_PN_LEN); @@ -1573,7 +1578,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) /* deliver to local stack */ skb->protocol = eth_type_trans(skb, dev); memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); + netif_receive_skb(skb); } } @@ -1829,13 +1834,11 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) &bar_data, sizeof(bar_data))) return RX_DROP_MONITOR; - spin_lock(&rx->sta->lock); tid = le16_to_cpu(bar_data.control) >> 12; - if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { - spin_unlock(&rx->sta->lock); + + tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]); + if (!tid_agg_rx) return RX_DROP_MONITOR; - } - tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; @@ -1848,11 +1851,15 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num, frames); kfree_skb(skb); - spin_unlock(&rx->sta->lock); return RX_QUEUED; } - return RX_CONTINUE; + /* + * After this point, we only want management frames, + * so we can drop all remaining control frames to + * cooked monitor interfaces. 
+ */ + return RX_DROP_MONITOR; } static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, @@ -1944,30 +1951,27 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (len < IEEE80211_MIN_ACTION_SIZE + 1) break; - if (sdata->vif.type == NL80211_IFTYPE_STATION) - return ieee80211_sta_rx_mgmt(sdata, rx->skb); - switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.addba_req))) - return RX_DROP_MONITOR; - ieee80211_process_addba_request(local, rx->sta, mgmt, len); - goto handled; + goto invalid; + break; case WLAN_ACTION_ADDBA_RESP: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.addba_resp))) - break; - ieee80211_process_addba_resp(local, rx->sta, mgmt, len); - goto handled; + goto invalid; + break; case WLAN_ACTION_DELBA: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.delba))) - break; - ieee80211_process_delba(sdata, rx->sta, mgmt, len); - goto handled; + goto invalid; + break; + default: + goto invalid; } - break; + + goto queue; case WLAN_CATEGORY_SPECTRUM_MGMT: if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) break; @@ -1997,7 +2001,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN)) break; - return ieee80211_sta_rx_mgmt(sdata, rx->skb); + goto queue; } break; case WLAN_CATEGORY_SA_QUERY: @@ -2015,11 +2019,12 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; case WLAN_CATEGORY_MESH_PLINK: case WLAN_CATEGORY_MESH_PATH_SEL: - if (ieee80211_vif_is_mesh(&sdata->vif)) - return ieee80211_mesh_rx_mgmt(sdata, rx->skb); - break; + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + goto queue; } + invalid: /* * For AP mode, hostapd is responsible for handling any action * frames that we didn't handle, including returning unknown @@ -2039,8 +2044,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) */ status = IEEE80211_SKB_RXCB(rx->skb); - if 
(sdata->vif.type == NL80211_IFTYPE_STATION && - cfg80211_rx_action(rx->sdata->dev, status->freq, + if (cfg80211_rx_action(rx->sdata->dev, status->freq, rx->skb->data, rx->skb->len, GFP_ATOMIC)) goto handled; @@ -2052,11 +2056,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) nskb = skb_copy_expand(rx->skb, local->hw.extra_tx_headroom, 0, GFP_ATOMIC); if (nskb) { - struct ieee80211_mgmt *mgmt = (void *)nskb->data; + struct ieee80211_mgmt *nmgmt = (void *)nskb->data; - mgmt->u.action.category |= 0x80; - memcpy(mgmt->da, mgmt->sa, ETH_ALEN); - memcpy(mgmt->sa, rx->sdata->vif.addr, ETH_ALEN); + nmgmt->u.action.category |= 0x80; + memcpy(nmgmt->da, nmgmt->sa, ETH_ALEN); + memcpy(nmgmt->sa, rx->sdata->vif.addr, ETH_ALEN); memset(nskb->cb, 0, sizeof(nskb->cb)); @@ -2068,6 +2072,14 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; + + queue: + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; + return RX_QUEUED; } static ieee80211_rx_result debug_noinline @@ -2075,10 +2087,15 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; ieee80211_rx_result rxs; + struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; + __le16 stype; if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; + if (rx->skb->len < 24) + return RX_DROP_MONITOR; + if (ieee80211_drop_unencrypted_mgmt(rx)) return RX_DROP_UNUSABLE; @@ -2086,16 +2103,42 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) if (rxs != RX_CONTINUE) return rxs; - if (ieee80211_vif_is_mesh(&sdata->vif)) - return ieee80211_mesh_rx_mgmt(sdata, rx->skb); + stype = mgmt->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE); - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return ieee80211_ibss_rx_mgmt(sdata, rx->skb); + if (!ieee80211_vif_is_mesh(&sdata->vif) && + 
sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_STATION) + return RX_DROP_MONITOR; + + switch (stype) { + case cpu_to_le16(IEEE80211_STYPE_BEACON): + case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP): + /* process for all: mesh, mlme, ibss */ + break; + case cpu_to_le16(IEEE80211_STYPE_DEAUTH): + case cpu_to_le16(IEEE80211_STYPE_DISASSOC): + /* process only for station */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return RX_DROP_MONITOR; + break; + case cpu_to_le16(IEEE80211_STYPE_PROBE_REQ): + case cpu_to_le16(IEEE80211_STYPE_AUTH): + /* process only for ibss */ + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + return RX_DROP_MONITOR; + break; + default: + return RX_DROP_MONITOR; + } - if (sdata->vif.type == NL80211_IFTYPE_STATION) - return ieee80211_sta_rx_mgmt(sdata, rx->skb); + /* queue up frame and kick off work to process it */ + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&rx->local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; - return RX_DROP_MONITOR; + return RX_QUEUED; } static void ieee80211_rx_michael_mic_report(struct ieee80211_hdr *hdr, @@ -2151,7 +2194,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, u8 rate_or_pad; __le16 chan_freq; __le16 chan_flags; - } __attribute__ ((packed)) *rthdr; + } __packed *rthdr; struct sk_buff *skb = rx->skb, *skb2; struct net_device *prev_dev = NULL; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); @@ -2201,7 +2244,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) { skb2->dev = prev_dev; - netif_rx(skb2); + netif_receive_skb(skb2); } } @@ -2212,7 +2255,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, if (prev_dev) { skb->dev = prev_dev; - netif_rx(skb); + netif_receive_skb(skb); skb = NULL; } else goto out_free_skb; diff --git a/net/mac80211/scan.c 
b/net/mac80211/scan.c index e1b0be7a57b9..41f20fb7e670 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -114,6 +114,10 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->dtim_period = tim_ie->dtim_period; } + /* If the beacon had no TIM IE, or it was invalid, use 1 */ + if (beacon && !bss->dtim_period) + bss->dtim_period = 1; + /* replace old supported rates if we get new values */ srlen = 0; if (elems->supp_rates) { @@ -734,7 +738,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; int ret = -EBUSY; - enum nl80211_band band; + enum ieee80211_band band; mutex_lock(&local->scan_mtx); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ba9360a475b0..6d86f0c1ad04 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -235,6 +235,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); spin_lock_init(&sta->flaglock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); + INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); + mutex_init(&sta->ampdu_mlme.mtx); memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; @@ -246,14 +248,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } for (i = 0; i < STA_TID_NUM; i++) { - /* timer_to_tid must be initialized with identity mapping to - * enable session_timer's data differentiation. refer to - * sta_rx_agg_session_timer_expired for useage */ + /* + * timer_to_tid must be initialized with identity mapping + * to enable session_timer's data differentiation. See + * sta_rx_agg_session_timer_expired for usage. 
+ */ sta->timer_to_tid[i] = i; - /* tx */ - sta->ampdu_mlme.tid_state_tx[i] = HT_AGG_STATE_IDLE; - sta->ampdu_mlme.tid_tx[i] = NULL; - sta->ampdu_mlme.addba_req_num[i] = 0; } skb_queue_head_init(&sta->ps_tx_buf); skb_queue_head_init(&sta->tx_filtered); @@ -647,15 +647,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) return ret; if (sta->key) { - ieee80211_key_free(sta->key); - /* - * We have only unlinked the key, and actually destroying it - * may mean it is removed from hardware which requires that - * the key->sta pointer is still valid, so flush the key todo - * list here. - */ - ieee80211_key_todo(); - + ieee80211_key_free(local, sta->key); WARN_ON(sta->key); } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index df9d45544ca5..54262e72376d 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -42,9 +42,6 @@ * be in the queues * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping * station in power-save mode, reply when the driver unblocks. - * @WLAN_STA_DISASSOC: Disassociation in progress. - * This is used to reject TX BA session requests when disassociation - * is in progress. 
*/ enum ieee80211_sta_info_flags { WLAN_STA_AUTH = 1<<0, @@ -60,38 +57,44 @@ enum ieee80211_sta_info_flags { WLAN_STA_BLOCK_BA = 1<<11, WLAN_STA_PS_DRIVER = 1<<12, WLAN_STA_PSPOLL = 1<<13, - WLAN_STA_DISASSOC = 1<<14, }; #define STA_TID_NUM 16 #define ADDBA_RESP_INTERVAL HZ -#define HT_AGG_MAX_RETRIES (0x3) +#define HT_AGG_MAX_RETRIES 0x3 -#define HT_AGG_STATE_INITIATOR_SHIFT (4) - -#define HT_ADDBA_REQUESTED_MSK BIT(0) -#define HT_ADDBA_DRV_READY_MSK BIT(1) -#define HT_ADDBA_RECEIVED_MSK BIT(2) -#define HT_AGG_STATE_REQ_STOP_BA_MSK BIT(3) -#define HT_AGG_STATE_INITIATOR_MSK BIT(HT_AGG_STATE_INITIATOR_SHIFT) -#define HT_AGG_STATE_IDLE (0x0) -#define HT_AGG_STATE_OPERATIONAL (HT_ADDBA_REQUESTED_MSK | \ - HT_ADDBA_DRV_READY_MSK | \ - HT_ADDBA_RECEIVED_MSK) +#define HT_AGG_STATE_DRV_READY 0 +#define HT_AGG_STATE_RESPONSE_RECEIVED 1 +#define HT_AGG_STATE_OPERATIONAL 2 +#define HT_AGG_STATE_STOPPING 3 +#define HT_AGG_STATE_WANT_START 4 +#define HT_AGG_STATE_WANT_STOP 5 /** * struct tid_ampdu_tx - TID aggregation information (Tx). * + * @rcu_head: rcu head for freeing structure * @addba_resp_timer: timer for peer's response to addba request * @pending: pending frames queue -- use sta's spinlock to protect - * @ssn: Starting Sequence Number expected to be aggregated. * @dialog_token: dialog token for aggregation session + * @state: session state (see above) + * @stop_initiator: initiator of a session stop + * + * This structure is protected by RCU and the per-station + * spinlock. Assignments to the array holding it must hold + * the spinlock, only the TX path can access it under RCU + * lock-free if, and only if, the state has the flag + * %HT_AGG_STATE_OPERATIONAL set. Otherwise, the TX path + * must also acquire the spinlock and re-check the state, + * see comments in the tx code touching it. 
*/ struct tid_ampdu_tx { + struct rcu_head rcu_head; struct timer_list addba_resp_timer; struct sk_buff_head pending; - u16 ssn; + unsigned long state; u8 dialog_token; + u8 stop_initiator; }; /** @@ -106,8 +109,18 @@ struct tid_ampdu_tx { * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). * @dialog_token: dialog token for aggregation session + * @rcu_head: RCU head used for freeing this struct + * + * This structure is protected by RCU and the per-station + * spinlock. Assignments to the array holding it must hold + * the spinlock, only the RX path can access it under RCU + * lock-free. The RX path, since it is single-threaded, + * can even modify the structure without locking since the + * only other modifications to it are done when the struct + * can not yet or no longer be found by the RX path. */ struct tid_ampdu_rx { + struct rcu_head rcu_head; struct sk_buff **reorder_buf; unsigned long *reorder_time; struct timer_list session_timer; @@ -120,6 +133,32 @@ struct tid_ampdu_rx { }; /** + * struct sta_ampdu_mlme - STA aggregation information. + * + * @tid_rx: aggregation info for Rx per TID -- RCU protected + * @tid_tx: aggregation info for Tx per TID + * @addba_req_num: number of times addBA request has been sent. 
+ * @dialog_token_allocator: dialog token enumerator for each new session; + * @work: work struct for starting/stopping aggregation + * @tid_rx_timer_expired: bitmap indicating on which TIDs the + * RX timer expired until the work for it runs + * @mtx: mutex to protect all TX data (except non-NULL assignments + * to tid_tx[idx], which are protected by the sta spinlock) + */ +struct sta_ampdu_mlme { + struct mutex mtx; + /* rx */ + struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; + unsigned long tid_rx_timer_expired[BITS_TO_LONGS(STA_TID_NUM)]; + /* tx */ + struct work_struct work; + struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; + u8 addba_req_num[STA_TID_NUM]; + u8 dialog_token_allocator; +}; + + +/** * enum plink_state - state of a mesh peer link finite state machine * * @PLINK_LISTEN: initial state, considered the implicit state of non existant @@ -143,28 +182,6 @@ enum plink_state { }; /** - * struct sta_ampdu_mlme - STA aggregation information. - * - * @tid_active_rx: TID's state in Rx session state machine. - * @tid_rx: aggregation info for Rx per TID - * @tid_state_tx: TID's state in Tx session state machine. - * @tid_tx: aggregation info for Tx per TID - * @addba_req_num: number of times addBA request has been sent. - * @dialog_token_allocator: dialog token enumerator for each new session; - */ -struct sta_ampdu_mlme { - /* rx */ - bool tid_active_rx[STA_TID_NUM]; - struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; - /* tx */ - u8 tid_state_tx[STA_TID_NUM]; - struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; - u8 addba_req_num[STA_TID_NUM]; - u8 dialog_token_allocator; -}; - - -/** * struct sta_info - STA information * * This structure collects information about a station that @@ -410,20 +427,20 @@ void for_each_sta_info_type_check(struct ieee80211_local *local, { } -#define for_each_sta_info(local, _addr, sta, nxt) \ +#define for_each_sta_info(local, _addr, _sta, nxt) \ for ( /* initialise loop */ \ - sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ - nxt = sta ? 
rcu_dereference(sta->hnext) : NULL; \ + _sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ + nxt = _sta ? rcu_dereference(_sta->hnext) : NULL; \ /* typecheck */ \ - for_each_sta_info_type_check(local, (_addr), sta, nxt), \ + for_each_sta_info_type_check(local, (_addr), _sta, nxt),\ /* continue condition */ \ - sta; \ + _sta; \ /* advance loop */ \ - sta = nxt, \ - nxt = sta ? rcu_dereference(sta->hnext) : NULL \ + _sta = nxt, \ + nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \ ) \ /* compare address and run code only if it matches */ \ - if (memcmp(sta->sta.addr, (_addr), ETH_ALEN) == 0) + if (memcmp(_sta->sta.addr, (_addr), ETH_ALEN) == 0) /* * Get STA info by index, BROKEN! diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 94613af009f3..10caec5ea8fa 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -47,7 +47,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, /* * This skb 'survived' a round-trip through the driver, and * hopefully the driver didn't mangle it too badly. However, - * we can definitely not rely on the the control information + * we can definitely not rely on the control information * being correct. Clear it so we don't get junk there, and * indicate that it needs new processing, but must not be * modified/encrypted again. 
@@ -377,7 +377,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) { skb2->dev = prev_dev; - netif_rx(skb2); + netif_receive_skb(skb2); } } @@ -386,7 +386,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } if (prev_dev) { skb->dev = prev_dev; - netif_rx(skb); + netif_receive_skb(skb); skb = NULL; } rcu_read_unlock(); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 7ef491e9d66d..e840c9cd46db 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -202,9 +202,9 @@ EXPORT_SYMBOL(ieee80211_get_tkip_key); * @payload_len is the length of payload (_not_ including IV/ICV length). * @ta is the transmitter addresses. */ -void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, - struct ieee80211_key *key, - u8 *pos, size_t payload_len, u8 *ta) +int ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, + struct ieee80211_key *key, + u8 *pos, size_t payload_len, u8 *ta) { u8 rc4key[16]; struct tkip_ctx *ctx = &key->u.tkip.tx; @@ -216,7 +216,7 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key); - ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len); + return ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len); } /* Decrypt packet payload with TKIP using @key. 
@pos is a pointer to the diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h index d4714383f5fc..7e83dee976fa 100644 --- a/net/mac80211/tkip.h +++ b/net/mac80211/tkip.h @@ -15,7 +15,7 @@ u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16); -void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, +int ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, struct ieee80211_key *key, u8 *pos, size_t payload_len, u8 *ta); enum { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 680bcb7093db..c54db966926b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -576,17 +576,6 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } static ieee80211_tx_result debug_noinline -ieee80211_tx_h_sta(struct ieee80211_tx_data *tx) -{ - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - - if (tx->sta && tx->sta->uploaded) - info->control.sta = &tx->sta->sta; - - return TX_CONTINUE; -} - -static ieee80211_tx_result debug_noinline ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); @@ -1092,6 +1081,59 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, return true; } +static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, + struct sk_buff *skb, + struct ieee80211_tx_info *info, + struct tid_ampdu_tx *tid_tx, + int tid) +{ + bool queued = false; + + if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { + info->flags |= IEEE80211_TX_CTL_AMPDU; + } else if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { + /* + * nothing -- this aggregation session is being started + * but that might still fail with the driver + */ + } else { + spin_lock(&tx->sta->lock); + /* + * Need to re-check now, because we may get here + * + * 1) in the window during which the setup is actually + * already done, but not marked yet because not all + * packets are spliced over to the driver pending + * queue yet -- if this happened we acquire the lock + * 
either before or after the splice happens, but + * need to recheck which of these cases happened. + * + * 2) during session teardown, if the OPERATIONAL bit + * was cleared due to the teardown but the pointer + * hasn't been assigned NULL yet (or we loaded it + * before it was assigned) -- in this case it may + * now be NULL which means we should just let the + * packet pass through because splicing the frames + * back is already done. + */ + tid_tx = tx->sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx) { + /* do nothing, let packet pass through */ + } else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { + info->flags |= IEEE80211_TX_CTL_AMPDU; + } else { + queued = true; + info->control.vif = &tx->sdata->vif; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + __skb_queue_tail(&tid_tx->pending, skb); + } + spin_unlock(&tx->sta->lock); + } + + return queued; +} + /* * initialises @tx */ @@ -1104,8 +1146,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int hdrlen, tid; - u8 *qc, *state; - bool queued = false; + u8 *qc; memset(tx, 0, sizeof(*tx)); tx->skb = skb; @@ -1157,35 +1198,16 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - spin_lock(&tx->sta->lock); - /* - * XXX: This spinlock could be fairly expensive, but see the - * comment in agg-tx.c:ieee80211_agg_tx_operational(). - * One way to solve this would be to do something RCU-like - * for managing the tid_tx struct and using atomic bitops - * for the actual state -- by introducing an actual - * 'operational' bit that would be possible. It would - * require changing ieee80211_agg_tx_operational() to - * set that bit, and changing the way tid_tx is managed - * everywhere, including races between that bit and - * tid_tx going away (tid_tx being added can be easily - * committed to memory before the 'operational' bit). 
- */ - tid_tx = tx->sta->ampdu_mlme.tid_tx[tid]; - state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; - if (*state == HT_AGG_STATE_OPERATIONAL) { - info->flags |= IEEE80211_TX_CTL_AMPDU; - } else if (*state != HT_AGG_STATE_IDLE) { - /* in progress */ - queued = true; - info->control.vif = &sdata->vif; - info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - __skb_queue_tail(&tid_tx->pending, skb); - } - spin_unlock(&tx->sta->lock); + tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + if (tid_tx) { + bool queued; - if (unlikely(queued)) - return TX_QUEUED; + queued = ieee80211_tx_prep_agg(tx, skb, info, + tid_tx, tid); + + if (unlikely(queued)) + return TX_QUEUED; + } } if (is_multicast_ether_addr(hdr->addr1)) { @@ -1274,6 +1296,11 @@ static int __ieee80211_tx(struct ieee80211_local *local, break; } + if (sta && sta->uploaded) + info->control.sta = &sta->sta; + else + info->control.sta = NULL; + ret = drv_tx(local, skb); if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) { dev_kfree_skb(skb); @@ -1313,7 +1340,6 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) CALL_TXH(ieee80211_tx_h_check_assoc); CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_select_key); - CALL_TXH(ieee80211_tx_h_sta); if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); @@ -1909,11 +1935,13 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, h_pos += encaps_len; } +#ifdef CONFIG_MAC80211_MESH if (meshhdrlen > 0) { memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen); nh_pos += meshhdrlen; h_pos += meshhdrlen; } +#endif if (ieee80211_is_data_qos(fc)) { __le16 *qos_control; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5b79d552780a..748387d45bc0 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -803,8 +803,12 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) /* after reinitialize QoS TX queues setting to default, * disable QoS at all */ - local->hw.conf.flags &= 
~IEEE80211_CONF_QOS; - drv_config(local, IEEE80211_CONF_CHANGE_QOS); + + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { + sdata->vif.bss_conf.qos = + sdata->vif.type != NL80211_IFTYPE_STATION; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS); + } } void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, @@ -1138,18 +1142,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) } mutex_unlock(&local->sta_mtx); - /* Clear Suspend state so that ADDBA requests can be processed */ - - rcu_read_lock(); - - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { - clear_sta_flags(sta, WLAN_STA_BLOCK_BA); - } - } - - rcu_read_unlock(); - /* setup RTS threshold */ drv_set_rts_threshold(local, hw->wiphy->rts_threshold); @@ -1173,7 +1165,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT | BSS_CHANGED_BSSID | - BSS_CHANGED_CQM; + BSS_CHANGED_CQM | + BSS_CHANGED_QOS; switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: @@ -1202,13 +1195,26 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } - rcu_read_lock(); + /* + * Clear the WLAN_STA_BLOCK_BA flag so new aggregation + * sessions can be established after a resume. + * + * Also tear down aggregation sessions since reconfiguring + * them in a hardware restart scenario is not easily done + * right now, and the hardware will have lost information + * about the sessions, but we and the AP still think they + * are active. This is really a workaround though. 
+ */ if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { + mutex_lock(&local->sta_mtx); + + list_for_each_entry(sta, &local->sta_list, list) { ieee80211_sta_tear_down_BA_sessions(sta); + clear_sta_flags(sta, WLAN_STA_BLOCK_BA); } + + mutex_unlock(&local->sta_mtx); } - rcu_read_unlock(); /* add back keys */ list_for_each_entry(sdata, &local->interfaces, list) diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 5f3a4113bda1..9ebc8d8a1f5b 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -32,13 +32,16 @@ int ieee80211_wep_init(struct ieee80211_local *local) local->wep_tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(local->wep_tx_tfm)) + if (IS_ERR(local->wep_tx_tfm)) { + local->wep_rx_tfm = ERR_PTR(-EINVAL); return PTR_ERR(local->wep_tx_tfm); + } local->wep_rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_rx_tfm)) { crypto_free_blkcipher(local->wep_tx_tfm); + local->wep_tx_tfm = ERR_PTR(-EINVAL); return PTR_ERR(local->wep_rx_tfm); } @@ -47,8 +50,10 @@ int ieee80211_wep_init(struct ieee80211_local *local) void ieee80211_wep_free(struct ieee80211_local *local) { - crypto_free_blkcipher(local->wep_tx_tfm); - crypto_free_blkcipher(local->wep_rx_tfm); + if (!IS_ERR(local->wep_tx_tfm)) + crypto_free_blkcipher(local->wep_tx_tfm); + if (!IS_ERR(local->wep_rx_tfm)) + crypto_free_blkcipher(local->wep_rx_tfm); } static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen) @@ -122,19 +127,24 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local, /* Perform WEP encryption using given key. data buffer must have tailroom * for 4-byte ICV. data_len must not include this ICV. Note: this function * does _not_ add IV. 
data = RC4(data | CRC32(data)) */ -void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, - size_t klen, u8 *data, size_t data_len) +int ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, + size_t klen, u8 *data, size_t data_len) { struct blkcipher_desc desc = { .tfm = tfm }; struct scatterlist sg; __le32 icv; + if (IS_ERR(tfm)) + return -1; + icv = cpu_to_le32(~crc32_le(~0, data, data_len)); put_unaligned(icv, (__le32 *)(data + data_len)); crypto_blkcipher_setkey(tfm, rc4key, klen); sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length); + + return 0; } @@ -168,10 +178,8 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, /* Add room for ICV */ skb_put(skb, WEP_ICV_LEN); - ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, - iv + WEP_IV_LEN, len); - - return 0; + return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, + iv + WEP_IV_LEN, len); } @@ -185,6 +193,9 @@ int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, struct scatterlist sg; __le32 crc; + if (IS_ERR(tfm)) + return -1; + crypto_blkcipher_setkey(tfm, rc4key, klen); sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length); diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index fe29d7e5759f..58654ee33518 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -18,7 +18,7 @@ int ieee80211_wep_init(struct ieee80211_local *local); void ieee80211_wep_free(struct ieee80211_local *local); -void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, +int ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, size_t klen, u8 *data, size_t data_len); int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, diff --git a/net/mac80211/work.c b/net/mac80211/work.c index be3d4a698692..81d4ad64184a 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -560,6 +560,22 @@ 
ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk) return WORK_ACT_TIMEOUT; } +static enum work_action __must_check +ieee80211_assoc_beacon_wait(struct ieee80211_work *wk) +{ + if (wk->started) + return WORK_ACT_TIMEOUT; + + /* + * Wait up to one beacon interval ... + * should this be more if we miss one? + */ + printk(KERN_DEBUG "%s: waiting for beacon from %pM\n", + wk->sdata->name, wk->filter_ta); + wk->timeout = TU_TO_EXP_TIME(wk->assoc.bss->beacon_interval); + return WORK_ACT_NONE; +} + static void ieee80211_auth_challenge(struct ieee80211_work *wk, struct ieee80211_mgmt *mgmt, size_t len) @@ -709,13 +725,32 @@ ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk, return WORK_ACT_DONE; } +static enum work_action __must_check +ieee80211_rx_mgmt_beacon(struct ieee80211_work *wk, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee80211_sub_if_data *sdata = wk->sdata; + struct ieee80211_local *local = sdata->local; + + ASSERT_WORK_MTX(local); + + if (wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) + return WORK_ACT_MISMATCH; + + if (len < 24 + 12) + return WORK_ACT_NONE; + + printk(KERN_DEBUG "%s: beacon received\n", sdata->name); + return WORK_ACT_DONE; +} + static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; struct ieee80211_work *wk; - enum work_action rma; + enum work_action rma = WORK_ACT_NONE; u16 fc; rx_status = (struct ieee80211_rx_status *) skb->cb; @@ -731,6 +766,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, case IEEE80211_WORK_DIRECT_PROBE: case IEEE80211_WORK_AUTH: case IEEE80211_WORK_ASSOC: + case IEEE80211_WORK_ASSOC_BEACON_WAIT: bssid = wk->filter_ta; break; default: @@ -745,6 +781,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, continue; switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_BEACON: + rma = ieee80211_rx_mgmt_beacon(wk, mgmt, skb->len); 
+ break; case IEEE80211_STYPE_PROBE_RESP: rma = ieee80211_rx_mgmt_probe_resp(wk, mgmt, skb->len, rx_status); @@ -840,7 +879,7 @@ static void ieee80211_work_work(struct work_struct *work) /* * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the the rest. + * here we'll pick the rest. */ if (WARN(local->suspended, "work scheduled while going to suspend\n")) return; @@ -916,6 +955,9 @@ static void ieee80211_work_work(struct work_struct *work) case IEEE80211_WORK_REMAIN_ON_CHANNEL: rma = ieee80211_remain_on_channel_timeout(wk); break; + case IEEE80211_WORK_ASSOC_BEACON_WAIT: + rma = ieee80211_assoc_beacon_wait(wk); + break; } wk->started = started; @@ -1065,6 +1107,7 @@ ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_ASSOC_RESP: case IEEE80211_STYPE_REASSOC_RESP: + case IEEE80211_STYPE_BEACON: skb_queue_tail(&local->work_skb_queue, skb); ieee80211_queue_work(&local->hw, &local->work_work); return RX_QUEUED; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 0adbcc941ac9..8d59d27d887e 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -183,9 +183,8 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) skb_put(skb, TKIP_ICV_LEN); hdr = (struct ieee80211_hdr *) skb->data; - ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, - key, pos, len, hdr->addr2); - return 0; + return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, + key, pos, len, hdr->addr2); } @@ -436,6 +435,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 pn[CCMP_PN_LEN]; int data_len; + int queue; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -453,7 +453,10 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) ccmp_hdr2pn(pn, skb->data + hdrlen); - if (memcmp(pn, key->u.ccmp.rx_pn[rx->queue], CCMP_PN_LEN) <= 0) { + queue = 
ieee80211_is_mgmt(hdr->frame_control) ? + NUM_RX_DATA_QUEUES : rx->queue; + + if (memcmp(pn, key->u.ccmp.rx_pn[queue], CCMP_PN_LEN) <= 0) { key->u.ccmp.replays++; return RX_DROP_UNUSABLE; } @@ -470,7 +473,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; } - memcpy(key->u.ccmp.rx_pn[rx->queue], pn, CCMP_PN_LEN); + memcpy(key->u.ccmp.rx_pn[queue], pn, CCMP_PN_LEN); /* Remove CCMP header and MIC */ skb_trim(skb, skb->len - CCMP_MIC_LEN); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8593a77cfea9..43288259f4a1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -40,27 +40,6 @@ config NF_CONNTRACK if NF_CONNTRACK -config NF_CT_ACCT - bool "Connection tracking flow accounting" - depends on NETFILTER_ADVANCED - help - If this option is enabled, the connection tracking code will - keep per-flow packet and byte counters. - - Those counters can be used for flow-based accounting or the - `connbytes' match. - - Please note that currently this option only sets a default state. - You may change it at boot time with nf_conntrack.acct=0/1 kernel - parameter or by loading the nf_conntrack module with acct=0/1. - - You may also disable/enable it on a running system with: - sysctl net.netfilter.nf_conntrack_acct=0/1 - - This option will be removed in 2.6.29. - - If unsure, say `N'. - config NF_CONNTRACK_MARK bool 'Connection mark tracking support' depends on NETFILTER_ADVANCED @@ -347,6 +326,22 @@ config NETFILTER_XT_CONNMARK comment "Xtables targets" +config NETFILTER_XT_TARGET_CHECKSUM + tristate "CHECKSUM target support" + depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on NETFILTER_ADVANCED + ---help--- + This option adds a `CHECKSUM' target, which can be used in the iptables mangle + table. + + You can use this target to compute and fill in the checksum in + a packet that lacks a checksum. 
This is particularly useful, + if you need to work around old applications such as dhcp clients, + that do not work well with checksum offloads, but don't want to disable + checksum offload in your device. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_CLASSIFY tristate '"CLASSIFY" target support' depends on NETFILTER_ADVANCED @@ -424,6 +419,18 @@ config NETFILTER_XT_TARGET_HL since you can easily create immortal packets that loop forever on the network. +config NETFILTER_XT_TARGET_IDLETIMER + tristate "IDLETIMER target support" + depends on NETFILTER_ADVANCED + help + + This option adds the `IDLETIMER' target. Each matching packet + resets the timer associated with label specified when the rule is + added. When the timer expires, it triggers a sysfs notification. + The remaining time for expiration can be read via sysfs. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_LED tristate '"LED" target support' depends on LEDS_CLASS && LEDS_TRIGGERS @@ -503,7 +510,7 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. config NETFILTER_XT_TARGET_TEE - tristate '"TEE" - packet cloning to alternate destiantion' + tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED depends on (IPV6 || IPV6=n) depends on !NF_CONNTRACK || NF_CONNTRACK @@ -618,7 +625,6 @@ config NETFILTER_XT_MATCH_CONNBYTES tristate '"connbytes" per-connection counter match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - select NF_CT_ACCT help This option adds a `connbytes' match, which allows you to match the number of bytes and/or packets for each direction within a connection. @@ -657,6 +663,15 @@ config NETFILTER_XT_MATCH_CONNTRACK To compile it as a module, choose M here. If unsure, say N. 
+config NETFILTER_XT_MATCH_CPU + tristate '"cpu" match support' + depends on NETFILTER_ADVANCED + help + CPU matching allows you to match packets based on the CPU + currently handling the packet. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_DCCP tristate '"dccp" protocol match support' depends on NETFILTER_ADVANCED @@ -736,6 +751,16 @@ config NETFILTER_XT_MATCH_IPRANGE If unsure, say M. +config NETFILTER_XT_MATCH_IPVS + tristate '"ipvs" match support' + depends on IP_VS + depends on NETFILTER_ADVANCED + depends on NF_CONNTRACK + help + This option allows you to match against IPVS properties of a packet. + + If unsure, say N. + config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 14e3a8fd8180..441050f31111 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o # targets +obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o @@ -61,6 +62,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o +obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o @@ -68,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o 
obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o @@ -75,6 +78,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o +obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 712ccad13344..46a77d5c3887 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -3,7 +3,7 @@ # menuconfig IP_VS tristate "IP virtual server support" - depends on NET && INET && NETFILTER + depends on NET && INET && NETFILTER && NF_CONNTRACK ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This @@ -26,7 +26,7 @@ if IP_VS config IP_VS_IPV6 bool "IPv6 support for IPVS" - depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) + depends on IPV6 = y || IP_VS = IPV6 ---help--- Add IPv6 support to IPVS. This is incomplete and might be dangerous. @@ -87,19 +87,16 @@ config IP_VS_PROTO_UDP protocol. Say Y if unsure. config IP_VS_PROTO_AH_ESP - bool - depends on UNDEFINED + def_bool IP_VS_PROTO_ESP || IP_VS_PROTO_AH config IP_VS_PROTO_ESP bool "ESP load balancing support" - select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing ESP (Encapsulation Security Payload) transport protocol. Say Y if unsure. config IP_VS_PROTO_AH bool "AH load balancing support" - select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. 
@@ -238,7 +235,7 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP + depends on IP_VS_PROTO_TCP && NF_NAT ---help--- FTP is a protocol that transfers IP address and/or port number in the payload. In the virtual server via Network Address Translation, diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 1cb0e834f8ff..e76f87f4aca8 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = { }; #endif - -/* - * Replace a segment of data with a new segment - */ -int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, - char *o_buf, int o_len, char *n_buf, int n_len) -{ - int diff; - int o_offset; - int o_left; - - EnterFunction(9); - - diff = n_len - o_len; - o_offset = o_buf - (char *)skb->data; - /* The length of left data after o_buf+o_len in the skb data */ - o_left = skb->len - (o_offset + o_len); - - if (diff <= 0) { - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - skb_trim(skb, skb->len + diff); - } else if (diff <= skb_tailroom(skb)) { - skb_put(skb, diff); - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - } else { - if (pskb_expand_head(skb, skb_headroom(skb), diff, pri)) - return -ENOMEM; - skb_put(skb, diff); - memmove(skb->data + o_offset + n_len, - skb->data + o_offset + o_len, o_left); - skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); - } - - /* must update the iph total length here */ - ip_hdr(skb)->tot_len = htons(skb->len); - - LeaveFunction(9); - return 0; -} - - int __init ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index d8f7e8ef67b4..b71c69a2db13 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -158,10 +158,14 @@ static inline int 
ip_vs_conn_hash(struct ip_vs_conn *cp) unsigned hash; int ret; + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return 0; + /* Hash by protocol, client address and port */ hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); + spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { list_add(&cp->c_list, &ip_vs_conn_tab[hash]); @@ -174,6 +178,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) ret = 0; } + spin_unlock(&cp->lock); ct_write_unlock(hash); return ret; @@ -193,6 +198,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); + spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { list_del(&cp->c_list); @@ -202,6 +208,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) } else ret = 0; + spin_unlock(&cp->lock); ct_write_unlock(hash); return ret; @@ -264,6 +271,29 @@ struct ip_vs_conn *ip_vs_conn_in_get return cp; } +struct ip_vs_conn * +ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); + /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, @@ -349,14 +379,37 @@ struct ip_vs_conn *ip_vs_conn_out_get return ret; } +struct ip_vs_conn * +ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int 
inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); /* * Put back the conn and restart its timer with its timeout */ void ip_vs_conn_put(struct ip_vs_conn *cp) { - /* reset it expire in its timeout */ - mod_timer(&cp->timer, jiffies+cp->timeout); + unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ? + 0 : cp->timeout; + mod_timer(&cp->timer, jiffies+t); __ip_vs_conn_put(cp); } @@ -649,7 +702,7 @@ static void ip_vs_conn_expire(unsigned long data) /* * unhash it if it is hashed in the conn table */ - if (!ip_vs_conn_unhash(cp)) + if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) goto expire_later; /* diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1cd6e3fd058b..4f8ddba48011 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -54,7 +54,6 @@ EXPORT_SYMBOL(register_ip_vs_scheduler); EXPORT_SYMBOL(unregister_ip_vs_scheduler); -EXPORT_SYMBOL(ip_vs_skb_replace); EXPORT_SYMBOL(ip_vs_proto_name); EXPORT_SYMBOL(ip_vs_conn_new); EXPORT_SYMBOL(ip_vs_conn_in_get); @@ -194,6 +193,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport; /* destination port to forward */ + __be16 flags; union nf_inet_addr snet; /* source network of the client, after masking */ @@ -340,6 +340,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, dport = ports[1]; } + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET + && iph.protocol == IPPROTO_UDP)? 
+ IP_VS_CONN_F_ONE_PACKET : 0; + /* * Create a new connection according to the template */ @@ -347,7 +351,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, &iph.saddr, ports[0], &iph.daddr, ports[1], &dest->addr, dport, - 0, + flags, dest); if (cp == NULL) { ip_vs_conn_put(ct); @@ -377,7 +381,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; - __be16 _ports[2], *pptr; + __be16 _ports[2], *pptr, flags; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); @@ -407,6 +411,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; } + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET + && iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; + /* * Create a connection entry. */ @@ -414,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) &iph.saddr, pptr[0], &iph.daddr, pptr[1], &dest->addr, dest->port ? dest->port : pptr[1], - 0, + flags, dest); if (cp == NULL) return NULL; @@ -464,6 +472,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; + __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && + iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; ip_vs_service_put(svc); @@ -474,7 +485,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &daddr, 0, - IP_VS_CONN_F_BYPASS, + IP_VS_CONN_F_BYPASS | flags, NULL); if (cp == NULL) return NF_DROP; @@ -524,26 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, return NF_DROP; } - -/* - * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING - * chain, and is used for VS/NAT. 
- * It detects packets for VS/NAT connections and sends the packets - * immediately. This can avoid that iptable_nat mangles the packets - * for VS/NAT. - */ -static unsigned int ip_vs_post_routing(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - if (!skb->ipvs_property) - return NF_ACCEPT; - /* The packet was sent from IPVS, exit this chain */ - return NF_STOP; -} - __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); @@ -1487,14 +1478,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC-1, - }, #ifdef CONFIG_IP_VS_IPV6 /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1523,14 +1506,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_NAT_SRC-1, - }, #endif }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 36dc1d88c2fa..0f0c079c422a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1864,14 +1864,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) svc->scheduler->name); else #endif - seq_printf(seq, "%s %08X:%04X %s ", + seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - svc->scheduler->name); + svc->scheduler->name, + (svc->flags & 
IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { - seq_printf(seq, "FWM %08X %s ", - svc->fwmark, svc->scheduler->name); + seq_printf(seq, "FWM %08X %s %s", + svc->fwmark, svc->scheduler->name, + (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } if (svc->flags & IP_VS_SVC_F_PERSISTENT) diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 2ae747a376a5..f228a17ec649 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -20,6 +20,17 @@ * * Author: Wouter Gadeyne * + * + * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from + * http://www.ssi.bg/~ja/nfct/: + * + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS + * + * Portions Copyright (C) 2001-2002 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. + * + * Portions Copyright (C) 2003-2008 + * Julian Anastasov */ #define KMSG_COMPONENT "IPVS" @@ -32,6 +43,9 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_nat_helper.h> #include <linux/gfp.h> #include <net/protocol.h> #include <net/tcp.h> @@ -43,6 +57,16 @@ #define SERVER_STRING "227 Entering Passive Mode (" #define CLIENT_STRING "PORT " +#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" +#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ + &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ + (T)->dst.protonum + +#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" +#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ + &((C)->vaddr.ip), ntohs((C)->vport), \ + &((C)->daddr.ip), ntohs((C)->dport), \ + (C)->protocol, (C)->state /* * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper @@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, return 1; } +/* + * Called from init_conntrack() as expectfn handler. 
+ */ +static void +ip_vs_expect_callback(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_conntrack_tuple *orig, new_reply; + struct ip_vs_conn *cp; + + if (exp->tuple.src.l3num != PF_INET) + return; + + /* + * We assume that no NF locks are held before this callback. + * ip_vs_conn_out_get and ip_vs_conn_in_get should match their + * expectations even if they use wildcard values, now we provide the + * actual values from the newly created original conntrack direction. + * The conntrack is confirmed when packet reaches IPVS hooks. + */ + + /* RS->CLIENT */ + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply CLIENT->RS to CLIENT->VS */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found inout cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.dst.u3 = cp->vaddr; + new_reply.dst.u.tcp.port = cp->vport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE + ", inout cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + /* CLIENT->VS */ + cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply VS->CLIENT to RS->CLIENT */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found outin cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.src.u3 = cp->daddr; + new_reply.src.u.tcp.port = cp->dport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " + FMT_TUPLE ", outin cp=" FMT_CONN 
"\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE + " - unknown expect\n", + __func__, ct, ct->status, ARG_TUPLE(orig)); + return; + +alter: + /* Never alter conntrack for non-NAT conns */ + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) + nf_conntrack_alter_reply(ct, &new_reply); + ip_vs_conn_put(cp); + return; +} + +/* + * Create NF conntrack expectation with wildcard (optional) source port. + * Then the default callback function will alter the reply and will confirm + * the conntrack entry when the first packet comes. + */ +static void +ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 *port, int from_rs) +{ + struct nf_conntrack_expect *exp; + + BUG_ON(!ct || ct == &nf_conntrack_untracked); + + exp = nf_ct_expect_alloc(ct); + if (!exp) + return; + + if (from_rs) + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->daddr, &cp->caddr, + proto, port, &cp->cport); + else + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->caddr, &cp->vaddr, + proto, port, &cp->vport); + + exp->expectfn = ip_vs_expect_callback; + + IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n", + __func__, ct, ARG_TUPLE(&exp->tuple)); + nf_ct_expect_related(exp); + nf_ct_expect_put(exp); +} /* * Look at outgoing ftp packets to catch the response to a PASV command @@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; - int ret; + int ret = 0; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -219,19 +358,26 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); + ct = nf_ct_get(skb, &ctinfo); + if (ct && 
!nf_ct_is_untracked(ct)) { + /* If mangling fails this function will return 0 + * which will cause the packet to be dropped. + * Mangling can only fail under memory pressure, + * hopefully it will succeed on the retransmitted + * packet. + */ + ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + start-data, end-start, + buf, buf_len); + if (ret) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, NULL, 0); + } + /* - * Calculate required delta-offset to keep TCP happy + * Not setting 'diff' is intentional, otherwise the sequence + * would be adjusted twice. */ - *diff = buf_len - (end-start); - - if (*diff == 0) { - /* simply replace it with new passive address */ - memcpy(start, buf, buf_len); - ret = 1; - } else { - ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, - end-start, buf, buf_len); - } cp->app_data = NULL; ip_vs_tcp_conn_listen(n_cp); @@ -263,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -349,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } + ct = (struct nf_conn *)skb->nfct; + if (ct && ct != &nf_conntrack_untracked) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, &n_cp->dport, 1); + /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 94a45213faa6..9323f8944199 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -11,7 +11,7 @@ * Changes: * Martin Hamilton : fixed the terrible locking bugs * *lock(tbl->lock) ==> *lock(&tbl->lock) - * Wensong Zhang : fixed the uninitilized tbl->lock bug + * Wensong Zhang : fixed the uninitialized tbl->lock bug * Wensong Zhang : added doing full expiration check to * collect stale entries of 24+ hours when * no partial expire check in a half hour diff 
--git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 535dc2b419d8..dbeed8ea421a 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -386,7 +386,7 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, ip_vs_addr_copy(dest->af, &en->addr, daddr); en->lastuse = jiffies; - /* initilize its dest set */ + /* initialize its dest set */ atomic_set(&(en->set.size), 0); INIT_LIST_HEAD(&en->set.list); rwlock_init(&en->set.lock); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 2d3d5e4b35f8..027f654799fe 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -98,6 +98,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) return NULL; } +EXPORT_SYMBOL(ip_vs_proto_get); /* diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index c9a3f7a21d53..4c0855cb006e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -8,55 +8,6 @@ #include <net/sctp/checksum.h> #include <net/ip_vs.h> - -static struct ip_vs_conn * -sctp_conn_in_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} - -static struct ip_vs_conn * -sctp_conn_out_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - 
return NULL; - - if (likely(!inverse)) - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} - static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -173,7 +124,7 @@ sctp_dnat_handler(struct sk_buff *skb, return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } @@ -1169,8 +1120,8 @@ struct ip_vs_protocol ip_vs_protocol_sctp = { .register_app = sctp_register_app, .unregister_app = sctp_unregister_app, .conn_schedule = sctp_conn_schedule, - .conn_in_get = sctp_conn_in_get, - .conn_out_get = sctp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = sctp_snat_handler, .dnat_handler = sctp_dnat_handler, .csum_check = sctp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 91d28e073742..282d24de8592 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -27,52 +27,6 @@ #include <net/ip_vs.h> - -static struct ip_vs_conn * -tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - -static struct ip_vs_conn * -tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 
_ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - - static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -721,8 +675,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, - .conn_in_get = tcp_conn_in_get, - .conn_out_get = tcp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = tcp_snat_handler, .dnat_handler = tcp_dnat_handler, .csum_check = tcp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index e7a6885e0167..8553231b5d41 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -27,58 +27,6 @@ #include <net/ip.h> #include <net/ip6_checksum.h> -static struct ip_vs_conn * -udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - -static struct ip_vs_conn * -udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; 
- __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -520,8 +468,8 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .init = udp_init, .exit = udp_exit, .conn_schedule = udp_conn_schedule, - .conn_in_get = udp_conn_in_get, - .conn_out_get = udp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, .csum_check = udp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 93c15a107b2c..21e1a5e9b9d3 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -28,6 +28,7 @@ #include <net/ip6_route.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> #include <linux/netfilter_ipv4.h> #include <net/ip_vs.h> @@ -90,10 +91,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) &dest->addr.ip); return NULL; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", &dest->addr.ip, - atomic_read(&rt->u.dst.__refcnt), rtos); + atomic_read(&rt->dst.__refcnt), rtos); } spin_unlock(&dest->dst_lock); } else { @@ -148,10 +149,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) &dest->addr.in6); return NULL; } - __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", &dest->addr.in6, - atomic_read(&rt->u.dst.__refcnt)); + 
atomic_read(&rt->dst.__refcnt)); } spin_unlock(&dest->dst_lock); } else { @@ -198,7 +199,7 @@ do { \ (skb)->ipvs_property = 1; \ skb_forward_csum(skb); \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - (rt)->u.dst.dev, dst_output); \ + (rt)->dst.dev, dst_output); \ } while (0) @@ -245,7 +246,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -265,7 +266,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -309,9 +310,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -323,13 +324,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -348,6 +349,30 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif +static void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +{ + struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conntrack_tuple new_tuple; + + if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct)) + return; + + /* + * The connection is not yet in the hashtable, so we update it. 
+ * CIP->VIP will remain the same, so leave the tuple in + * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the + * real-server we will see RIP->DIP. + */ + new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + new_tuple.src.u3 = cp->daddr; + /* + * This will also take care of UDP and other protocols. + */ + new_tuple.src.u.tcp.port = cp->dport; + nf_conntrack_alter_reply(ct, &new_tuple); +} + /* * NAT transmitter (only for outside-to-inside nat forwarding) * Not used for related ICMP @@ -376,7 +401,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -388,12 +413,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -403,6 +428,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + ip_vs_update_conntrack(skb, cp); + /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. 
*/ @@ -452,9 +479,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); @@ -465,12 +492,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -479,6 +506,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + ip_vs_update_conntrack(skb, cp); + /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ @@ -498,7 +527,7 @@ tx_error: kfree_skb(skb); return NF_STOLEN; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif @@ -549,9 +578,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { ip_rt_put(rt); IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); @@ -601,7 +630,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. 
@@ -615,7 +644,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->ttl = old_iph->ttl; - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -660,12 +689,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!rt) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); /* TODO IPv6: do we need this check in IPv6? */ if (mtu < 1280) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); goto tx_error; } @@ -674,7 +703,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -689,7 +718,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); kfree_skb(skb); IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; @@ -707,7 +736,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. 
@@ -760,7 +789,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -780,7 +809,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -813,10 +842,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -827,13 +856,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -888,7 +917,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); @@ -900,12 +929,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + 
skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp(skb, pp, cp, 0); @@ -963,9 +992,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -975,12 +1004,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp_v6(skb, pp, cp, 0); @@ -1001,7 +1030,7 @@ out: LeaveFunction(10); return rc; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index ab81b380eae6..5178c691ecbf 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -17,13 +17,7 @@ #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> -#ifdef CONFIG_NF_CT_ACCT -#define NF_CT_ACCT_DEFAULT 1 -#else -#define NF_CT_ACCT_DEFAULT 0 -#endif - -static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; +static int nf_ct_acct __read_mostly; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); @@ -114,12 +108,6 @@ int nf_conntrack_acct_init(struct net *net) net->ct.sysctl_acct = nf_ct_acct; if (net_eq(net, &init_net)) { -#ifdef CONFIG_NF_CT_ACCT - printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. 
Please use\n"); - printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n"); - printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); -#endif - ret = nf_ct_extend_register(&acct_extend); if (ret < 0) { printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index eeeb8bc73982..df3eedb142ff 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct nf_conn nf_conntrack_untracked __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_untracked); +DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -619,9 +619,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); -#ifdef CONFIG_NET_NS - ct->ct_net = net; -#endif + write_pnet(&ct->ct_net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES if (zone) { struct nf_conntrack_zone *nf_ct_zone; @@ -968,8 +966,7 @@ acct: if (acct) { spin_lock_bh(&ct->lock); acct[CTINFO2DIR(ctinfo)].packets++; - acct[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); + acct[CTINFO2DIR(ctinfo)].bytes += skb->len; spin_unlock_bh(&ct->lock); } } @@ -1183,10 +1180,21 @@ static void nf_ct_release_dying_list(struct net *net) spin_unlock_bh(&nf_conntrack_lock); } +static int untrack_refs(void) +{ + int cnt = 0, cpu; + + for_each_possible_cpu(cpu) { + struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); + + cnt += atomic_read(&ct->ct_general.use) - 1; + } + return cnt; +} + static void 
nf_conntrack_cleanup_init_net(void) { - /* wait until all references to nf_conntrack_untracked are dropped */ - while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) + while (untrack_refs() > 0) schedule(); nf_conntrack_helper_fini(); @@ -1321,10 +1329,19 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); +void nf_ct_untracked_status_or(unsigned long bits) +{ + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(nf_conntrack_untracked, cpu).status |= bits; +} +EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); + static int nf_conntrack_init_init_net(void) { int max_factor = 8; - int ret; + int ret, cpu; /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ @@ -1363,13 +1380,13 @@ static int nf_conntrack_init_init_net(void) goto err_extend; #endif /* Set up fake conntrack: to never be deleted, not in any hashes */ -#ifdef CONFIG_NET_NS - nf_conntrack_untracked.ct_net = &init_net; -#endif - atomic_set(&nf_conntrack_untracked.ct_general.use, 1); + for_each_possible_cpu(cpu) { + struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); + write_pnet(&ct->ct_net, &init_net); + atomic_set(&ct->ct_general.use, 1); + } /* - and look it like as a confirmed connection */ - set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - + nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); return 0; #ifdef CONFIG_NF_CONNTRACK_ZONES diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index fdc8fb4ae10f..7dcf7a404190 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -23,9 +23,10 @@ void __nf_ct_ext_destroy(struct nf_conn *ct) { unsigned int i; struct nf_ct_ext_type *t; + struct nf_ct_ext *ext = ct->ext; for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!nf_ct_ext_exist(ct, i)) + if (!__nf_ct_ext_exist(ext, i)) continue; 
rcu_read_lock(); @@ -73,44 +74,45 @@ static void __nf_ct_ext_free_rcu(struct rcu_head *head) void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { - struct nf_ct_ext *new; + struct nf_ct_ext *old, *new; int i, newlen, newoff; struct nf_ct_ext_type *t; /* Conntrack must not be confirmed to avoid races on reallocation. */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); - if (!ct->ext) + old = ct->ext; + if (!old) return nf_ct_ext_create(&ct->ext, id, gfp); - if (nf_ct_ext_exist(ct, id)) + if (__nf_ct_ext_exist(old, id)) return NULL; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); - newoff = ALIGN(ct->ext->len, t->align); + newoff = ALIGN(old->len, t->align); newlen = newoff + t->len; rcu_read_unlock(); - new = __krealloc(ct->ext, newlen, gfp); + new = __krealloc(old, newlen, gfp); if (!new) return NULL; - if (new != ct->ext) { + if (new != old) { for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!nf_ct_ext_exist(ct, i)) + if (!__nf_ct_ext_exist(old, i)) continue; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[i]); if (t && t->move) t->move((void *)new + new->offset[i], - (void *)ct->ext + ct->ext->offset[i]); + (void *)old + old->offset[i]); rcu_read_unlock(); } - call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu); + call_rcu(&old->rcu, __nf_ct_ext_free_rcu); ct->ext = new; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 6eaee7c8a337..b969025cf82f 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -734,11 +734,11 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { if (rt1->rt_gateway == rt2->rt_gateway && - rt1->u.dst.dev == rt2->u.dst.dev) + rt1->dst.dev == rt2->dst.dev) ret = 1; - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); } - dst_release(&rt1->u.dst); + dst_release(&rt1->dst); } break; } @@ 
-753,11 +753,11 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && - rt1->u.dst.dev == rt2->u.dst.dev) + rt1->dst.dev == rt2->dst.dev) ret = 1; - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); } - dst_release(&rt1->u.dst); + dst_release(&rt1->dst); } break; } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 497b2224536f..aadde018a072 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -61,7 +61,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, goto out; rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { if (ifa->ifa_broadcast == iph->daddr) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c42ff6aa441d..5bae1cd15eea 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) int err; /* ignore our fake conntrack entry */ - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return 0; if (events & (1 << IPCT_DESTROY)) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 9dd8cd4fb6e6..c4c885dca3bd 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -585,8 +585,16 @@ static bool tcp_in_window(const struct nf_conn *ct, * Let's try to use the data from the packet. */ sender->td_end = end; + win <<= sender->td_scale; sender->td_maxwin = (win == 0 ? 
1 : win); sender->td_maxend = end + sender->td_maxwin; + /* + * We haven't seen traffic in the other direction yet + * but we have to tweak window tracking to pass III + * and IV until that happens. + */ + if (receiver->td_maxwin == 0) + receiver->td_end = receiver->td_maxend = sack; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) @@ -680,7 +688,7 @@ static bool tcp_in_window(const struct nf_conn *ct, /* * Update receiver data. */ - if (after(end, sender->td_maxend)) + if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; @@ -736,27 +744,19 @@ static bool tcp_in_window(const struct nf_conn *ct, return res; } -#define TH_FIN 0x01 -#define TH_SYN 0x02 -#define TH_RST 0x04 -#define TH_PUSH 0x08 -#define TH_ACK 0x10 -#define TH_URG 0x20 -#define TH_ECE 0x40 -#define TH_CWR 0x80 - /* table of valid flag combinations - PUSH, ECE and CWR are always valid */ -static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = +static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| + TCPHDR_URG) + 1] = { - [TH_SYN] = 1, - [TH_SYN|TH_URG] = 1, - [TH_SYN|TH_ACK] = 1, - [TH_RST] = 1, - [TH_RST|TH_ACK] = 1, - [TH_FIN|TH_ACK] = 1, - [TH_FIN|TH_ACK|TH_URG] = 1, - [TH_ACK] = 1, - [TH_ACK|TH_URG] = 1, + [TCPHDR_SYN] = 1, + [TCPHDR_SYN|TCPHDR_URG] = 1, + [TCPHDR_SYN|TCPHDR_ACK] = 1, + [TCPHDR_RST] = 1, + [TCPHDR_RST|TCPHDR_ACK] = 1, + [TCPHDR_FIN|TCPHDR_ACK] = 1, + [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1, + [TCPHDR_ACK] = 1, + [TCPHDR_ACK|TCPHDR_URG] = 1, }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ @@ -803,7 +803,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, } /* Check TCP flags. 
*/ - tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH)); + tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fc9a211e629e..6a1572b0ab41 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -66,9 +66,10 @@ struct nfulnl_instance { u_int16_t group_num; /* number of this queue */ u_int16_t flags; u_int8_t copy_mode; + struct rcu_head rcu; }; -static DEFINE_RWLOCK(instances_lock); +static DEFINE_SPINLOCK(instances_lock); static atomic_t global_seq; #define INSTANCE_BUCKETS 16 @@ -88,7 +89,7 @@ __instance_lookup(u_int16_t group_num) struct nfulnl_instance *inst; head = &instance_table[instance_hashfn(group_num)]; - hlist_for_each_entry(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, pos, head, hlist) { if (inst->group_num == group_num) return inst; } @@ -106,22 +107,26 @@ instance_lookup_get(u_int16_t group_num) { struct nfulnl_instance *inst; - read_lock_bh(&instances_lock); + rcu_read_lock_bh(); inst = __instance_lookup(group_num); - if (inst) - instance_get(inst); - read_unlock_bh(&instances_lock); + if (inst && !atomic_inc_not_zero(&inst->use)) + inst = NULL; + rcu_read_unlock_bh(); return inst; } +static void nfulnl_instance_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct nfulnl_instance, rcu)); + module_put(THIS_MODULE); +} + static void instance_put(struct nfulnl_instance *inst) { - if (inst && atomic_dec_and_test(&inst->use)) { - kfree(inst); - module_put(THIS_MODULE); - } + if (inst && atomic_dec_and_test(&inst->use)) + call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); } static void nfulnl_timer(unsigned long data); @@ -132,7 +137,7 @@ instance_create(u_int16_t group_num, int pid) struct nfulnl_instance *inst; int err; - write_lock_bh(&instances_lock); + 
spin_lock_bh(&instances_lock); if (__instance_lookup(group_num)) { err = -EEXIST; goto out_unlock; @@ -166,32 +171,37 @@ instance_create(u_int16_t group_num, int pid) inst->copy_mode = NFULNL_COPY_PACKET; inst->copy_range = NFULNL_COPY_RANGE_MAX; - hlist_add_head(&inst->hlist, + hlist_add_head_rcu(&inst->hlist, &instance_table[instance_hashfn(group_num)]); - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); return inst; out_unlock: - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); return ERR_PTR(err); } static void __nfulnl_flush(struct nfulnl_instance *inst); +/* called with BH disabled */ static void __instance_destroy(struct nfulnl_instance *inst) { /* first pull it out of the global list */ - hlist_del(&inst->hlist); + hlist_del_rcu(&inst->hlist); /* then flush all pending packets from skb */ - spin_lock_bh(&inst->lock); + spin_lock(&inst->lock); + + /* lockless readers wont be able to use us */ + inst->copy_mode = NFULNL_COPY_DISABLED; + if (inst->skb) __nfulnl_flush(inst); - spin_unlock_bh(&inst->lock); + spin_unlock(&inst->lock); /* and finally put the refcount */ instance_put(inst); @@ -200,9 +210,9 @@ __instance_destroy(struct nfulnl_instance *inst) static inline void instance_destroy(struct nfulnl_instance *inst) { - write_lock_bh(&instances_lock); + spin_lock_bh(&instances_lock); __instance_destroy(inst); - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); } static int @@ -403,8 +413,9 @@ __build_packet_message(struct nfulnl_instance *inst, NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV, htonl(indev->ifindex)); /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV, - htonl(indev->br_port->br->dev->ifindex)); + htonl(br_port_get_rcu(indev)->br->dev->ifindex)); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ @@ -430,8 +441,9 @@ 
__build_packet_message(struct nfulnl_instance *inst, NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, htonl(outdev->ifindex)); /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV, - htonl(outdev->br_port->br->dev->ifindex)); + htonl(br_port_get_rcu(outdev)->br->dev->ifindex)); } else { /* Case 2: indev is a bridge group, we need to look * for physical device (when called from ipv4) */ @@ -619,6 +631,7 @@ nfulnl_log_packet(u_int8_t pf, size += nla_total_size(data_len); break; + case NFULNL_COPY_DISABLED: default: goto unlock_and_release; } @@ -672,7 +685,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, int i; /* destroy all instances for this pid */ - write_lock_bh(&instances_lock); + spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; struct nfulnl_instance *inst; @@ -684,7 +697,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, __instance_destroy(inst); } } - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); } return NOTIFY_DONE; } @@ -861,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st) for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { if (!hlist_empty(&instance_table[st->bucket])) - return instance_table[st->bucket].first; + return rcu_dereference_bh(instance_table[st->bucket].first); } return NULL; } static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) { - h = h->next; + h = rcu_dereference_bh(h->next); while (!h) { if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = instance_table[st->bucket].first; + h = rcu_dereference_bh(instance_table[st->bucket].first); } return h; } @@ -890,9 +903,9 @@ static struct hlist_node *get_idx(struct iter_state *st, loff_t pos) } static void *seq_start(struct seq_file *seq, loff_t *pos) - __acquires(instances_lock) + __acquires(rcu_bh) { - read_lock_bh(&instances_lock); + rcu_read_lock_bh(); 
return get_idx(seq->private, *pos); } @@ -903,9 +916,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) - __releases(instances_lock) + __releases(rcu_bh) { - read_unlock_bh(&instances_lock); + rcu_read_unlock_bh(); } static int seq_show(struct seq_file *s, void *v) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 12e1ab37fcd8..68e67d19724d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -46,17 +46,19 @@ struct nfqnl_instance { int peer_pid; unsigned int queue_maxlen; unsigned int copy_range; - unsigned int queue_total; unsigned int queue_dropped; unsigned int queue_user_dropped; - unsigned int id_sequence; /* 'sequence' of pkt ids */ u_int16_t queue_num; /* number of this queue */ u_int8_t copy_mode; - - spinlock_t lock; - +/* + * Following fields are dirtied for each queued packet, + * keep them in same cache line if possible. + */ + spinlock_t lock; + unsigned int queue_total; + atomic_t id_sequence; /* 'sequence' of pkt ids */ struct list_head queue_list; /* packets in queue */ }; @@ -238,32 +240,24 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, outdev = entry->outdev; - spin_lock_bh(&queue->lock); - - switch ((enum nfqnl_config_mode)queue->copy_mode) { + switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { case NFQNL_COPY_META: case NFQNL_COPY_NONE: break; case NFQNL_COPY_PACKET: if (entskb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(entskb)) { - spin_unlock_bh(&queue->lock); + skb_checksum_help(entskb)) return NULL; - } - if (queue->copy_range == 0 - || queue->copy_range > entskb->len) + + data_len = ACCESS_ONCE(queue->copy_range); + if (data_len == 0 || data_len > entskb->len) data_len = entskb->len; - else - data_len = queue->copy_range; size += nla_total_size(data_len); break; } - entry->id = queue->id_sequence++; - - spin_unlock_bh(&queue->lock); skb = alloc_skb(size, GFP_ATOMIC); 
if (!skb) @@ -278,6 +272,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(queue->queue_num); + entry->id = atomic_inc_return(&queue->id_sequence); pmsg.packet_id = htonl(entry->id); pmsg.hw_protocol = entskb->protocol; pmsg.hook = entry->hook; @@ -296,8 +291,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, htonl(indev->ifindex)); /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by __nf_queue */ NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, - htonl(indev->br_port->br->dev->ifindex)); + htonl(br_port_get_rcu(indev)->br->dev->ifindex)); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ @@ -321,8 +317,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, htonl(outdev->ifindex)); /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by __nf_queue */ NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, - htonl(outdev->br_port->br->dev->ifindex)); + htonl(br_port_get_rcu(outdev)->br->dev->ifindex)); } else { /* Case 2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ @@ -866,7 +863,7 @@ static int seq_show(struct seq_file *s, void *v) inst->peer_pid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, - inst->id_sequence, 1); + atomic_read(&inst->id_sequence), 1); } static const struct seq_operations nfqnl_seq_ops = { diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c new file mode 100644 index 000000000000..0f642ef8cd26 --- /dev/null +++ b/net/netfilter/xt_CHECKSUM.c @@ -0,0 +1,70 @@ +/* iptables module for the packet checksum mangling + * + * (C) 2002 by Harald Welte <laforge@netfilter.org> + * (C) 2010 Red Hat, Inc. + * + * Author: Michael S. 
Tsirkin <mst@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_CHECKSUM.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>"); +MODULE_DESCRIPTION("Xtables: checksum modification"); +MODULE_ALIAS("ipt_CHECKSUM"); +MODULE_ALIAS("ip6t_CHECKSUM"); + +static unsigned int +checksum_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb_checksum_help(skb); + + return XT_CONTINUE; +} + +static int checksum_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_CHECKSUM_info *einfo = par->targinfo; + + if (einfo->operation & ~XT_CHECKSUM_OP_FILL) { + pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); + return -EINVAL; + } + if (!einfo->operation) { + pr_info("no CHECKSUM operation enabled\n"); + return -EINVAL; + } + return 0; +} + +static struct xt_target checksum_tg_reg __read_mostly = { + .name = "CHECKSUM", + .family = NFPROTO_UNSPEC, + .target = checksum_tg, + .targetsize = sizeof(struct xt_CHECKSUM_info), + .table = "mangle", + .checkentry = checksum_tg_check, + .me = THIS_MODULE, +}; + +static int __init checksum_tg_init(void) +{ + return xt_register_target(&checksum_tg_reg); +} + +static void __exit checksum_tg_exit(void) +{ + xt_unregister_target(&checksum_tg_reg); +} + +module_init(checksum_tg_init); +module_exit(checksum_tg_exit); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 562bf3266e04..0cb6053f02fd 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) return -EINVAL; if (info->flags & XT_CT_NOTRACK) { - ct = 
&nf_conntrack_untracked; + ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); goto out; } @@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (ct != &nf_conntrack_untracked) { + if (!nf_ct_is_untracked(ct)) { help = nfct_help(ct); if (help) module_put(help->helper->me); diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c new file mode 100644 index 000000000000..be1f22e13545 --- /dev/null +++ b/net/netfilter/xt_IDLETIMER.c @@ -0,0 +1,315 @@ +/* + * linux/net/netfilter/xt_IDLETIMER.c + * + * Netfilter module to trigger a timer when packet matches. + * After timer expires a kevent will be sent. + * + * Copyright (C) 2004, 2010 Nokia Corporation + * Written by Timo Teras <ext-timo.teras@nokia.com> + * + * Converted to x_tables and reworked for upstream inclusion + * by Luciano Coelho <luciano.coelho@nokia.com> + * + * Contact: Luciano Coelho <luciano.coelho@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_IDLETIMER.h> +#include <linux/kdev_t.h> +#include <linux/kobject.h> +#include <linux/workqueue.h> +#include <linux/sysfs.h> + +struct idletimer_tg_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); +}; + +struct idletimer_tg { + struct list_head entry; + struct timer_list timer; + struct work_struct work; + + struct kobject *kobj; + struct idletimer_tg_attr attr; + + unsigned int refcnt; +}; + +static LIST_HEAD(idletimer_tg_list); +static DEFINE_MUTEX(list_mutex); + +static struct kobject *idletimer_tg_kobj; + +static +struct idletimer_tg *__idletimer_tg_find_by_label(const char *label) +{ + struct idletimer_tg *entry; + + BUG_ON(!label); + + list_for_each_entry(entry, &idletimer_tg_list, entry) { + if (!strcmp(label, entry->attr.attr.name)) + return entry; + } + + return NULL; +} + +static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct idletimer_tg *timer; + unsigned long expires = 0; + + mutex_lock(&list_mutex); + + timer = __idletimer_tg_find_by_label(attr->name); + if (timer) + expires = timer->timer.expires; + + mutex_unlock(&list_mutex); + + if (time_after(expires, jiffies)) + return sprintf(buf, "%u\n", + jiffies_to_msecs(expires - jiffies) / 1000); + + return sprintf(buf, "0\n"); +} + +static void idletimer_tg_work(struct work_struct *work) +{ + struct idletimer_tg *timer = container_of(work, struct idletimer_tg, + work); + + sysfs_notify(idletimer_tg_kobj, NULL, 
timer->attr.attr.name); +} + +static void idletimer_tg_expired(unsigned long data) +{ + struct idletimer_tg *timer = (struct idletimer_tg *) data; + + pr_debug("timer %s expired\n", timer->attr.attr.name); + + schedule_work(&timer->work); +} + +static int idletimer_tg_create(struct idletimer_tg_info *info) +{ + int ret; + + info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL); + if (!info->timer) { + pr_debug("couldn't alloc timer\n"); + ret = -ENOMEM; + goto out; + } + + info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); + if (!info->timer->attr.attr.name) { + pr_debug("couldn't alloc attribute name\n"); + ret = -ENOMEM; + goto out_free_timer; + } + info->timer->attr.attr.mode = S_IRUGO; + info->timer->attr.show = idletimer_tg_show; + + ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr); + if (ret < 0) { + pr_debug("couldn't add file to sysfs"); + goto out_free_attr; + } + + list_add(&info->timer->entry, &idletimer_tg_list); + + setup_timer(&info->timer->timer, idletimer_tg_expired, + (unsigned long) info->timer); + info->timer->refcnt = 1; + + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); + + INIT_WORK(&info->timer->work, idletimer_tg_work); + + return 0; + +out_free_attr: + kfree(info->timer->attr.attr.name); +out_free_timer: + kfree(info->timer); +out: + return ret; +} + +/* + * The actual xt_tables plugin. 
+ */ +static unsigned int idletimer_tg_target(struct sk_buff *skb, + const struct xt_action_param *par) +{ + const struct idletimer_tg_info *info = par->targinfo; + + pr_debug("resetting timer %s, timeout period %u\n", + info->label, info->timeout); + + BUG_ON(!info->timer); + + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); + + return XT_CONTINUE; +} + +static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) +{ + struct idletimer_tg_info *info = par->targinfo; + int ret; + + pr_debug("checkentry targinfo%s\n", info->label); + + if (info->timeout == 0) { + pr_debug("timeout value is zero\n"); + return -EINVAL; + } + + if (info->label[0] == '\0' || + strnlen(info->label, + MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) { + pr_debug("label is empty or not nul-terminated\n"); + return -EINVAL; + } + + mutex_lock(&list_mutex); + + info->timer = __idletimer_tg_find_by_label(info->label); + if (info->timer) { + info->timer->refcnt++; + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); + + pr_debug("increased refcnt of timer %s to %u\n", + info->label, info->timer->refcnt); + } else { + ret = idletimer_tg_create(info); + if (ret < 0) { + pr_debug("failed to create timer\n"); + mutex_unlock(&list_mutex); + return ret; + } + } + + mutex_unlock(&list_mutex); + return 0; +} + +static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) +{ + const struct idletimer_tg_info *info = par->targinfo; + + pr_debug("destroy targinfo %s\n", info->label); + + mutex_lock(&list_mutex); + + if (--info->timer->refcnt == 0) { + pr_debug("deleting timer %s\n", info->label); + + list_del(&info->timer->entry); + del_timer_sync(&info->timer->timer); + sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + kfree(info->timer->attr.attr.name); + kfree(info->timer); + } else { + pr_debug("decreased refcnt of timer %s to %u\n", + info->label, info->timer->refcnt); + } + + 
mutex_unlock(&list_mutex); +} + +static struct xt_target idletimer_tg __read_mostly = { + .name = "IDLETIMER", + .family = NFPROTO_UNSPEC, + .target = idletimer_tg_target, + .targetsize = sizeof(struct idletimer_tg_info), + .checkentry = idletimer_tg_checkentry, + .destroy = idletimer_tg_destroy, + .me = THIS_MODULE, +}; + +static struct class *idletimer_tg_class; + +static struct device *idletimer_tg_device; + +static int __init idletimer_tg_init(void) +{ + int err; + + idletimer_tg_class = class_create(THIS_MODULE, "xt_idletimer"); + err = PTR_ERR(idletimer_tg_class); + if (IS_ERR(idletimer_tg_class)) { + pr_debug("couldn't register device class\n"); + goto out; + } + + idletimer_tg_device = device_create(idletimer_tg_class, NULL, + MKDEV(0, 0), NULL, "timers"); + err = PTR_ERR(idletimer_tg_device); + if (IS_ERR(idletimer_tg_device)) { + pr_debug("couldn't register system device\n"); + goto out_class; + } + + idletimer_tg_kobj = &idletimer_tg_device->kobj; + + err = xt_register_target(&idletimer_tg); + if (err < 0) { + pr_debug("couldn't register xt target\n"); + goto out_dev; + } + + return 0; +out_dev: + device_destroy(idletimer_tg_class, MKDEV(0, 0)); +out_class: + class_destroy(idletimer_tg_class); +out: + return err; +} + +static void __exit idletimer_tg_exit(void) +{ + xt_unregister_target(&idletimer_tg); + + device_destroy(idletimer_tg_class, MKDEV(0, 0)); + class_destroy(idletimer_tg_class); +} + +module_init(idletimer_tg_init); +module_exit(idletimer_tg_exit); + +MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>"); +MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); +MODULE_DESCRIPTION("Xtables: idle time monitor"); +MODULE_LICENSE("GPL v2"); diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 512b9123252f..9d782181b6c8 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) If there is a real ct entry correspondig to 
this packet, it'll hang aroun till timing out. We don't deal with it for performance reasons. JK */ - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 69c01e10f8af..de079abd5bc8 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -60,13 +60,22 @@ struct xt_rateest *xt_rateest_lookup(const char *name) } EXPORT_SYMBOL_GPL(xt_rateest_lookup); +static void xt_rateest_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct xt_rateest, rcu)); +} + void xt_rateest_put(struct xt_rateest *est) { mutex_lock(&xt_rateest_mutex); if (--est->refcnt == 0) { hlist_del(&est->list); gen_kill_estimator(&est->bstats, &est->rstats); - kfree(est); + /* + * gen_estimator est_timer() might access est->lock or bstats, + * wait a RCU grace period before freeing 'est' + */ + call_rcu(&est->rcu, xt_rateest_free_rcu); } mutex_unlock(&xt_rateest_mutex); } @@ -179,6 +188,7 @@ static int __init xt_rateest_tg_init(void) static void __exit xt_rateest_tg_fini(void) { xt_unregister_target(&xt_rateest_tg_reg); + rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */ } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 62ec021fbd50..eb81c380da1b 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -165,8 +165,8 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_unlock(); if (rt != NULL) { - mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); } return mtu; } @@ -220,15 +220,13 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) } #endif -#define TH_SYN 0x02 - /* Must specify -p tcp --syn */ static inline bool find_syn_match(const struct xt_entry_match *m) { const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; if 
(strcmp(m->u.kernel.match->name, "tcp") == 0 && - tcpinfo->flg_cmp & TH_SYN && + tcpinfo->flg_cmp & TCPHDR_SYN && !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) return true; diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 859d9fd429c8..22a2d421e7eb 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -77,8 +77,8 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); - skb->dev = rt->u.dst.dev; + skb_dst_set(skb, &rt->dst); + skb->dev = rt->dst.dev; skb->protocol = htons(ETH_P_IP); return true; } @@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) #ifdef WITH_CONNTRACK /* Avoid counting cloned packets towards the original connection. */ nf_conntrack_put(skb->nfct); - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); #endif @@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) #ifdef WITH_CONNTRACK nf_conntrack_put(skb->nfct); - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); #endif diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index e1a0dedac258..c61294d85fda 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -37,8 +37,10 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par) return NF_DROP; sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, - iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, - hp->source, tgi->lport ? tgi->lport : hp->dest, + iph->saddr, + tgi->laddr ? tgi->laddr : iph->daddr, + hp->source, + tgi->lport ? 
tgi->lport : hp->dest, par->in, true); /* NOTE: assign_sock consumes our sk reference */ diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 30b95a1c1c89..f4af1bfafb1c 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL) return false; - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return false; if (ct->master) diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 73517835303d..5b138506690e 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -112,6 +112,16 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); + + /* + * This filter cannot function correctly unless connection tracking + * accounting is enabled, so complain in the hope that someone notices. + */ + if (!nf_ct_acct_enabled(par->net)) { + pr_warning("Forcing CT accounting to be enabled\n"); + nf_ct_set_acct(par->net, true); + } + return ret; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 39681f10291c..e536710ad916 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); - if (ct == &nf_conntrack_untracked) - statebit = XT_CONNTRACK_STATE_UNTRACKED; - else if (ct != NULL) - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); - else + if (ct) { + if (nf_ct_is_untracked(ct)) + statebit = XT_CONNTRACK_STATE_UNTRACKED; + else + statebit = XT_CONNTRACK_STATE_BIT(ctinfo); + } else statebit = XT_CONNTRACK_STATE_INVALID; if (info->match_flags & XT_CONNTRACK_STATE) { diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c new file mode 100644 index 000000000000..b39db8a5cbae --- /dev/null +++ b/net/netfilter/xt_cpu.c @@ -0,0 +1,63 @@ 
+/* Kernel module to match running CPU */ + +/* + * Might be used to distribute connections on several daemons, if + * RPS (Receive Packet Steering) is enabled or NIC is multiqueue capable, + * each RX queue IRQ affined to one CPU (1:1 mapping) + * + */ + +/* (C) 2010 Eric Dumazet + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter/xt_cpu.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); +MODULE_DESCRIPTION("Xtables: CPU match"); + +static int cpu_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + if (info->invert & ~1) + return -EINVAL; + return 0; +} + +static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + return (info->cpu == smp_processor_id()) ^ info->invert; +} + +static struct xt_match cpu_mt_reg __read_mostly = { + .name = "cpu", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cpu_mt_check, + .match = cpu_mt, + .matchsize = sizeof(struct xt_cpu_info), + .me = THIS_MODULE, +}; + +static int __init cpu_mt_init(void) +{ + return xt_register_match(&cpu_mt_reg); +} + +static void __exit cpu_mt_exit(void) +{ + xt_unregister_match(&cpu_mt_reg); +} + +module_init(cpu_mt_init); +module_exit(cpu_mt_exit); diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c new file mode 100644 index 000000000000..7a4d66db95ae --- /dev/null +++ b/net/netfilter/xt_ipvs.c @@ -0,0 +1,189 @@ +/* + * xt_ipvs - kernel module to match IPVS connection properties + * + * Author: Hannes Eder <heder@google.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include
<linux/spinlock.h> +#include <linux/skbuff.h> +#ifdef CONFIG_IP_VS_IPV6 +#include <net/ipv6.h> +#endif +#include <linux/ip_vs.h> +#include <linux/types.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_ipvs.h> +#include <net/netfilter/nf_conntrack.h> + +#include <net/ip_vs.h> + +MODULE_AUTHOR("Hannes Eder <heder@google.com>"); +MODULE_DESCRIPTION("Xtables: match IPVS connection properties"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_ipvs"); +MODULE_ALIAS("ip6t_ipvs"); + +/* borrowed from xt_conntrack */ +static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr, + const union nf_inet_addr *uaddr, + const union nf_inet_addr *umask, + unsigned int l3proto) +{ + if (l3proto == NFPROTO_IPV4) + return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0; +#ifdef CONFIG_IP_VS_IPV6 + else if (l3proto == NFPROTO_IPV6) + return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6, + &uaddr->in6) == 0; +#endif + else + return false; +} + +static bool +ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_ipvs_mtinfo *data = par->matchinfo; + /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. 
*/ + const u_int8_t family = par->family; + struct ip_vs_iphdr iph; + struct ip_vs_protocol *pp; + struct ip_vs_conn *cp; + bool match = true; + + if (data->bitmask == XT_IPVS_IPVS_PROPERTY) { + match = skb->ipvs_property ^ + !!(data->invert & XT_IPVS_IPVS_PROPERTY); + goto out; + } + + /* other flags than XT_IPVS_IPVS_PROPERTY are set */ + if (!skb->ipvs_property) { + match = false; + goto out; + } + + ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); + + if (data->bitmask & XT_IPVS_PROTO) + if ((iph.protocol == data->l4proto) ^ + !(data->invert & XT_IPVS_PROTO)) { + match = false; + goto out; + } + + pp = ip_vs_proto_get(iph.protocol); + if (unlikely(!pp)) { + match = false; + goto out; + } + + /* + * Check if the packet belongs to an existing entry + */ + cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); + if (unlikely(cp == NULL)) { + match = false; + goto out; + } + + /* + * We found a connection, i.e. cp != NULL, make sure to call + * __ip_vs_conn_put before returning. In our case jump to out_put_cp.
+ */ + + if (data->bitmask & XT_IPVS_VPORT) + if ((cp->vport == data->vport) ^ + !(data->invert & XT_IPVS_VPORT)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_VPORTCTL) + if ((cp->control != NULL && + cp->control->vport == data->vportctl) ^ + !(data->invert & XT_IPVS_VPORTCTL)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_DIR) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + if (ct == NULL || nf_ct_is_untracked(ct)) { + match = false; + goto out_put_cp; + } + + if ((ctinfo >= IP_CT_IS_REPLY) ^ + !!(data->invert & XT_IPVS_DIR)) { + match = false; + goto out_put_cp; + } + } + + if (data->bitmask & XT_IPVS_METHOD) + if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^ + !(data->invert & XT_IPVS_METHOD)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_VADDR) { + if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr, + &data->vmask, family) ^ + !(data->invert & XT_IPVS_VADDR)) { + match = false; + goto out_put_cp; + } + } + +out_put_cp: + __ip_vs_conn_put(cp); +out: + pr_debug("match=%d\n", match); + return match; +} + +static int ipvs_mt_check(const struct xt_mtchk_param *par) +{ + if (par->family != NFPROTO_IPV4 +#ifdef CONFIG_IP_VS_IPV6 + && par->family != NFPROTO_IPV6 +#endif + ) { + pr_info("protocol family %u not supported\n", par->family); + return -EINVAL; + } + + return 0; +} + +static struct xt_match xt_ipvs_mt_reg __read_mostly = { + .name = "ipvs", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = ipvs_mt, + .checkentry = ipvs_mt_check, + .matchsize = XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)), + .me = THIS_MODULE, +}; + +static int __init ipvs_mt_init(void) +{ + return xt_register_match(&xt_ipvs_mt_reg); +} + +static void __exit ipvs_mt_exit(void) +{ + xt_unregister_match(&xt_ipvs_mt_reg); +} + +module_init(ipvs_mt_init); +module_exit(ipvs_mt_exit); diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 
b4f7dfea5980..70eb2b4984dd 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -11,7 +11,8 @@ #include <linux/netfilter/xt_quota.h> struct xt_quota_priv { - uint64_t quota; + spinlock_t lock; + uint64_t quota; }; MODULE_LICENSE("GPL"); @@ -20,8 +21,6 @@ MODULE_DESCRIPTION("Xtables: countdown quota match"); MODULE_ALIAS("ipt_quota"); MODULE_ALIAS("ip6t_quota"); -static DEFINE_SPINLOCK(quota_lock); - static bool quota_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -29,7 +28,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par) struct xt_quota_priv *priv = q->master; bool ret = q->flags & XT_QUOTA_INVERT; - spin_lock_bh(&quota_lock); + spin_lock_bh(&priv->lock); if (priv->quota >= skb->len) { priv->quota -= skb->len; ret = !ret; @@ -37,9 +36,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par) /* we do not allow even small packets from now on */ priv->quota = 0; } - /* Copy quota back to matchinfo so that iptables can display it */ - q->quota = priv->quota; - spin_unlock_bh(&quota_lock); + spin_unlock_bh(&priv->lock); return ret; } @@ -55,6 +52,7 @@ static int quota_mt_check(const struct xt_mtchk_param *par) if (q->master == NULL) return -ENOMEM; + spin_lock_init(&q->master->lock); q->master->quota = q->quota; return 0; } diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index c04fcf385c59..ef36a56a02c6 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -3,6 +3,7 @@ #include <linux/skbuff.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/sctp/sctp.h> #include <linux/sctp.h> #include <linux/netfilter/x_tables.h> @@ -67,7 +68,7 @@ match_packet(const struct sk_buff *skb, ++i, offset, sch->type, htons(sch->length), sch->flags); #endif - offset += (ntohs(sch->length) + 3) & ~3; + offset += WORD_ROUND(ntohs(sch->length)); pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index
3d54c236a1ba..1ca89908cbad 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * reply packet of an established SNAT-ted connection. */ ct = nf_ct_get(skb, &ctinfo); - if (ct && (ct != &nf_conntrack_untracked) && + if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || (iph->protocol == IPPROTO_ICMP && diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index e12e053d3782..a507922d80cd 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_state_info *sinfo = par->matchinfo; enum ip_conntrack_info ctinfo; unsigned int statebit; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (nf_ct_is_untracked(skb)) - statebit = XT_STATE_UNTRACKED; - else if (!nf_ct_get(skb, &ctinfo)) + if (!ct) statebit = XT_STATE_INVALID; - else - statebit = XT_STATE_BIT(ctinfo); - + else { + if (nf_ct_is_untracked(ct)) + statebit = XT_STATE_UNTRACKED; + else + statebit = XT_STATE_BIT(ctinfo); + } return (sinfo->statemask & statebit); } diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 96e62b8fd6b1..42ecb71d445f 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -18,8 +18,8 @@ #include <linux/netfilter/x_tables.h> struct xt_statistic_priv { - uint32_t count; -}; + atomic_t count; +} ____cacheline_aligned_in_smp; MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); @@ -27,13 +27,12 @@ MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)"); MODULE_ALIAS("ipt_statistic"); MODULE_ALIAS("ip6t_statistic"); -static DEFINE_SPINLOCK(nth_lock); - static bool statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_statistic_info *info = par->matchinfo; bool ret = info->flags & 
XT_STATISTIC_INVERT; + int nval, oval; switch (info->mode) { case XT_STATISTIC_MODE_RANDOM: @@ -41,12 +40,12 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) ret = !ret; break; case XT_STATISTIC_MODE_NTH: - spin_lock_bh(&nth_lock); - if (info->master->count++ == info->u.nth.every) { - info->master->count = 0; + do { + oval = atomic_read(&info->master->count); + nval = (oval == info->u.nth.every) ? 0 : oval + 1; + } while (atomic_cmpxchg(&info->master->count, oval, nval) != oval); + if (nval == 0) ret = !ret; - } - spin_unlock_bh(&nth_lock); break; } @@ -64,7 +63,7 @@ static int statistic_mt_check(const struct xt_mtchk_param *par) info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); if (info->master == NULL) return -ENOMEM; - info->master->count = info->u.nth.count; + atomic_set(&info->master->count, info->u.nth.count); return 0; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a2eb965207d3..2cbf380377d5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1076,14 +1076,15 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); - kfree_skb(skb); + consume_skb(skb); netlink_unlock_table(); - kfree_skb(info.skb2); - - if (info.delivery_failure) + if (info.delivery_failure) { + kfree_skb(info.skb2); return -ENOBUFS; + } else + consume_skb(info.skb2); if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) @@ -1323,19 +1324,23 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) + if (NULL == siocb->scm) { siocb->scm = &scm; + memset(&scm, 0, sizeof(scm)); + } err = scm_send(sock, msg, siocb->scm); if (err < 0) return err; if (msg->msg_namelen) { + err = -EINVAL; if (addr->nl_family != AF_NETLINK) - return -EINVAL; + goto out; dst_pid = addr->nl_pid; dst_group = 
ffs(addr->nl_groups); + err = -EPERM; if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) - return -EPERM; + goto out; } else { dst_pid = nlk->dst_pid; dst_group = nlk->dst_group; @@ -1387,6 +1392,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); out: + scm_destroy(siocb->scm); return err; } @@ -1400,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb, *frag __maybe_unused = NULL; + struct sk_buff *skb; int err; if (flags&MSG_OOB) @@ -1435,7 +1441,21 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, kfree_skb(skb); skb = compskb; } else { - frag = skb_shinfo(skb)->frag_list; + /* + * Before setting frag_list to NULL, we must get a + * private copy of skb if shared (because of MSG_PEEK) + */ + if (skb_shared(skb)) { + struct sk_buff *nskb; + + nskb = pskb_copy(skb, GFP_KERNEL); + kfree_skb(skb); + skb = nskb; + err = -ENOMEM; + if (!skb) + goto out; + } + kfree_skb(skb_shinfo(skb)->frag_list); skb_shinfo(skb)->frag_list = NULL; } } @@ -1472,10 +1492,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (flags & MSG_TRUNC) copied = skb->len; -#ifdef CONFIG_COMPAT_NETLINK_MESSAGES - skb_shinfo(skb)->frag_list = frag; -#endif - skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index aa4308afcc7f..26ed3e8587c2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -303,6 +303,7 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) errout: return err; } +EXPORT_SYMBOL(genl_register_ops); /** * genl_unregister_ops - unregister generic netlink operations @@ -337,6 +338,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) 
return -ENOENT; } +EXPORT_SYMBOL(genl_unregister_ops); /** * genl_register_family - register a generic netlink family @@ -405,6 +407,7 @@ errout_locked: errout: return err; } +EXPORT_SYMBOL(genl_register_family); /** * genl_register_family_with_ops - register a generic netlink family @@ -485,6 +488,7 @@ int genl_unregister_family(struct genl_family *family) return -ENOENT; } +EXPORT_SYMBOL(genl_unregister_family); static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { @@ -873,11 +877,7 @@ static int __init genl_init(void) for (i = 0; i < GENL_FAM_TAB_SIZE; i++) INIT_LIST_HEAD(&family_ht[i]); - err = genl_register_family(&genl_ctrl); - if (err < 0) - goto problem; - - err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops); + err = genl_register_family_with_ops(&genl_ctrl, &genl_ctrl_ops, 1); if (err < 0) goto problem; @@ -899,11 +899,6 @@ problem: subsys_initcall(genl_init); -EXPORT_SYMBOL(genl_register_ops); -EXPORT_SYMBOL(genl_unregister_ops); -EXPORT_SYMBOL(genl_register_family); -EXPORT_SYMBOL(genl_unregister_family); - static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, gfp_t flags) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2078a277e06b..9a17f28b1253 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -83,6 +83,7 @@ #include <linux/if_vlan.h> #include <linux/virtio_net.h> #include <linux/errqueue.h> +#include <linux/net_tstamp.h> #ifdef CONFIG_INET #include <net/inet_common.h> @@ -202,6 +203,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; + unsigned int tp_tstamp; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; @@ -656,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sk_buff *copy_skb = NULL; struct timeval tv; struct timespec ts; + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -737,7 +740,13 @@ static int 
tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; - if (skb->tstamp.tv64) + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + tv = ktime_to_timeval(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + tv = ktime_to_timeval(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) tv = ktime_to_timeval(skb->tstamp); else do_gettimeofday(&tv); @@ -750,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; - if (skb->tstamp.tv64) + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + ts = ktime_to_timespec(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + ts = ktime_to_timespec(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) ts = ktime_to_timespec(skb->tstamp); else getnstimeofday(&ts); @@ -2027,6 +2042,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->has_vnet_hdr = !!val; return 0; } + case PACKET_TIMESTAMP: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->tp_tstamp = val; + return 0; + } default: return -ENOPROTOOPT; } @@ -2119,6 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_loss; data = &val; break; + case PACKET_TIMESTAMP: + if (len > sizeof(int)) + len = sizeof(int); + val = po->tp_tstamp; + data = &val; + break; default: return -ENOPROTOOPT; } diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 94d72e85a475..b2a3ae6cad78 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -698,6 +698,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) newsk = NULL; goto out; } + 
kfree_skb(oskb); sock_hold(sk); pep_sk(newsk)->listener = sk; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index c33da6576942..b18e48fae975 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -162,6 +162,14 @@ int phonet_address_add(struct net_device *dev, u8 addr) return err; } +static void phonet_device_rcu_free(struct rcu_head *head) +{ + struct phonet_device *pnd; + + pnd = container_of(head, struct phonet_device, rcu); + kfree(pnd); +} + int phonet_address_del(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); @@ -179,10 +187,9 @@ int phonet_address_del(struct net_device *dev, u8 addr) pnd = NULL; mutex_unlock(&pndevs->lock); - if (pnd) { - synchronize_rcu(); - kfree(pnd); - } + if (pnd) + call_rcu(&pnd->rcu, phonet_device_rcu_free); + return err; } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index cbc244a128bd..b4fdaac233f7 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -109,7 +109,9 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, init_timer(&rose_neigh->t0timer); if (rose_route->ndigis != 0) { - if ((rose_neigh->digipeat = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) { + rose_neigh->digipeat = + kmalloc(sizeof(ax25_digi), GFP_ATOMIC); + if (rose_neigh->digipeat == NULL) { kfree(rose_neigh); res = -ENOMEM; goto out; diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index f0f85b0123f7..9f1729bd60de 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -64,8 +64,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) return; } - peer->if_mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); + peer->if_mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); _leave(" [if_mtu %u]", peer->if_mtu); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 972378f47f3c..23b25f89e7e0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -26,6 +26,11 @@ #include <net/act_api.h> #include 
<net/netlink.h> +static void tcf_common_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct tcf_common, tcfc_rcu)); +} + void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) { unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); @@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) write_unlock_bh(hinfo->lock); gen_kill_estimator(&p->tcfc_bstats, &p->tcfc_rate_est); - kfree(p); + /* + * gen_estimator est_timer() might access p->tcfc_lock + * or bstats, wait a RCU grace period before freeing p + */ + call_rcu(&p->tcfc_rcu, tcf_common_free_rcu); return; } } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c0b6863e3b87..11f195af2da0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -33,6 +33,7 @@ static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; static u32 mirred_idx_gen; static DEFINE_RWLOCK(mirred_lock); +static LIST_HEAD(mirred_list); static struct tcf_hashinfo mirred_hash_info = { .htab = tcf_mirred_ht, @@ -47,7 +48,9 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) m->tcf_bindcnt--; m->tcf_refcnt--; if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { - dev_put(m->tcfm_dev); + list_del(&m->tcfm_list); + if (m->tcfm_dev) + dev_put(m->tcfm_dev); tcf_hash_destroy(&m->common, &mirred_hash_info); return 1; } @@ -134,8 +137,10 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, m->tcfm_ok_push = ok_push; } spin_unlock_bh(&m->tcf_lock); - if (ret == ACT_P_CREATED) + if (ret == ACT_P_CREATED) { + list_add(&m->tcfm_list, &mirred_list); tcf_hash_insert(pc, &mirred_hash_info); + } return ret; } @@ -160,22 +165,27 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, spin_lock(&m->tcf_lock); m->tcf_tm.lastuse = jiffies; + m->tcf_bstats.bytes += qdisc_pkt_len(skb); + m->tcf_bstats.packets++; dev = m->tcfm_dev; + if (!dev) { + printk_once(KERN_NOTICE "tc mirred: target device is gone\n"); + goto out; + } + if 
(!(dev->flags & IFF_UP)) { if (net_ratelimit()) - pr_notice("tc mirred to Houston: device %s is gone!\n", + pr_notice("tc mirred to Houston: device %s is down\n", dev->name); goto out; } - skb2 = skb_act_clone(skb, GFP_ATOMIC); + at = G_TC_AT(skb->tc_verd); + skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action); if (skb2 == NULL) goto out; - m->tcf_bstats.bytes += qdisc_pkt_len(skb2); - m->tcf_bstats.packets++; - at = G_TC_AT(skb->tc_verd); if (!(at & AT_EGRESS)) { if (m->tcfm_ok_push) skb_push(skb2, skb2->dev->hard_header_len); @@ -185,16 +195,14 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, if (m->tcfm_eaction != TCA_EGRESS_MIRROR) skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); - skb2->dev = dev; skb2->skb_iif = skb->dev->ifindex; + skb2->dev = dev; dev_queue_xmit(skb2); err = 0; out: if (err) { m->tcf_qstats.overlimits++; - m->tcf_bstats.bytes += qdisc_pkt_len(skb); - m->tcf_bstats.packets++; /* should we be asking for packet to be dropped? * may make sense for redirect case only */ @@ -232,6 +240,28 @@ nla_put_failure: return -1; } +static int mirred_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct tcf_mirred *m; + + if (event == NETDEV_UNREGISTER) + list_for_each_entry(m, &mirred_list, tcfm_list) { + if (m->tcfm_dev == dev) { + dev_put(dev); + m->tcfm_dev = NULL; + } + } + + return NOTIFY_DONE; +} + +static struct notifier_block mirred_device_notifier = { + .notifier_call = mirred_device_event, +}; + + static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .hinfo = &mirred_hash_info, @@ -252,12 +282,17 @@ MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { + int err = register_netdevice_notifier(&mirred_device_notifier); + if (err) + return err; + pr_info("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } static void __exit mirred_cleanup_module(void) { + unregister_netdevice_notifier(&mirred_device_notifier); 
tcf_unregister_action(&act_mirred_ops); } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 570949417f38..d0386a413e8d 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -205,7 +205,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, { struct icmphdr *icmph; - if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + if (!pskb_may_pull(skb, ihl + sizeof(*icmph))) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); @@ -215,6 +215,10 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, (icmph->type != ICMP_PARAMETERPROB)) break; + if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + goto drop; + + icmph = (void *)(skb_network_header(skb) + ihl); iph = (void *)(icmph + 1); if (egress) addr = iph->daddr; @@ -243,7 +247,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->saddr = new_addr; inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, - 1); + 0); break; } default: @@ -265,40 +269,29 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_nat *p = a->priv; - struct tc_nat *opt; + struct tc_nat opt; struct tcf_t t; - int s; - - s = sizeof(*opt); - /* netlink spinlocks held above us - must use ATOMIC */ - opt = kzalloc(s, GFP_ATOMIC); - if (unlikely(!opt)) - return -ENOBUFS; + opt.old_addr = p->old_addr; + opt.new_addr = p->new_addr; + opt.mask = p->mask; + opt.flags = p->flags; - opt->old_addr = p->old_addr; - opt->new_addr = p->new_addr; - opt->mask = p->mask; - opt->flags = p->flags; + opt.index = p->tcf_index; + opt.action = p->tcf_action; + opt.refcnt = p->tcf_refcnt - ref; + opt.bindcnt = p->tcf_bindcnt - bind; - opt->index = p->tcf_index; - opt->action = p->tcf_action; - opt->refcnt = p->tcf_refcnt - ref; - opt->bindcnt = p->tcf_bindcnt - bind; - - NLA_PUT(skb, TCA_NAT_PARMS, s, opt); + NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 
t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(p->tcf_tm.expires); NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); - kfree(opt); - return skb->len; nla_put_failure: nlmsg_trim(skb, b); - kfree(opt); return -1; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 50e3d945e1f4..a0593c9640db 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -127,8 +127,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, int i, munged = 0; unsigned int off; - if (!(skb->tc_verd & TC_OK2MUNGE)) { - /* should we set skb->cloned? */ + if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { return p->tcf_action; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 654f73dff7c1..537a48732e9e 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -97,6 +97,11 @@ nla_put_failure: goto done; } +static void tcf_police_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct tcf_police, tcf_rcu)); +} + static void tcf_police_destroy(struct tcf_police *p) { unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); @@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p) qdisc_put_rtab(p->tcfp_R_tab); if (p->tcfp_P_tab) qdisc_put_rtab(p->tcfp_P_tab); - kfree(p); + /* + * gen_estimator est_timer() might access p->tcf_lock + * or bstats, wait a RCU grace period before freeing p + */ + call_rcu(&p->tcf_rcu, tcf_police_free_rcu); return; } } @@ -397,6 +406,7 @@ static void __exit police_cleanup_module(void) { tcf_unregister_action(&act_police_ops); + rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */ } module_init(police_init_module); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 1b4bc691d7d1..4a1d640b0cf1 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -73,10 +73,10 @@ static int tcf_simp_release(struct tcf_defact *d, int bind) static int alloc_defdata(struct tcf_defact *d, char 
*defdata) { - d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL); + d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL); if (unlikely(!d->tcfd_defdata)) return -ENOMEM; - + strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); return 0; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4f522143811e..7416a5c73b2a 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -134,10 +134,12 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { - unsigned int toff; + int toff = off + key->off + (off2 & key->offmask); __be32 *data, _data; - toff = off + key->off + (off2 & key->offmask); + if (skb_headroom(skb) + toff < 0) + goto out; + data = skb_header_pointer(skb, toff, 4, &_data); if (!data) goto out; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index fcbb86a486a2..e114f23d5eae 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -52,7 +52,7 @@ struct atm_flow_data { int ref; /* reference count */ struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct atm_flow_data *next; + struct list_head list; struct atm_flow_data *excess; /* flow for excess traffic; NULL to set CLP instead */ int hdr_len; @@ -61,34 +61,23 @@ struct atm_flow_data { struct atm_qdisc_data { struct atm_flow_data link; /* unclassified skbs go here */ - struct atm_flow_data *flows; /* NB: "link" is also on this + struct list_head flows; /* NB: "link" is also on this list */ struct tasklet_struct task; /* dequeue tasklet */ }; /* ------------------------- Class/flow operations ------------------------- */ -static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow) -{ - struct atm_flow_data *walk; - - pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow); - for (walk = qdisc->flows; walk; walk = walk->next) - if (walk == flow) - return 1; - pr_debug("find_flow: not found\n"); - return 0; -} - static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) { struct atm_qdisc_data *p = 
qdisc_priv(sch); struct atm_flow_data *flow; - for (flow = p->flows; flow; flow = flow->next) + list_for_each_entry(flow, &p->flows, list) { if (flow->classid == classid) - break; - return flow; + return flow; + } + return NULL; } static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, @@ -99,7 +88,7 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", sch, p, flow, new, old); - if (!find_flow(p, flow)) + if (list_empty(&flow->list)) return -EINVAL; if (!new) new = &noop_qdisc; @@ -146,20 +135,12 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)cl; - struct atm_flow_data **prev; pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); if (--flow->ref) return; pr_debug("atm_tc_put: destroying\n"); - for (prev = &p->flows; *prev; prev = &(*prev)->next) - if (*prev == flow) - break; - if (!*prev) { - printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow); - return; - } - *prev = flow->next; + list_del_init(&flow->list); pr_debug("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); tcf_destroy_chain(&flow->filter_list); @@ -274,7 +255,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -EINVAL; goto err_out; } - if (find_flow(p, flow)) { + if (!list_empty(&flow->list)) { error = -EEXIST; goto err_out; } @@ -313,8 +294,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, flow->classid = classid; flow->ref = 1; flow->excess = excess; - flow->next = p->link.next; - p->link.next = flow; + list_add(&flow->list, &p->link.list); flow->hdr_len = hdr_len; if (hdr) memcpy(flow->hdr, hdr, hdr_len); @@ -335,7 +315,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) struct atm_flow_data *flow = (struct atm_flow_data *)arg; pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if 
(!find_flow(qdisc_priv(sch), flow)) + if (list_empty(&flow->list)) return -EINVAL; if (flow->filter_list || flow == &p->link) return -EBUSY; @@ -361,12 +341,12 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); if (walker->stop) return; - for (flow = p->flows; flow; flow = flow->next) { - if (walker->count >= walker->skip) - if (walker->fn(sch, (unsigned long)flow, walker) < 0) { - walker->stop = 1; - break; - } + list_for_each_entry(flow, &p->flows, list) { + if (walker->count >= walker->skip && + walker->fn(sch, (unsigned long)flow, walker) < 0) { + walker->stop = 1; + break; + } walker->count++; } } @@ -385,16 +365,17 @@ static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl) static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = NULL; /* @@@ */ + struct atm_flow_data *flow; struct tcf_result res; int result; int ret = NET_XMIT_POLICED; pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); result = TC_POLICE_OK; /* be nice to gcc */ + flow = NULL; if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) - for (flow = p->flows; flow; flow = flow->next) + !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) { + list_for_each_entry(flow, &p->flows, list) { if (flow->filter_list) { result = tc_classify_compat(skb, flow->filter_list, @@ -404,8 +385,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow = (struct atm_flow_data *)res.class; if (!flow) flow = lookup_flow(sch, res.classid); - break; + goto done; } + } + flow = NULL; + done: + ; + } if (!flow) flow = &p->link; else { @@ -477,7 +463,9 @@ static void sch_atm_dequeue(unsigned long data) struct sk_buff *skb; pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); - for (flow = p->link.next; flow; 
flow = flow->next) + list_for_each_entry(flow, &p->flows, list) { + if (flow == &p->link) + continue; /* * If traffic is properly shaped, this won't generate nasty * little bursts. Otherwise, it may ... (but that's okay) @@ -512,6 +500,7 @@ static void sch_atm_dequeue(unsigned long data) /* atm.atm_options are already set by atm_tc_enqueue */ flow->vcc->send(flow->vcc, skb); } + } } static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) @@ -543,9 +532,10 @@ static unsigned int atm_tc_drop(struct Qdisc *sch) unsigned int len; pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); - for (flow = p->flows; flow; flow = flow->next) + list_for_each_entry(flow, &p->flows, list) { if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) return len; + } return 0; } @@ -554,7 +544,9 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) struct atm_qdisc_data *p = qdisc_priv(sch); pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); - p->flows = &p->link; + INIT_LIST_HEAD(&p->flows); + INIT_LIST_HEAD(&p->link.list); + list_add(&p->link.list, &p->flows); p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (!p->link.q) @@ -565,7 +557,6 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) p->link.sock = NULL; p->link.classid = sch->handle; p->link.ref = 1; - p->link.next = NULL; tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; } @@ -576,7 +567,7 @@ static void atm_tc_reset(struct Qdisc *sch) struct atm_flow_data *flow; pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); - for (flow = p->flows; flow; flow = flow->next) + list_for_each_entry(flow, &p->flows, list) qdisc_reset(flow->q); sch->q.qlen = 0; } @@ -584,24 +575,17 @@ static void atm_tc_reset(struct Qdisc *sch) static void atm_tc_destroy(struct Qdisc *sch) { struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; + struct atm_flow_data *flow, *tmp; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", 
sch, p); - for (flow = p->flows; flow; flow = flow->next) + list_for_each_entry(flow, &p->flows, list) tcf_destroy_chain(&flow->filter_list); - /* races ? */ - while ((flow = p->flows)) { + list_for_each_entry_safe(flow, tmp, &p->flows, list) { if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, flow->ref); atm_tc_put(sch, (unsigned long)flow); - if (p->flows == flow) { - printk(KERN_ERR "atm_destroy: putting flow %p didn't " - "kill it\n", flow); - p->flows = flow->next; /* brute force */ - break; - } } tasklet_kill(&p->task); } @@ -615,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", sch, p, flow, skb, tcm); - if (!find_flow(p, flow)) + if (list_empty(&flow->list)) return -EINVAL; tcm->tcm_handle = flow->classid; tcm->tcm_info = flow->q->handle; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a63029ef3edd..2aeb3a4386a1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -96,7 +96,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * Another cpu is holding lock, requeue & delay xmits for * some time. */ - __get_cpu_var(softnet_data).cpu_collision++; + __this_cpu_inc(softnet_data.cpu_collision); ret = dev_requeue_skb(skb, q); } @@ -205,7 +205,7 @@ void __qdisc_run(struct Qdisc *q) } } - clear_bit(__QDISC_STATE_RUNNING, &q->state); + qdisc_run_end(q); } unsigned long dev_trans_start(struct net_device *dev) @@ -327,6 +327,24 @@ void netif_carrier_off(struct net_device *dev) } EXPORT_SYMBOL(netif_carrier_off); +/** + * netif_notify_peers - notify network peers about existence of @dev + * @dev: network device + * + * Generate traffic such that interested network peers are aware of + * @dev, such as by generating a gratuitous ARP. 
This may be used when + * a device wants to inform the rest of the network about some sort of + * reconfiguration such as a failover event or virtual machine + * migration. + */ +void netif_notify_peers(struct net_device *dev) +{ + rtnl_lock(); + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + rtnl_unlock(); +} +EXPORT_SYMBOL(netif_notify_peers); + /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. @@ -543,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); + spin_lock_init(&sch->busylock); sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; @@ -779,7 +798,7 @@ static bool some_qdisc_is_busy(struct net_device *dev) spin_lock_bh(root_lock); - val = (test_bit(__QDISC_STATE_RUNNING, &q->state) || + val = (qdisc_is_running(q) || test_bit(__QDISC_STATE_SCHED, &q->state)); spin_unlock_bh(root_lock); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0b52b8de562c..4be8d04b262d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1550,7 +1550,6 @@ static const struct Qdisc_class_ops htb_class_ops = { }; static struct Qdisc_ops htb_qdisc_ops __read_mostly = { - .next = NULL, .cl_ops = &htb_class_ops, .id = "htb", .priv_size = sizeof(struct htb_sched), @@ -1561,7 +1560,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .init = htb_init, .reset = htb_reset, .destroy = htb_destroy, - .change = NULL /* htb_change */, .dump = htb_dump, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 3415b6ce1c0a..807643bdcbac 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -449,6 +449,7 @@ static __init void teql_master_setup(struct net_device *dev) dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } static 
LIST_HEAD(master_dev_list); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index e41feff19e43..0b85e5256434 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -172,7 +172,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = (unsigned long)sp->autoclose * HZ; - /* Initilizes the timers */ + /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) setup_timer(&asoc->timers[i], sctp_timer_events[i], (unsigned long)asoc); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 182749867c72..5027b83f1cc0 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -490,7 +490,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, __func__, &fl.fl4_dst, &fl.fl4_src); if (!ip_route_output_key(&init_net, &rt, &fl)) { - dst = &rt->u.dst; + dst = &rt->dst; } /* If there is no association or if a source address is passed, no @@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, fl.fl4_src = laddr->a.v4.sin_addr.s_addr; fl.fl_ip_sport = laddr->a.v4.sin_port; if (!ip_route_output_key(&init_net, &rt, &fl)) { - dst = &rt->u.dst; + dst = &rt->dst; goto out_unlock; } } @@ -1002,7 +1002,8 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) static inline int init_sctp_mibs(void) { return snmp_mib_init((void __percpu **)sctp_statistics, - sizeof(struct sctp_mib)); + sizeof(struct sctp_mib), + __alignof__(struct sctp_mib)); } static inline void cleanup_sctp_mibs(void) @@ -1162,7 +1163,7 @@ SCTP_STATIC __init int sctp_init(void) /* Set the pressure threshold to be a fraction of global memory that * is up to 1/2 at 256 MB, decreasing toward zero with the amount of * memory, with a floor of 128 pages. 
- * Note this initalizes the data in sctpv6_prot too + * Note this initializes the data in sctpv6_prot too * Unabashedly stolen from tcp_init */ nr_pages = totalram_pages - totalhigh_pages; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index bd2a50b482ac..246f92924658 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1817,7 +1817,7 @@ malformed: struct __sctp_missing { __be32 num_missing; __be16 type; -} __attribute__((packed)); +} __packed; /* * Report a missing mandatory parameter. diff --git a/net/socket.c b/net/socket.c index 367d5477d00f..2270b941bcc7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -124,7 +124,7 @@ static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags); /* @@ -162,7 +162,7 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly; * Statistics counters of the socket lists */ -static DEFINE_PER_CPU(int, sockets_in_use) = 0; +static DEFINE_PER_CPU(int, sockets_in_use); /* * Support routines. @@ -170,15 +170,6 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; * divide and look after the messy bits. */ -#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - - 16 for IP, 16 for IPX, - 24 for IPv6, - about 80 for AX.25 - must be at least one bigger than - the AF_UNIX size (see net/unix/af_unix.c - :unix_mkname()). 
- */ - /** * move_addr_to_kernel - copy a socket address into kernel space * @uaddr: Address in user space @@ -309,9 +300,9 @@ static int init_inodecache(void) } static const struct super_operations sockfs_ops = { - .alloc_inode = sock_alloc_inode, - .destroy_inode =sock_destroy_inode, - .statfs = simple_statfs, + .alloc_inode = sock_alloc_inode, + .destroy_inode = sock_destroy_inode, + .statfs = simple_statfs, }; static int sockfs_get_sb(struct file_system_type *fs_type, @@ -411,6 +402,7 @@ int sock_map_fd(struct socket *sock, int flags) return fd; } +EXPORT_SYMBOL(sock_map_fd); static struct socket *sock_from_file(struct file *file, int *err) { @@ -422,7 +414,7 @@ static struct socket *sock_from_file(struct file *file, int *err) } /** - * sockfd_lookup - Go from a file number to its socket slot + * sockfd_lookup - Go from a file number to its socket slot * @fd: file handle * @err: pointer to an error code return * @@ -450,6 +442,7 @@ struct socket *sockfd_lookup(int fd, int *err) fput(file); return sock; } +EXPORT_SYMBOL(sockfd_lookup); static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { @@ -540,6 +533,7 @@ void sock_release(struct socket *sock) } sock->file = NULL; } +EXPORT_SYMBOL(sock_release); int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, union skb_shared_tx *shtx) @@ -586,6 +580,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) @@ -604,6 +599,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_sendmsg); static int ktime2ts(ktime_t kt, struct timespec *ts) { @@ -664,7 +660,6 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(ts), &ts); } - EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 
inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -720,6 +715,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_recvmsg); static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -752,6 +748,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_recvmsg); static void sock_aio_dtor(struct kiocb *iocb) { @@ -774,7 +771,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct socket *sock = file->private_data; @@ -887,7 +884,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { @@ -895,7 +892,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) br_ioctl_hook = hook; mutex_unlock(&br_ioctl_mutex); } - EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); @@ -907,7 +903,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) vlan_ioctl_hook = hook; mutex_unlock(&vlan_ioctl_mutex); } - EXPORT_SYMBOL(vlan_ioctl_set); static DEFINE_MUTEX(dlci_ioctl_mutex); @@ -919,7 +914,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) dlci_ioctl_hook = hook; mutex_unlock(&dlci_ioctl_mutex); } - EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, @@ -1047,6 +1041,7 @@ out_release: sock = NULL; goto out; } +EXPORT_SYMBOL(sock_create_lite); /* No kernel lock 
held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) @@ -1147,6 +1142,7 @@ call_kill: rcu_read_unlock(); return 0; } +EXPORT_SYMBOL(sock_wake_async); static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) @@ -1265,11 +1261,13 @@ int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } +EXPORT_SYMBOL(sock_create); int sock_create_kern(int family, int type, int protocol, struct socket **res) { return __sock_create(&init_net, family, type, protocol, res, 1); } +EXPORT_SYMBOL(sock_create_kern); SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) { @@ -1474,7 +1472,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, goto out; err = -ENFILE; - if (!(newsock = sock_alloc())) + newsock = sock_alloc(); + if (!newsock) goto out_put; newsock->type = sock->type; @@ -1861,8 +1860,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1964,8 +1962,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; err = -EMSGSIZE; @@ -2191,10 +2188,10 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) static const unsigned char nargs[20] = { - AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - 
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5) + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5) }; #undef AL @@ -2340,6 +2337,7 @@ int sock_register(const struct net_proto_family *ops) printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); return err; } +EXPORT_SYMBOL(sock_register); /** * sock_unregister - remove a protocol handler @@ -2366,6 +2364,7 @@ void sock_unregister(int family) printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); } +EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { @@ -2395,6 +2394,10 @@ static int __init sock_init(void) netfilter_init(); #endif +#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING + skb_timestamping_init(); +#endif + return 0; } @@ -2490,13 +2493,13 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifc.ifc_req = NULL; uifc = compat_alloc_user_space(sizeof(struct ifconf)); } else { - size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * - sizeof (struct ifreq); + size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * + sizeof(struct ifreq); uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); ifc.ifc_len = len; ifr = ifc.ifc_req = (void __user *)(uifc + 1); ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { + for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) return -EFAULT; ifr++; @@ -2516,9 +2519,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifr = ifc.ifc_req; ifr32 = compat_ptr(ifc32.ifcbuf); for (i = 0, j = 0; - i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) + i + 
sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; + i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { + if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) return -EFAULT; ifr32++; ifr++; @@ -2567,7 +2570,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2601,9 +2604,9 @@ static int bond_ioctl(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, &kifr); - set_fs (old_fs); + set_fs(old_fs); return err; case SIOCBONDSLAVEINFOQUERY: @@ -2710,9 +2713,9 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, (void __user *)&ifr); - set_fs (old_fs); + set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); @@ -2734,7 +2737,7 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2750,20 +2753,20 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif } struct rtentry32 { - u32 rt_pad1; + u32 rt_pad1; struct sockaddr rt_dst; /* target address */ struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ struct sockaddr rt_genmask; /* target network mask (IP) */ - unsigned short rt_flags; - short rt_pad2; - u32 rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; - short rt_metric; /* +1 for binary compatibility! 
*/ + unsigned short rt_flags; + short rt_pad2; + u32 rt_pad3; + unsigned char rt_tos; + unsigned char rt_class; + short rt_pad4; + short rt_metric; /* +1 for binary compatibility! */ /* char * */ u32 rt_dev; /* forcing the device at add */ - u32 rt_mtu; /* per route MTU/Window */ - u32 rt_window; /* Window clamping */ + u32 rt_mtu; /* per route MTU/Window */ + u32 rt_window; /* Window clamping */ unsigned short rt_irtt; /* Initial RTT */ }; @@ -2793,29 +2796,29 @@ static int routing_ioctl(struct net *net, struct socket *sock, if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ struct in6_rtmsg32 __user *ur6 = argp; - ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), + ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3 * sizeof(struct in6_addr)); - ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); + ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); + ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); + ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); + ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); + ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); + ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); + ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); r = (void *) &r6; } else { /* ipv4 */ struct rtentry32 __user *ur4 = argp; - ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), + ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3 * sizeof(struct sockaddr)); - ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); - ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); - ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); - ret |= 
__get_user (r4.rt_window, &(ur4->rt_window)); - ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); - ret |= __get_user (rtdev, &(ur4->rt_dev)); + ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); + ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); + ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); + ret |= __get_user(r4.rt_window, &(ur4->rt_window)); + ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); + ret |= __get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { - ret |= copy_from_user (devname, compat_ptr(rtdev), 15); + ret |= copy_from_user(devname, compat_ptr(rtdev), 15); r4.rt_dev = devname; devname[15] = 0; } else r4.rt_dev = NULL; @@ -2828,9 +2831,9 @@ static int routing_ioctl(struct net *net, struct socket *sock, goto out; } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); - set_fs (old_fs); + set_fs(old_fs); out: return ret; @@ -2993,11 +2996,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } +EXPORT_SYMBOL(kernel_bind); int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } +EXPORT_SYMBOL(kernel_listen); int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { @@ -3022,24 +3027,28 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) done: return err; } +EXPORT_SYMBOL(kernel_accept); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { return sock->ops->connect(sock, addr, addrlen, flags); } +EXPORT_SYMBOL(kernel_connect); int kernel_getsockname(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 0); } +EXPORT_SYMBOL(kernel_getsockname); int kernel_getpeername(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 1); } +EXPORT_SYMBOL(kernel_getpeername); int kernel_getsockopt(struct socket *sock, int level, int 
optname, char *optval, int *optlen) @@ -3056,6 +3065,7 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_getsockopt); int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) @@ -3072,6 +3082,7 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_setsockopt); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) @@ -3083,6 +3094,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, return sock_no_sendpage(sock, page, offset, size, flags); } +EXPORT_SYMBOL(kernel_sendpage); int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) { @@ -3095,33 +3107,10 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) return err; } +EXPORT_SYMBOL(kernel_sock_ioctl); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } - -EXPORT_SYMBOL(sock_create); -EXPORT_SYMBOL(sock_create_kern); -EXPORT_SYMBOL(sock_create_lite); -EXPORT_SYMBOL(sock_map_fd); -EXPORT_SYMBOL(sock_recvmsg); -EXPORT_SYMBOL(sock_register); -EXPORT_SYMBOL(sock_release); -EXPORT_SYMBOL(sock_sendmsg); -EXPORT_SYMBOL(sock_unregister); -EXPORT_SYMBOL(sock_wake_async); -EXPORT_SYMBOL(sockfd_lookup); -EXPORT_SYMBOL(kernel_sendmsg); -EXPORT_SYMBOL(kernel_recvmsg); -EXPORT_SYMBOL(kernel_bind); -EXPORT_SYMBOL(kernel_listen); -EXPORT_SYMBOL(kernel_accept); -EXPORT_SYMBOL(kernel_connect); -EXPORT_SYMBOL(kernel_getsockname); -EXPORT_SYMBOL(kernel_getpeername); -EXPORT_SYMBOL(kernel_getsockopt); -EXPORT_SYMBOL(kernel_setsockopt); -EXPORT_SYMBOL(kernel_sendpage); -EXPORT_SYMBOL(kernel_sock_ioctl); EXPORT_SYMBOL(kernel_sock_shutdown); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 95721426296d..880d0de3f50f 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -321,7 +321,7 @@ 
rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Run memory cache shrinker. */ static int -rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) +rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(free); int res; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fef2cc5e9d2b..4414a18c63b4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -282,7 +282,7 @@ static inline struct sock *unix_find_socket_byname(struct net *net, return s; } -static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) +static struct sock *unix_find_socket_byinode(struct inode *i) { struct sock *s; struct hlist_node *node; @@ -292,9 +292,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; - if (!net_eq(sock_net(s), net)) - continue; - if (dentry && dentry->d_inode == i) { sock_hold(s); goto found; @@ -450,11 +447,31 @@ static int unix_release_sock(struct sock *sk, int embrion) return 0; } +static void init_peercred(struct sock *sk) +{ + put_pid(sk->sk_peer_pid); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + sk->sk_peer_pid = get_pid(task_tgid(current)); + sk->sk_peer_cred = get_current_cred(); +} + +static void copy_peercred(struct sock *sk, struct sock *peersk) +{ + put_pid(sk->sk_peer_pid); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); + sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); +} + static int unix_listen(struct socket *sock, int backlog) { int err; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); + struct pid *old_pid = NULL; + const struct cred *old_cred = NULL; err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) @@ -470,12 +487,14 @@ static int unix_listen(struct socket *sock, int backlog) sk->sk_max_ack_backlog = backlog; sk->sk_state = 
TCP_LISTEN; /* set credentials so connect can copy them */ - sk->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid); + init_peercred(sk); err = 0; out_unlock: unix_state_unlock(sk); + put_pid(old_pid); + if (old_cred) + put_cred(old_cred); out: return err; } @@ -736,7 +755,7 @@ static struct sock *unix_find_other(struct net *net, err = -ECONNREFUSED; if (!S_ISSOCK(inode->i_mode)) goto put_fail; - u = unix_find_socket_byinode(net, inode); + u = unix_find_socket_byinode(inode); if (!u) goto put_fail; @@ -1140,8 +1159,7 @@ restart: unix_peer(newsk) = sk; newsk->sk_state = TCP_ESTABLISHED; newsk->sk_type = sk->sk_type; - newsk->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); + init_peercred(newsk); newu = unix_sk(newsk); newsk->sk_wq = &newu->peer_wq; otheru = unix_sk(other); @@ -1157,7 +1175,7 @@ restart: } /* Set credentials */ - sk->sk_peercred = other->sk_peercred; + copy_peercred(sk, other); sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; @@ -1199,10 +1217,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) sock_hold(skb); unix_peer(ska) = skb; unix_peer(skb) = ska; - ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid); - ska->sk_peercred.uid = skb->sk_peercred.uid; - ska->sk_peercred.gid = skb->sk_peercred.gid; + init_peercred(ska); + init_peercred(skb); if (ska->sk_type != SOCK_DGRAM) { ska->sk_state = TCP_ESTABLISHED; @@ -1297,18 +1313,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) int i; scm->fp = UNIXCB(skb).fp; - skb->destructor = sock_wfree; UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) unix_notinflight(scm->fp->fp[i]); } -static void unix_destruct_fds(struct sk_buff *skb) +static void unix_destruct_scm(struct sk_buff *skb) { struct scm_cookie scm; memset(&scm, 0, 
sizeof(scm)); - unix_detach_fds(&scm, skb); + scm.pid = UNIXCB(skb).pid; + scm.cred = UNIXCB(skb).cred; + if (UNIXCB(skb).fp) + unix_detach_fds(&scm, skb); /* Alas, it calls VFS */ /* So fscking what? fput() had been SMP-safe since the last Summer */ @@ -1331,10 +1349,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count-1; i >= 0; i--) unix_inflight(scm->fp->fp[i]); - skb->destructor = unix_destruct_fds; return 0; } +static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) +{ + int err = 0; + UNIXCB(skb).pid = get_pid(scm->pid); + UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).fp = NULL; + if (scm->fp && send_fds) + err = unix_attach_fds(scm, skb); + + skb->destructor = unix_destruct_scm; + return err; +} + /* * Send AF_UNIX data. */ @@ -1391,12 +1421,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) { - err = unix_attach_fds(siocb->scm, skb); - if (err) - goto out_free; - } + err = unix_scm_to_skb(siocb->scm, skb, true); + if (err) + goto out_free; unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1566,16 +1593,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, */ size = min_t(int, size, skb_tailroom(skb)); - memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); + /* Only send the fds in the first buffer */ - if (siocb->scm->fp && !fds_sent) { - err = unix_attach_fds(siocb->scm, skb); - if (err) { - kfree_skb(skb); - goto out_err; - } - fds_sent = true; + err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); + if (err) { + kfree_skb(skb); + goto out_err; } + fds_sent = true; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err) { @@ -1692,7 +1717,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm = &tmp_scm; memset(&tmp_scm, 
0, sizeof(tmp_scm)); } - siocb->scm->creds = *UNIXCREDS(skb); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1841,14 +1866,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (check_creds) { /* Never glue messages from different writers */ - if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, - sizeof(siocb->scm->creds)) != 0) { + if ((UNIXCB(skb).pid != siocb->scm->pid) || + (UNIXCB(skb).cred != siocb->scm->cred)) { skb_queue_head(&sk->sk_receive_queue, skb); break; } } else { /* Copy credentials */ - siocb->scm->creds = *UNIXCREDS(skb); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); check_creds = 1; } @@ -1881,7 +1906,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, break; } - kfree_skb(skb); + consume_skb(skb); if (siocb->scm->fp) break; diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 258daa80ad92..2bf23406637a 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -48,7 +48,7 @@ #include <linux/kernel.h> #include <linux/module.h> /* support for loadable modules */ #include <linux/slab.h> /* kmalloc(), kfree() */ -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/mm.h> #include <linux/string.h> /* inline mem*, str* functions */ @@ -71,6 +71,7 @@ * WAN device IOCTL handlers */ +static DEFINE_MUTEX(wanrouter_mutex); static int wanrouter_device_setup(struct wan_device *wandev, wandev_conf_t __user *u_conf); static int wanrouter_device_stat(struct wan_device *wandev, @@ -376,7 +377,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (wandev->magic != ROUTER_MAGIC) return -EINVAL; - lock_kernel(); + mutex_lock(&wanrouter_mutex); switch (cmd) { case ROUTER_SETUP: err = wanrouter_device_setup(wandev, data); @@ -408,7 +409,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = wandev->ioctl(wandev, cmd, 
arg); else err = -EINVAL; } - unlock_kernel(); + mutex_unlock(&wanrouter_mutex); return err; } diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index c44d96b3a437..11f25c7a7a05 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -27,7 +27,7 @@ #include <linux/module.h> #include <linux/wanrouter.h> /* WAN router API definitions */ #include <linux/seq_file.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <net/net_namespace.h> #include <asm/io.h> @@ -66,6 +66,7 @@ * /proc/net/router */ +static DEFINE_MUTEX(config_mutex); static struct proc_dir_entry *proc_router; /* Strings */ @@ -85,7 +86,7 @@ static void *r_start(struct seq_file *m, loff_t *pos) struct wan_device *wandev; loff_t l = *pos; - lock_kernel(); + mutex_lock(&config_mutex); if (!l--) return SEQ_START_TOKEN; for (wandev = wanrouter_router_devlist; l-- && wandev; @@ -104,7 +105,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) static void r_stop(struct seq_file *m, void *v) __releases(kernel_lock) { - unlock_kernel(); + mutex_unlock(&config_mutex); } static int config_show(struct seq_file *m, void *v) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index b01a6f6397d7..d0c92dddb26b 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -35,8 +35,9 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev, if (!ht_cap->ht_supported) return NULL; - if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || - ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) + if (channel_type != NL80211_CHAN_HT20 && + (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || + ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT)) return NULL; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 37d0e0ab4432..541e2fff5e9c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -472,24 +472,22 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); + mutex_lock(&cfg80211_mutex); + res = 
device_add(&rdev->wiphy.dev); if (res) - return res; + goto out_unlock; res = rfkill_register(rdev->rfkill); if (res) goto out_rm_dev; - mutex_lock(&cfg80211_mutex); - /* set up regulatory info */ wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); list_add_rcu(&rdev->list, &cfg80211_rdev_list); cfg80211_rdev_list_generation++; - mutex_unlock(&cfg80211_mutex); - /* add to debugfs */ rdev->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&rdev->wiphy), @@ -509,11 +507,15 @@ int wiphy_register(struct wiphy *wiphy) } cfg80211_debugfs_rdev_add(rdev); + mutex_unlock(&cfg80211_mutex); return 0; - out_rm_dev: +out_rm_dev: device_del(&rdev->wiphy.dev); + +out_unlock: + mutex_unlock(&cfg80211_mutex); return res; } EXPORT_SYMBOL(wiphy_register); @@ -894,7 +896,7 @@ out_fail_pernet: } subsys_initcall(cfg80211_init); -static void cfg80211_exit(void) +static void __exit cfg80211_exit(void) { debugfs_remove(ieee80211_debugfs_dir); nl80211_exit(); @@ -905,3 +907,52 @@ static void cfg80211_exit(void) destroy_workqueue(cfg80211_wq); } module_exit(cfg80211_exit); + +static int ___wiphy_printk(const char *level, const struct wiphy *wiphy, + struct va_format *vaf) +{ + if (!wiphy) + return printk("%s(NULL wiphy *): %pV", level, vaf); + + return printk("%s%s: %pV", level, wiphy_name(wiphy), vaf); +} + +int __wiphy_printk(const char *level, const struct wiphy *wiphy, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + int r; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + r = ___wiphy_printk(level, wiphy, &vaf); + va_end(args); + + return r; +} +EXPORT_SYMBOL(__wiphy_printk); + +#define define_wiphy_printk_level(func, kern_level) \ +int func(const struct wiphy *wiphy, const char *fmt, ...) 
\ +{ \ + struct va_format vaf; \ + va_list args; \ + int r; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + r = ___wiphy_printk(kern_level, wiphy, &vaf); \ + va_end(args); \ + \ + return r; \ +} \ +EXPORT_SYMBOL(func); + +define_wiphy_printk_level(wiphy_debug, KERN_DEBUG); diff --git a/net/wireless/core.h b/net/wireless/core.h index ae930acf75e9..63d57ae399c3 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -339,6 +339,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie); /* SME */ diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index 3cc9e69880a8..53c143f5e770 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -21,6 +21,7 @@ BEGIN { print "" print "#include <linux/nl80211.h>" print "#include <net/cfg80211.h>" + print "#include \"regdb.h\"" print "" regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n" } diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index adcabba02e20..27a8ce9343c3 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -247,8 +247,10 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return 0; - if (wdev->wext.keys) + if (wdev->wext.keys) { wdev->wext.keys->def = wdev->wext.default_key; + wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; + } wdev->wext.ibss.privacy = wdev->wext.default_key != -1; diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index b7fa31d5fd13..dacb3b4b1bdb 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -467,7 +467,6 @@ static struct lib80211_crypto_ops lib80211_crypt_ccmp = { .name = "CCMP", .init = lib80211_ccmp_init, .deinit = lib80211_ccmp_deinit, - .build_iv = lib80211_ccmp_hdr, .encrypt_mpdu = 
lib80211_ccmp_encrypt, .decrypt_mpdu = lib80211_ccmp_decrypt, .encrypt_msdu = NULL, diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 8cbdb32ff316..0fe40510e2cb 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -578,7 +578,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) } if (ieee80211_is_data_qos(hdr11->frame_control)) { - hdr[12] = le16_to_cpu(*ieee80211_get_qos_ctl(hdr11)) + hdr[12] = le16_to_cpu(*((__le16 *)ieee80211_get_qos_ctl(hdr11))) & IEEE80211_QOS_CTL_TID_MASK; } else hdr[12] = 0; /* priority */ @@ -757,7 +757,6 @@ static struct lib80211_crypto_ops lib80211_crypt_tkip = { .name = "TKIP", .init = lib80211_tkip_init, .deinit = lib80211_tkip_deinit, - .build_iv = lib80211_tkip_hdr, .encrypt_mpdu = lib80211_tkip_encrypt, .decrypt_mpdu = lib80211_tkip_decrypt, .encrypt_msdu = lib80211_michael_mic_add, diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c index 6d41e05ca33b..e2e88878ba35 100644 --- a/net/wireless/lib80211_crypt_wep.c +++ b/net/wireless/lib80211_crypt_wep.c @@ -269,7 +269,6 @@ static struct lib80211_crypto_ops lib80211_crypt_wep = { .name = "WEP", .init = lib80211_wep_init, .deinit = lib80211_wep_deinit, - .build_iv = lib80211_wep_build_iv, .encrypt_mpdu = lib80211_wep_encrypt, .decrypt_mpdu = lib80211_wep_decrypt, .encrypt_msdu = NULL, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 48ead6f0426d..e74a1a2119d3 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -44,10 +44,10 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) } } - WARN_ON(!done); - - nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); - cfg80211_sme_rx_auth(dev, buf, len); + if (done) { + nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); + cfg80211_sme_rx_auth(dev, buf, len); + } wdev_unlock(wdev); } @@ -827,6 +827,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, struct 
net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -845,8 +846,9 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, if (!wdev->current_bss || memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, ETH_ALEN) != 0 || - memcmp(wdev->current_bss->pub.bssid, mgmt->da, - ETH_ALEN) != 0) + (wdev->iftype == NL80211_IFTYPE_STATION && + memcmp(wdev->current_bss->pub.bssid, mgmt->da, + ETH_ALEN) != 0)) return -ENOTCONN; } @@ -855,7 +857,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, /* Transmit the Action frame as requested by user space */ return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, - buf, len, cookie); + channel_type_valid, buf, len, cookie); } bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index db71150b8040..37902a54e9c1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -153,6 +153,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, + + [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, }; /* policy for the attributes */ @@ -869,6 +872,34 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { + enum nl80211_tx_power_setting type; + int idx, mbm = 0; + + if (!rdev->ops->set_tx_power) { + result = -EOPNOTSUPP; + goto bad_res; + } + + idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING; + type = nla_get_u32(info->attrs[idx]); + + if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] && + (type != NL80211_TX_POWER_AUTOMATIC)) { + result = 
-EINVAL; + goto bad_res; + } + + if (type != NL80211_TX_POWER_AUTOMATIC) { + idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL; + mbm = nla_get_u32(info->attrs[idx]); + } + + result = rdev->ops->set_tx_power(&rdev->wiphy, type, mbm); + if (result) + goto bad_res; + } + changed = 0; if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { @@ -1107,7 +1138,7 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { if (!use_4addr) { - if (netdev && netdev->br_port) + if (netdev && (netdev->priv_flags & IFF_BRIDGE_PORT)) return -EBUSY; return 0; } @@ -2738,6 +2769,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, nla_put_failure: genlmsg_cancel(msg, hdr); + nlmsg_free(msg); err = -EMSGSIZE; out: /* Cleanup */ @@ -2929,6 +2961,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_put_failure: genlmsg_cancel(msg, hdr); + nlmsg_free(msg); err = -EMSGSIZE; out: mutex_unlock(&cfg80211_mutex); @@ -3955,6 +3988,55 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { + u8 *rates = + nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + int n_rates = + nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + struct ieee80211_supported_band *sband = + wiphy->bands[ibss.channel->band]; + int i, j; + + if (n_rates == 0) { + err = -EINVAL; + goto out; + } + + for (i = 0; i < n_rates; i++) { + int rate = (rates[i] & 0x7f) * 5; + bool found = false; + + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].bitrate == rate) { + found = true; + ibss.basic_rates |= BIT(j); + break; + } + } + if (!found) { + err = -EINVAL; + goto out; + } + } + } else { + /* + * If no rates were explicitly configured, + * use the mandatory rate set for 11b or + * 11a for maximum compatibility. + */ + struct ieee80211_supported_band *sband = + wiphy->bands[ibss.channel->band]; + int j; + u32 flag = ibss.channel->band == IEEE80211_BAND_5GHZ ? 
+ IEEE80211_RATE_MANDATORY_A : + IEEE80211_RATE_MANDATORY_B; + + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].flags & flag) + ibss.basic_rates |= BIT(j); + } + } + err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); out: @@ -4653,7 +4735,8 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { err = -EOPNOTSUPP; goto out; } @@ -4681,6 +4764,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct ieee80211_channel *chan; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + bool channel_type_valid = false; u32 freq; int err; void *hdr; @@ -4702,7 +4786,8 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) goto out; } - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { err = -EOPNOTSUPP; goto out; } @@ -4722,6 +4807,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out; } + channel_type_valid = true; } freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); @@ -4745,6 +4831,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) goto free_msg; } err = cfg80211_mlme_action(rdev, dev, chan, channel_type, + channel_type_valid, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), &cookie); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8f0d97dd3109..f180db0de66c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -67,20 +67,12 @@ static struct platform_device *reg_pdev; const struct ieee80211_regdomain *cfg80211_regdomain; /* - * We use this as a place for the rd structure built from the - * 
last parsed country IE to rest until CRDA gets back to us with - * what it thinks should apply for the same country - */ -static const struct ieee80211_regdomain *country_ie_regdomain; - -/* * Protects static reg.c components: * - cfg80211_world_regdom * - cfg80211_regdom - * - country_ie_regdomain * - last_request */ -DEFINE_MUTEX(reg_mutex); +static DEFINE_MUTEX(reg_mutex); #define assert_reg_lock() WARN_ON(!mutex_is_locked(&reg_mutex)) /* Used to queue up regulatory hints */ @@ -275,25 +267,6 @@ static bool is_user_regdom_saved(void) return true; } -/** - * country_ie_integrity_changes - tells us if the country IE has changed - * @checksum: checksum of country IE of fields we are interested in - * - * If the country IE has not changed you can ignore it safely. This is - * useful to determine if two devices are seeing two different country IEs - * even on the same alpha2. Note that this will return false if no IE has - * been set on the wireless core yet. - */ -static bool country_ie_integrity_changes(u32 checksum) -{ - /* If no IE has been set then the checksum doesn't change */ - if (unlikely(!last_request->country_ie_checksum)) - return false; - if (unlikely(last_request->country_ie_checksum != checksum)) - return true; - return false; -} - static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, const struct ieee80211_regdomain *src_regd) { @@ -506,471 +479,6 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, } /* - * This is a work around for sanity checking ieee80211_channel_to_frequency()'s - * work. ieee80211_channel_to_frequency() can for example currently provide a - * 2 GHz channel when in fact a 5 GHz channel was desired. An example would be - * an AP providing channel 8 on a country IE triplet when it sent this on the - * 5 GHz band, that channel is designed to be channel 8 on 5 GHz, not a 2 GHz - * channel. - * - * This can be removed once ieee80211_channel_to_frequency() takes in a band. 
- */ -static bool chan_in_band(int chan, enum ieee80211_band band) -{ - int center_freq = ieee80211_channel_to_frequency(chan); - - switch (band) { - case IEEE80211_BAND_2GHZ: - if (center_freq <= 2484) - return true; - return false; - case IEEE80211_BAND_5GHZ: - if (center_freq >= 5005) - return true; - return false; - default: - return false; - } -} - -/* - * Some APs may send a country IE triplet for each channel they - * support and while this is completely overkill and silly we still - * need to support it. We avoid making a single rule for each channel - * though and to help us with this we use this helper to find the - * actual subband end channel. These type of country IE triplet - * scenerios are handled then, all yielding two regulaotry rules from - * parsing a country IE: - * - * [1] - * [2] - * [36] - * [40] - * - * [1] - * [2-4] - * [5-12] - * [36] - * [40-44] - * - * [1-4] - * [5-7] - * [36-44] - * [48-64] - * - * [36-36] - * [40-40] - * [44-44] - * [48-48] - * [52-52] - * [56-56] - * [60-60] - * [64-64] - * [100-100] - * [104-104] - * [108-108] - * [112-112] - * [116-116] - * [120-120] - * [124-124] - * [128-128] - * [132-132] - * [136-136] - * [140-140] - * - * Returns 0 if the IE has been found to be invalid in the middle - * somewhere. 
- */ -static int max_subband_chan(enum ieee80211_band band, - int orig_cur_chan, - int orig_end_channel, - s8 orig_max_power, - u8 **country_ie, - u8 *country_ie_len) -{ - u8 *triplets_start = *country_ie; - u8 len_at_triplet = *country_ie_len; - int end_subband_chan = orig_end_channel; - - /* - * We'll deal with padding for the caller unless - * its not immediate and we don't process any channels - */ - if (*country_ie_len == 1) { - *country_ie += 1; - *country_ie_len -= 1; - return orig_end_channel; - } - - /* Move to the next triplet and then start search */ - *country_ie += 3; - *country_ie_len -= 3; - - if (!chan_in_band(orig_cur_chan, band)) - return 0; - - while (*country_ie_len >= 3) { - int end_channel = 0; - struct ieee80211_country_ie_triplet *triplet = - (struct ieee80211_country_ie_triplet *) *country_ie; - int cur_channel = 0, next_expected_chan; - - /* means last triplet is completely unrelated to this one */ - if (triplet->ext.reg_extension_id >= - IEEE80211_COUNTRY_EXTENSION_ID) { - *country_ie -= 3; - *country_ie_len += 3; - break; - } - - if (triplet->chans.first_channel == 0) { - *country_ie += 1; - *country_ie_len -= 1; - if (*country_ie_len != 0) - return 0; - break; - } - - if (triplet->chans.num_channels == 0) - return 0; - - /* Monitonically increasing channel order */ - if (triplet->chans.first_channel <= end_subband_chan) - return 0; - - if (!chan_in_band(triplet->chans.first_channel, band)) - return 0; - - /* 2 GHz */ - if (triplet->chans.first_channel <= 14) { - end_channel = triplet->chans.first_channel + - triplet->chans.num_channels - 1; - } - else { - end_channel = triplet->chans.first_channel + - (4 * (triplet->chans.num_channels - 1)); - } - - if (!chan_in_band(end_channel, band)) - return 0; - - if (orig_max_power != triplet->chans.max_power) { - *country_ie -= 3; - *country_ie_len += 3; - break; - } - - cur_channel = triplet->chans.first_channel; - - /* The key is finding the right next expected channel */ - if (band == 
IEEE80211_BAND_2GHZ) - next_expected_chan = end_subband_chan + 1; - else - next_expected_chan = end_subband_chan + 4; - - if (cur_channel != next_expected_chan) { - *country_ie -= 3; - *country_ie_len += 3; - break; - } - - end_subband_chan = end_channel; - - /* Move to the next one */ - *country_ie += 3; - *country_ie_len -= 3; - - /* - * Padding needs to be dealt with if we processed - * some channels. - */ - if (*country_ie_len == 1) { - *country_ie += 1; - *country_ie_len -= 1; - break; - } - - /* If seen, the IE is invalid */ - if (*country_ie_len == 2) - return 0; - } - - if (end_subband_chan == orig_end_channel) { - *country_ie = triplets_start; - *country_ie_len = len_at_triplet; - return orig_end_channel; - } - - return end_subband_chan; -} - -/* - * Converts a country IE to a regulatory domain. A regulatory domain - * structure has a lot of information which the IE doesn't yet have, - * so for the other values we use upper max values as we will intersect - * with our userspace regulatory agent to get lower bounds. 
- */ -static struct ieee80211_regdomain *country_ie_2_rd( - enum ieee80211_band band, - u8 *country_ie, - u8 country_ie_len, - u32 *checksum) -{ - struct ieee80211_regdomain *rd = NULL; - unsigned int i = 0; - char alpha2[2]; - u32 flags = 0; - u32 num_rules = 0, size_of_regd = 0; - u8 *triplets_start = NULL; - u8 len_at_triplet = 0; - /* the last channel we have registered in a subband (triplet) */ - int last_sub_max_channel = 0; - - *checksum = 0xDEADBEEF; - - /* Country IE requirements */ - BUG_ON(country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN || - country_ie_len & 0x01); - - alpha2[0] = country_ie[0]; - alpha2[1] = country_ie[1]; - - /* - * Third octet can be: - * 'I' - Indoor - * 'O' - Outdoor - * - * anything else we assume is no restrictions - */ - if (country_ie[2] == 'I') - flags = NL80211_RRF_NO_OUTDOOR; - else if (country_ie[2] == 'O') - flags = NL80211_RRF_NO_INDOOR; - - country_ie += 3; - country_ie_len -= 3; - - triplets_start = country_ie; - len_at_triplet = country_ie_len; - - *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); - - /* - * We need to build a reg rule for each triplet, but first we must - * calculate the number of reg rules we will need. We will need one - * for each channel subband - */ - while (country_ie_len >= 3) { - int end_channel = 0; - struct ieee80211_country_ie_triplet *triplet = - (struct ieee80211_country_ie_triplet *) country_ie; - int cur_sub_max_channel = 0, cur_channel = 0; - - if (triplet->ext.reg_extension_id >= - IEEE80211_COUNTRY_EXTENSION_ID) { - country_ie += 3; - country_ie_len -= 3; - continue; - } - - /* - * APs can add padding to make length divisible - * by two, required by the spec. 
- */ - if (triplet->chans.first_channel == 0) { - country_ie++; - country_ie_len--; - /* This is expected to be at the very end only */ - if (country_ie_len != 0) - return NULL; - break; - } - - if (triplet->chans.num_channels == 0) - return NULL; - - if (!chan_in_band(triplet->chans.first_channel, band)) - return NULL; - - /* 2 GHz */ - if (band == IEEE80211_BAND_2GHZ) - end_channel = triplet->chans.first_channel + - triplet->chans.num_channels - 1; - else - /* - * 5 GHz -- For example in country IEs if the first - * channel given is 36 and the number of channels is 4 - * then the individual channel numbers defined for the - * 5 GHz PHY by these parameters are: 36, 40, 44, and 48 - * and not 36, 37, 38, 39. - * - * See: http://tinyurl.com/11d-clarification - */ - end_channel = triplet->chans.first_channel + - (4 * (triplet->chans.num_channels - 1)); - - cur_channel = triplet->chans.first_channel; - - /* - * Enhancement for APs that send a triplet for every channel - * or for whatever reason sends triplets with multiple channels - * separated when in fact they should be together. - */ - end_channel = max_subband_chan(band, - cur_channel, - end_channel, - triplet->chans.max_power, - &country_ie, - &country_ie_len); - if (!end_channel) - return NULL; - - if (!chan_in_band(end_channel, band)) - return NULL; - - cur_sub_max_channel = end_channel; - - /* Basic sanity check */ - if (cur_sub_max_channel < cur_channel) - return NULL; - - /* - * Do not allow overlapping channels. 
Also channels - * passed in each subband must be monotonically - * increasing - */ - if (last_sub_max_channel) { - if (cur_channel <= last_sub_max_channel) - return NULL; - if (cur_sub_max_channel <= last_sub_max_channel) - return NULL; - } - - /* - * When dot11RegulatoryClassesRequired is supported - * we can throw ext triplets as part of this soup, - * for now we don't care when those change as we - * don't support them - */ - *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | - ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | - ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); - - last_sub_max_channel = cur_sub_max_channel; - - num_rules++; - - if (country_ie_len >= 3) { - country_ie += 3; - country_ie_len -= 3; - } - - /* - * Note: this is not a IEEE requirement but - * simply a memory requirement - */ - if (num_rules > NL80211_MAX_SUPP_REG_RULES) - return NULL; - } - - country_ie = triplets_start; - country_ie_len = len_at_triplet; - - size_of_regd = sizeof(struct ieee80211_regdomain) + - (num_rules * sizeof(struct ieee80211_reg_rule)); - - rd = kzalloc(size_of_regd, GFP_KERNEL); - if (!rd) - return NULL; - - rd->n_reg_rules = num_rules; - rd->alpha2[0] = alpha2[0]; - rd->alpha2[1] = alpha2[1]; - - /* This time around we fill in the rd */ - while (country_ie_len >= 3) { - int end_channel = 0; - struct ieee80211_country_ie_triplet *triplet = - (struct ieee80211_country_ie_triplet *) country_ie; - struct ieee80211_reg_rule *reg_rule = NULL; - struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_power_rule *power_rule = NULL; - - /* - * Must parse if dot11RegulatoryClassesRequired is true, - * we don't support this yet - */ - if (triplet->ext.reg_extension_id >= - IEEE80211_COUNTRY_EXTENSION_ID) { - country_ie += 3; - country_ie_len -= 3; - continue; - } - - if (triplet->chans.first_channel == 0) { - country_ie++; - country_ie_len--; - break; - } - - reg_rule = &rd->reg_rules[i]; - freq_range = &reg_rule->freq_range; - power_rule = 
&reg_rule->power_rule; - - reg_rule->flags = flags; - - /* 2 GHz */ - if (band == IEEE80211_BAND_2GHZ) - end_channel = triplet->chans.first_channel + - triplet->chans.num_channels -1; - else - end_channel = triplet->chans.first_channel + - (4 * (triplet->chans.num_channels - 1)); - - end_channel = max_subband_chan(band, - triplet->chans.first_channel, - end_channel, - triplet->chans.max_power, - &country_ie, - &country_ie_len); - - /* - * The +10 is since the regulatory domain expects - * the actual band edge, not the center of freq for - * its start and end freqs, assuming 20 MHz bandwidth on - * the channels passed - */ - freq_range->start_freq_khz = - MHZ_TO_KHZ(ieee80211_channel_to_frequency( - triplet->chans.first_channel) - 10); - freq_range->end_freq_khz = - MHZ_TO_KHZ(ieee80211_channel_to_frequency( - end_channel) + 10); - - /* - * These are large arbitrary values we use to intersect later. - * Increment this if we ever support >= 40 MHz channels - * in IEEE 802.11 - */ - freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); - power_rule->max_antenna_gain = DBI_TO_MBI(100); - power_rule->max_eirp = DBM_TO_MBM(triplet->chans.max_power); - - i++; - - if (country_ie_len >= 3) { - country_ie += 3; - country_ie_len -= 3; - } - - BUG_ON(i > NL80211_MAX_SUPP_REG_RULES); - } - - return rd; -} - - -/* * Helper for regdom_intersect(), this does the real * mathematical intersection fun */ @@ -1191,7 +699,6 @@ static int freq_reg_info_regd(struct wiphy *wiphy, return -EINVAL; } -EXPORT_SYMBOL(freq_reg_info); int freq_reg_info(struct wiphy *wiphy, u32 center_freq, @@ -1205,6 +712,7 @@ int freq_reg_info(struct wiphy *wiphy, reg_rule, NULL); } +EXPORT_SYMBOL(freq_reg_info); /* * Note that right now we assume the desired channel bandwidth @@ -1243,41 +751,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, desired_bw_khz, &reg_rule); - if (r) { - /* - * This means no regulatory rule was found in the country IE - * with a frequency range on the center_freq's 
band, since - * IEEE-802.11 allows for a country IE to have a subset of the - * regulatory information provided in a country we ignore - * disabling the channel unless at least one reg rule was - * found on the center_freq's band. For details see this - * clarification: - * - * http://tinyurl.com/11d-clarification - */ - if (r == -ERANGE && - last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { - REG_DBG_PRINT("cfg80211: Leaving channel %d MHz " - "intact on %s - no rule found in band on " - "Country IE\n", - chan->center_freq, wiphy_name(wiphy)); - } else { - /* - * In this case we know the country IE has at least one reg rule - * for the band so we respect its band definitions - */ - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) - REG_DBG_PRINT("cfg80211: Disabling " - "channel %d MHz on %s due to " - "Country IE\n", - chan->center_freq, wiphy_name(wiphy)); - flags |= IEEE80211_CHAN_DISABLED; - chan->flags = flags; - } + if (r) return; - } power_rule = &reg_rule->power_rule; freq_range = &reg_rule->freq_range; @@ -1831,6 +1306,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) { int r = 0; struct wiphy *wiphy = NULL; + enum nl80211_reg_initiator initiator = reg_request->initiator; BUG_ON(!reg_request->alpha2); @@ -1850,7 +1326,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) /* This is required so that the orig_* parameters are saved */ if (r == -EALREADY && wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) - wiphy_update_regulatory(wiphy, reg_request->initiator); + wiphy_update_regulatory(wiphy, initiator); out: mutex_unlock(&reg_mutex); mutex_unlock(&cfg80211_mutex); @@ -2008,35 +1484,6 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) } EXPORT_SYMBOL(regulatory_hint); -/* Caller must hold reg_mutex */ -static bool reg_same_country_ie_hint(struct wiphy *wiphy, - u32 country_ie_checksum) -{ - struct wiphy *request_wiphy; - - assert_reg_lock(); - - if 
(unlikely(last_request->initiator != - NL80211_REGDOM_SET_BY_COUNTRY_IE)) - return false; - - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); - - if (!request_wiphy) - return false; - - if (likely(request_wiphy != wiphy)) - return !country_ie_integrity_changes(country_ie_checksum); - /* - * We should not have let these through at this point, they - * should have been picked up earlier by the first alpha2 check - * on the device - */ - if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) - return true; - return false; -} - /* * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and * therefore cannot iterate over the rdev list here. @@ -2046,9 +1493,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, u8 *country_ie, u8 country_ie_len) { - struct ieee80211_regdomain *rd = NULL; char alpha2[2]; - u32 checksum = 0; enum environment_cap env = ENVIRON_ANY; struct regulatory_request *request; @@ -2064,14 +1509,6 @@ void regulatory_hint_11d(struct wiphy *wiphy, if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) goto out; - /* - * Pending country IE processing, this can happen after we - * call CRDA and wait for a response if a beacon was received before - * we were able to process the last regulatory_hint_11d() call - */ - if (country_ie_regdomain) - goto out; - alpha2[0] = country_ie[0]; alpha2[1] = country_ie[1]; @@ -2090,39 +1527,14 @@ void regulatory_hint_11d(struct wiphy *wiphy, wiphy_idx_valid(last_request->wiphy_idx))) goto out; - rd = country_ie_2_rd(band, country_ie, country_ie_len, &checksum); - if (!rd) { - REG_DBG_PRINT("cfg80211: Ignoring bogus country IE\n"); - goto out; - } - - /* - * This will not happen right now but we leave it here for the - * the future when we want to add suspend/resume support and having - * the user move to another country after doing so, or having the user - * move to another AP. Right now we just trust the first AP. 
- * - * If we hit this before we add this support we want to be informed of - * it as it would indicate a mistake in the current design - */ - if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum))) - goto free_rd_out; - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) - goto free_rd_out; - - /* - * We keep this around for when CRDA comes back with a response so - * we can intersect with that - */ - country_ie_regdomain = rd; + goto out; request->wiphy_idx = get_wiphy_idx(wiphy); - request->alpha2[0] = rd->alpha2[0]; - request->alpha2[1] = rd->alpha2[1]; + request->alpha2[0] = alpha2[0]; + request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; - request->country_ie_checksum = checksum; request->country_ie_env = env; mutex_unlock(&reg_mutex); @@ -2131,8 +1543,6 @@ void regulatory_hint_11d(struct wiphy *wiphy, return; -free_rd_out: - kfree(rd); out: mutex_unlock(&reg_mutex); } @@ -2383,33 +1793,6 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) print_rd_rules(rd); } -#ifdef CONFIG_CFG80211_REG_DEBUG -static void reg_country_ie_process_debug( - const struct ieee80211_regdomain *rd, - const struct ieee80211_regdomain *country_ie_regdomain, - const struct ieee80211_regdomain *intersected_rd) -{ - printk(KERN_DEBUG "cfg80211: Received country IE:\n"); - print_regdomain_info(country_ie_regdomain); - printk(KERN_DEBUG "cfg80211: CRDA thinks this should applied:\n"); - print_regdomain_info(rd); - if (intersected_rd) { - printk(KERN_DEBUG "cfg80211: We intersect both of these " - "and get:\n"); - print_regdomain_info(intersected_rd); - return; - } - printk(KERN_DEBUG "cfg80211: Intersection between both failed\n"); -} -#else -static inline void reg_country_ie_process_debug( - const struct ieee80211_regdomain *rd, - const struct ieee80211_regdomain *country_ie_regdomain, - const struct ieee80211_regdomain *intersected_rd) -{ -} -#endif - /* Takes ownership of rd only if it doesn't fail */ static 
int __set_regdom(const struct ieee80211_regdomain *rd) { @@ -2521,34 +1904,6 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return 0; } - /* - * Country IE requests are handled a bit differently, we intersect - * the country IE rd with what CRDA believes that country should have - */ - - /* - * Userspace could have sent two replies with only - * one kernel request. By the second reply we would have - * already processed and consumed the country_ie_regdomain. - */ - if (!country_ie_regdomain) - return -EALREADY; - BUG_ON(rd == country_ie_regdomain); - - /* - * Intersect what CRDA returned and our what we - * had built from the Country IE received - */ - - intersected_rd = regdom_intersect(rd, country_ie_regdomain); - - reg_country_ie_process_debug(rd, - country_ie_regdomain, - intersected_rd); - - kfree(country_ie_regdomain); - country_ie_regdomain = NULL; - if (!intersected_rd) return -EINVAL; @@ -2630,7 +1985,7 @@ out: mutex_unlock(&reg_mutex); } -int regulatory_init(void) +int __init regulatory_init(void) { int err = 0; @@ -2676,7 +2031,7 @@ int regulatory_init(void) return 0; } -void regulatory_exit(void) +void /* __init_or_exit */ regulatory_exit(void) { struct regulatory_request *reg_request, *tmp; struct reg_beacon *reg_beacon, *btmp; @@ -2688,9 +2043,6 @@ void regulatory_exit(void) reset_regdomains(); - kfree(country_ie_regdomain); - country_ie_regdomain = NULL; - kfree(last_request); platform_device_unregister(reg_pdev); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index b26224a9f3bc..c4695d07af23 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -10,7 +10,7 @@ int regulatory_hint_user(const char *alpha2); void reg_device_remove(struct wiphy *wiphy); -int regulatory_init(void); +int __init regulatory_init(void); void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 58401d246bda..5ca8c7180141 100644 --- a/net/wireless/scan.c +++ 
b/net/wireless/scan.c @@ -275,6 +275,7 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, { struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); struct cfg80211_internal_bss *bss, *res = NULL; + unsigned long now = jiffies; spin_lock_bh(&dev->bss_lock); @@ -283,6 +284,10 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, continue; if (channel && bss->pub.channel != channel) continue; + /* Don't get expired BSS structs */ + if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) && + !atomic_read(&bss->hold)) + continue; if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { res = bss; kref_get(&res->ref); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 72222f0074db..a8c2d6b877ae 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -35,7 +35,7 @@ struct cfg80211_conn { bool auto_auth, prev_bssid_valid; }; -bool cfg80211_is_all_idle(void) +static bool cfg80211_is_all_idle(void) { struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; diff --git a/net/wireless/util.c b/net/wireless/util.c index 3416373a9c0c..0c8a1e8b7690 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -770,8 +770,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EOPNOTSUPP; /* if it's part of a bridge, reject changing type to station/ibss */ - if (dev->br_port && (ntype == NL80211_IFTYPE_ADHOC || - ntype == NL80211_IFTYPE_STATION)) + if ((dev->priv_flags & IFF_BRIDGE_PORT) && + (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION)) return -EBUSY; if (ntype != otype) { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 96342993cf93..bb5e0a5ecfa1 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -829,7 +829,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - enum tx_power_setting type; + enum 
nl80211_tx_power_setting type; int dbm = 0; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) @@ -852,7 +852,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, if (data->txpower.value < 0) return -EINVAL; dbm = data->txpower.value; - type = TX_POWER_FIXED; + type = NL80211_TX_POWER_FIXED; /* TODO: do regulatory check! */ } else { /* @@ -860,10 +860,10 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, * passed in from userland. */ if (data->txpower.value < 0) { - type = TX_POWER_AUTOMATIC; + type = NL80211_TX_POWER_AUTOMATIC; } else { dbm = data->txpower.value; - type = TX_POWER_LIMITED; + type = NL80211_TX_POWER_LIMITED; } } } else { @@ -872,7 +872,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, return 0; } - return rdev->ops->set_tx_power(wdev->wiphy, type, dbm); + return rdev->ops->set_tx_power(wdev->wiphy, type, DBM_TO_MBM(dbm)); } EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower); @@ -1471,6 +1471,7 @@ int cfg80211_wext_siwpmksa(struct net_device *dev, return -EOPNOTSUPP; } } +EXPORT_SYMBOL_GPL(cfg80211_wext_siwpmksa); static const iw_handler cfg80211_handlers[] = { [IW_IOCTL_IDX(SIOCGIWNAME)] = (iw_handler) cfg80211_wext_giwname, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4bf27d901333..2b3ed7ad4933 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1594,8 +1594,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); - if (err < 0) { - if (err != -EAGAIN) + if (err <= 0) { + if (err != 0 && err != -EAGAIN) XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); return ERR_PTR(err); } @@ -1678,6 +1678,13 @@ xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, goto make_dummy_bundle; dst_hold(&xdst->u.dst); return oldflo; + } else if (new_xdst == NULL) { + num_xfrms = 0; + if (oldflo == NULL) + goto make_dummy_bundle; + xdst->num_xfrms = 0; + dst_hold(&xdst->u.dst); + 
return oldflo; } /* Kill the previous bundle */ @@ -1760,6 +1767,10 @@ restart: xfrm_pols_put(pols, num_pols); err = PTR_ERR(xdst); goto dropdst; + } else if (xdst == NULL) { + num_xfrms = 0; + drop_pols = num_pols; + goto no_transform; } spin_lock_bh(&xfrm_policy_sk_bundle_lock); @@ -2300,7 +2311,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 0; if (xdst->xfrm_genid != dst->xfrm->genid) return 0; - if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) + if (xdst->num_pols > 0 && + xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; if (strict && fl && @@ -2480,7 +2492,8 @@ static int __net_init xfrm_statistics_init(struct net *net) int rv; if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, - sizeof(struct linux_xfrm_mib)) < 0) + sizeof(struct linux_xfrm_mib), + __alignof__(struct linux_xfrm_mib)) < 0) return -ENOMEM; rv = xfrm_proc_init(net); if (rv < 0) |