Diffstat (limited to 'net')
185 files changed, 6535 insertions, 3395 deletions
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 99bb22aea346..68c80f3c9add 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -148,6 +148,11 @@ (((a)->s6_addr16[6]) == 0) && \ (((a)->s6_addr[14]) == 0)) +#define lowpan_is_linklocal_zero_padded(a) \ + (!(hdr->saddr.s6_addr[1] & 0x3f) && \ + !hdr->saddr.s6_addr16[1] && \ + !hdr->saddr.s6_addr32[1]) + #define LOWPAN_IPHC_CID_DCI(cid) (cid & 0x0f) #define LOWPAN_IPHC_CID_SCI(cid) ((cid & 0xf0) >> 4) @@ -1101,7 +1106,8 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, true); iphc1 |= LOWPAN_IPHC_SAC; } else { - if (ipv6_saddr_type & IPV6_ADDR_LINKLOCAL) { + if (ipv6_saddr_type & IPV6_ADDR_LINKLOCAL && + lowpan_is_linklocal_zero_padded(hdr->saddr)) { iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->saddr, saddr, true); @@ -1135,7 +1141,8 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, false); iphc1 |= LOWPAN_IPHC_DAC; } else { - if (ipv6_daddr_type & IPV6_ADDR_LINKLOCAL) { + if (ipv6_daddr_type & IPV6_ADDR_LINKLOCAL && + lowpan_is_linklocal_zero_padded(hdr->daddr)) { iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->daddr, daddr, false); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 955eda93e66f..3df7aefb7663 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -65,7 +65,7 @@ static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { void bt_sock_reclassify_lock(struct sock *sk, int proto) { BUG_ON(!sk); - BUG_ON(sock_owned_by_user(sk)); + BUG_ON(!sock_allow_reclassification(sk)); sock_lock_init_class_and_name(sk, bt_slock_key_strings[proto], &bt_slock_key[proto], diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index c162af5d16bf..d4b3dd5413be 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4727,6 +4727,19 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, u32 flags; u8 *ptr, real_len; + switch (type) { + case LE_ADV_IND: + case LE_ADV_DIRECT_IND: + case LE_ADV_SCAN_IND: + case LE_ADV_NONCONN_IND: + case LE_ADV_SCAN_RSP: + break; + default: + BT_ERR_RATELIMITED("Unknown advetising packet type: 0x%02x", + type); + return; + } + /* Find the end of the data in case the report contains padded zero * bytes at the end causing an invalid length value. * diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 6e125d76df0d..c045b3c54768 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1065,6 +1065,9 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) flags |= LE_AD_LIMITED; + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; + if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { /* If a discovery flag wasn't provided, simply use the global * settings. @@ -1072,9 +1075,6 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) if (!flags) flags |= mgmt_get_adv_discov_flags(hdev); - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - flags |= LE_AD_NO_BREDR; - /* If flags would still be empty, then there is no need to * include the "Flags" AD field". 
*/ diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index e4cae72895a7..388ee8b59145 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -778,7 +778,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, } if (sec.level < BT_SECURITY_LOW || - sec.level > BT_SECURITY_HIGH) { + sec.level > BT_SECURITY_FIPS) { err = -EINVAL; break; } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 263b4de4de57..f8fc6241469a 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -112,7 +112,9 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p = NULL; unsigned long args[4]; + int ret = -EOPNOTSUPP; if (copy_from_user(args, rq->ifr_data, sizeof(args))) return -EFAULT; @@ -182,25 +184,29 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - return br_set_forward_delay(br, args[1]); + ret = br_set_forward_delay(br, args[1]); + break; case BRCTL_SET_BRIDGE_HELLO_TIME: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - return br_set_hello_time(br, args[1]); + ret = br_set_hello_time(br, args[1]); + break; case BRCTL_SET_BRIDGE_MAX_AGE: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - return br_set_max_age(br, args[1]); + ret = br_set_max_age(br, args[1]); + break; case BRCTL_SET_AGEING_TIME: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - return br_set_ageing_time(br, args[1]); + ret = br_set_ageing_time(br, args[1]); + break; case BRCTL_GET_PORT_INFO: { @@ -240,20 +246,19 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -EPERM; br_stp_set_enabled(br, args[1]); - return 0; + ret = 0; + break; case BRCTL_SET_BRIDGE_PRIORITY: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; br_stp_set_bridge_priority(br, args[1]); - return 0; + ret = 0; + break; case BRCTL_SET_PORT_PRIORITY: { - struct net_bridge_port *p; - int ret; - if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -263,14 +268,11 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) else ret = br_stp_set_port_priority(p, args[2]); spin_unlock_bh(&br->lock); - return ret; + break; } case BRCTL_SET_PATH_COST: { - struct net_bridge_port *p; - int ret; - if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -280,8 +282,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) else ret = br_stp_set_path_cost(p, args[2]); spin_unlock_bh(&br->lock); - - return ret; + break; } case BRCTL_GET_FDB_ENTRIES: @@ -289,7 +290,14 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) args[2], args[3]); } - return -EOPNOTSUPP; + if (!ret) { + if (p) + br_ifinfo_notify(RTM_NEWLINK, p); + else + netdev_state_change(br->dev); + } + + return ret; } static int old_deviceless(struct net *net, void __user *uarg) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 6b8091407ca3..70bddfd0f3e9 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -43,7 +43,14 @@ static ssize_t store_bridge_parm(struct device *d, if (endp == buf) return -EINVAL; + if (!rtnl_trylock()) + return restart_syscall(); + err = (*set)(br, val); + if (!err) + netdev_state_change(br->dev); 
+ rtnl_unlock(); + return err ? err : len; } @@ -101,15 +108,7 @@ static ssize_t ageing_time_show(struct device *d, static int set_ageing_time(struct net_bridge *br, unsigned long val) { - int ret; - - if (!rtnl_trylock()) - return restart_syscall(); - - ret = br_set_ageing_time(br, val); - rtnl_unlock(); - - return ret; + return br_set_ageing_time(br, val); } static ssize_t ageing_time_store(struct device *d, @@ -128,27 +127,18 @@ static ssize_t stp_state_show(struct device *d, } +static int set_stp_state(struct net_bridge *br, unsigned long val) +{ + br_stp_set_enabled(br, val); + + return 0; +} + static ssize_t stp_state_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - struct net_bridge *br = to_bridge(d); - char *endp; - unsigned long val; - - if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - val = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; - - if (!rtnl_trylock()) - return restart_syscall(); - br_stp_set_enabled(br, val); - rtnl_unlock(); - - return len; + return store_bridge_parm(d, buf, len, set_stp_state); } static DEVICE_ATTR_RW(stp_state); @@ -160,29 +150,22 @@ static ssize_t group_fwd_mask_show(struct device *d, return sprintf(buf, "%#x\n", br->group_fwd_mask); } - -static ssize_t group_fwd_mask_store(struct device *d, - struct device_attribute *attr, - const char *buf, - size_t len) +static int set_group_fwd_mask(struct net_bridge *br, unsigned long val) { - struct net_bridge *br = to_bridge(d); - char *endp; - unsigned long val; - - if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - val = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; - if (val & BR_GROUPFWD_RESTRICTED) return -EINVAL; br->group_fwd_mask = val; - return len; + return 0; +} + +static ssize_t group_fwd_mask_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_group_fwd_mask); } static DEVICE_ATTR_RW(group_fwd_mask); @@ -328,6 +311,7 @@ static ssize_t group_addr_store(struct device *d, br->group_addr_set = true; br_recalculate_fwd_mask(br); + netdev_state_change(br->dev); rtnl_unlock(); @@ -336,17 +320,17 @@ static ssize_t group_addr_store(struct device *d, static DEVICE_ATTR_RW(group_addr); +static int set_flush(struct net_bridge *br, unsigned long val) +{ + br_fdb_flush(br); + return 0; +} + static ssize_t flush_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - struct net_bridge *br = to_bridge(d); - - if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - br_fdb_flush(br); - return len; + return store_bridge_parm(d, buf, len, set_flush); } static DEVICE_ATTR_WO(flush); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index efe415ad842a..1e04d4d44273 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -61,7 +61,6 @@ static int store_flag(struct net_bridge_port *p, unsigned long v, if (flags != p->flags) { p->flags = flags; br_port_flags_change(p, mask); - br_ifinfo_notify(RTM_NEWLINK, p); } return 0; } @@ -253,8 +252,10 @@ static ssize_t brport_store(struct kobject *kobj, spin_lock_bh(&p->br->lock); ret = brport_attr->store(p, val); spin_unlock_bh(&p->br->lock); - if (ret == 0) + if (!ret) { + br_ifinfo_notify(RTM_NEWLINK, p); ret = count; + } } rtnl_unlock(); } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9309bb4f2a5b..e001152d6ad1 100644 --- a/net/bridge/br_vlan.c +++ 
b/net/bridge/br_vlan.c @@ -651,15 +651,7 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { - int err; - - if (!rtnl_trylock()) - return restart_syscall(); - - err = __br_vlan_filter_toggle(br, val); - rtnl_unlock(); - - return err; + return __br_vlan_filter_toggle(br, val); } int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) @@ -713,18 +705,10 @@ err_filt: int br_vlan_set_proto(struct net_bridge *br, unsigned long val) { - int err; - if (val != ETH_P_8021Q && val != ETH_P_8021AD) return -EPROTONOSUPPORT; - if (!rtnl_trylock()) - return restart_syscall(); - - err = __br_vlan_set_proto(br, htons(val)); - rtnl_unlock(); - - return err; + return __br_vlan_set_proto(br, htons(val)); } static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid) @@ -855,21 +839,17 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val) if (val >= VLAN_VID_MASK) return -EINVAL; - if (!rtnl_trylock()) - return restart_syscall(); - if (pvid == br->default_pvid) - goto unlock; + goto out; /* Only allow default pvid change when filtering is disabled */ if (br->vlan_enabled) { pr_info_once("Please disable vlan filtering to change default_pvid\n"); err = -EPERM; - goto unlock; + goto out; } err = __br_vlan_set_default_pvid(br, pvid); -unlock: - rtnl_unlock(); +out: return err; } diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 7fcdd7261d88..a78c4e2826e5 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -162,15 +162,57 @@ static const struct nf_chain_type filter_bridge = { (1 << NF_BR_POST_ROUTING), }; +static void nf_br_saveroute(const struct sk_buff *skb, + struct nf_queue_entry *entry) +{ +} + +static int nf_br_reroute(struct net *net, struct sk_buff *skb, + const struct nf_queue_entry *entry) +{ + return 0; +} + +static __sum16 nf_br_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol) +{ + return 0; +} + +static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, unsigned int len, + u_int8_t protocol) +{ + return 0; +} + +static int nf_br_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict __always_unused) +{ + return 0; +} + +static const struct nf_afinfo nf_br_afinfo = { + .family = AF_BRIDGE, + .checksum = nf_br_checksum, + .checksum_partial = nf_br_checksum_partial, + .route = nf_br_route, + .saveroute = nf_br_saveroute, + .reroute = nf_br_reroute, + .route_key_size = 0, +}; + static int __init nf_tables_bridge_init(void) { int ret; + nf_register_afinfo(&nf_br_afinfo); nft_register_chain_type(&filter_bridge); ret = register_pernet_subsys(&nf_tables_bridge_net_ops); - if (ret < 0) + if (ret < 0) { nft_unregister_chain_type(&filter_bridge); - + nf_unregister_afinfo(&nf_br_afinfo); + } return ret; } @@ -178,6 +220,7 @@ static void __exit nf_tables_bridge_exit(void) { unregister_pernet_subsys(&nf_tables_bridge_net_ops); nft_unregister_chain_type(&filter_bridge); + nf_unregister_afinfo(&nf_br_afinfo); } module_init(nf_tables_bridge_init); diff --git a/net/can/raw.c b/net/can/raw.c index 2e67b1423cd3..972c187d40ab 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -755,7 +755,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err < 0) goto free_skb; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sk->sk_tsflags, 
&skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/core/datagram.c b/net/core/datagram.c index fa9dc6450b08..b7de71f8d5d3 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -301,16 +301,19 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(skb_free_datagram); -void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) +void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) { bool slow; if (likely(atomic_read(&skb->users) == 1)) smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + else if (likely(!atomic_dec_and_test(&skb->users))) { + sk_peek_offset_bwd(sk, len); return; + } slow = lock_sock_fast(sk); + sk_peek_offset_bwd(sk, len); skb_orphan(skb); sk_mem_reclaim_partial(sk); unlock_sock_fast(sk, slow); @@ -318,7 +321,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) /* skb is now orphaned, can be freed outside of locked section */ __kfree_skb(skb); } -EXPORT_SYMBOL(skb_free_datagram_locked); +EXPORT_SYMBOL(__skb_free_datagram_locked); /** * skb_kill_datagram - Free a datagram skbuff forcibly diff --git a/net/core/dev.c b/net/core/dev.c index 77a71cd68535..6324bc9267f7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2711,6 +2711,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + /* Only report GSO partial support if it will enable us to + * support segmentation on this frame without needing additional + * work. + */ + if (features & NETIF_F_GSO_PARTIAL) { + netdev_features_t partial_features = NETIF_F_GSO_ROBUST; + struct net_device *dev = skb->dev; + + partial_features |= dev->features & dev->gso_partial_features; + if (!skb_gso_ok(skb, features | partial_features)) + features &= ~NETIF_F_GSO_PARTIAL; + } + BUILD_BUG_ON(SKB_SGO_CB_OFFSET + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); @@ -2825,14 +2838,45 @@ static netdev_features_t dflt_features_check(const struct sk_buff *skb, return vlan_features_check(skb, features); } +static netdev_features_t gso_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + u16 gso_segs = skb_shinfo(skb)->gso_segs; + + if (gso_segs > dev->gso_max_segs) + return features & ~NETIF_F_GSO_MASK; + + /* Support for GSO partial features requires software + * intervention before we can actually process the packets + * so we need to strip support for any partial features now + * and we can pull them back in after we have partially + * segmented the frame. + */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)) + features &= ~dev->gso_partial_features; + + /* Make sure to clear the IPv4 ID mangling feature if the + * IPv4 header has the potential to be fragmented. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + struct iphdr *iph = skb->encapsulation ? 
+ inner_ip_hdr(skb) : ip_hdr(skb); + + if (!(iph->frag_off & htons(IP_DF))) + features &= ~NETIF_F_TSO_MANGLEID; + } + + return features; +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; netdev_features_t features = dev->features; - u16 gso_segs = skb_shinfo(skb)->gso_segs; - if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) - features &= ~NETIF_F_GSO_MASK; + if (skb_is_gso(skb)) + features = gso_features_check(skb, dev, features); /* If encapsulation offload request, verify we are testing * hardware encapsulation features instead of standard @@ -2915,9 +2959,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device { netdev_features_t features; - if (skb->next) - return skb; - features = netif_skb_features(skb); skb = validate_xmit_vlan(skb, features); if (unlikely(!skb)) @@ -2960,6 +3001,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device out_kfree_skb: kfree_skb(skb); out_null: + atomic_long_inc(&dev->tx_dropped); return NULL; } @@ -3349,7 +3391,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb = validate_xmit_skb(skb, dev); if (!skb) - goto drop; + goto out; HARD_TX_LOCK(dev, txq, cpu); @@ -3376,7 +3418,6 @@ recursion_alert: } rc = -ENETDOWN; -drop: rcu_read_unlock_bh(); atomic_long_inc(&dev->tx_dropped); @@ -4440,6 +4481,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->is_fou = 0; + NAPI_GRO_CB(skb)->is_atomic = 1; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ @@ -4664,6 +4706,8 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) if (unlikely(skb_gro_header_hard(skb, hlen))) { eth = skb_gro_header_slow(skb, hlen, 0); if (unlikely(!eth)) { + net_warn_ratelimited("%s: dropping impossible skb from %s\n", + __func__, napi->dev->name); napi_reuse_skb(napi, skb); return NULL; } @@ -6704,6 +6748,14 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } + /* GSO partial features require GSO partial be set */ + if ((features & dev->gso_partial_features) && + !(features & NETIF_F_GSO_PARTIAL)) { + netdev_dbg(dev, + "Dropping partially supported GSO features since no GSO partial.\n"); + features &= ~dev->gso_partial_features; + } + #ifdef CONFIG_NET_RX_BUSY_POLL if (dev->netdev_ops->ndo_busy_poll) features |= NETIF_F_BUSY_POLL; @@ -6974,9 +7026,22 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - if (!(dev->flags & IFF_LOOPBACK)) { + if (!(dev->flags & IFF_LOOPBACK)) dev->hw_features |= NETIF_F_NOCACHE_COPY; - } + + /* If IPv4 TCP segmentation offload is supported we should also + * allow the device to enable segmenting the frame with the option + * of ignoring a static IP ID value. This doesn't enable the + * feature itself but allows the user to enable it later. + */ + if (dev->hw_features & NETIF_F_TSO) + dev->hw_features |= NETIF_F_TSO_MANGLEID; + if (dev->vlan_features & NETIF_F_TSO) + dev->vlan_features |= NETIF_F_TSO_MANGLEID; + if (dev->mpls_features & NETIF_F_TSO) + dev->mpls_features |= NETIF_F_TSO_MANGLEID; + if (dev->hw_enc_features & NETIF_F_TSO) + dev->hw_enc_features |= NETIF_F_TSO_MANGLEID; /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 
*/ @@ -6984,7 +7049,7 @@ int register_netdevice(struct net_device *dev) /* Make NETIF_F_SG inheritable to tunnel devices. */ - dev->hw_enc_features |= NETIF_F_SG; + dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL; /* Make NETIF_F_SG inheritable to MPLS. */ @@ -7427,7 +7492,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; - dev->gso_min_segs = 0; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); diff --git a/net/core/devlink.c b/net/core/devlink.c index 590fa561cb7f..933e8d4d3968 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -119,7 +119,171 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink, return devlink_port_get_from_attrs(devlink, info->attrs); } -#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) +struct devlink_sb { + struct list_head list; + unsigned int index; + u32 size; + u16 ingress_pools_count; + u16 egress_pools_count; + u16 ingress_tc_count; + u16 egress_tc_count; +}; + +static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb) +{ + return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count; +} + +static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink, + unsigned int sb_index) +{ + struct devlink_sb *devlink_sb; + + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + if (devlink_sb->index == sb_index) + return devlink_sb; + } + return NULL; +} + +static bool devlink_sb_index_exists(struct devlink *devlink, + unsigned int sb_index) +{ + return devlink_sb_get_by_index(devlink, sb_index); +} + +static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink, + struct nlattr **attrs) +{ + if (attrs[DEVLINK_ATTR_SB_INDEX]) { + u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]); + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_by_index(devlink, sb_index); + if (!devlink_sb) + return ERR_PTR(-ENODEV); + return devlink_sb; + } + return ERR_PTR(-EINVAL); +} + +static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + return devlink_sb_get_from_attrs(devlink, info->attrs); +} + +static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb, + struct nlattr **attrs, + u16 *p_pool_index) +{ + u16 val; + + if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX]) + return -EINVAL; + + val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]); + if (val >= devlink_sb_pool_count(devlink_sb)) + return -EINVAL; + *p_pool_index = val; + return 0; +} + +static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb, + struct genl_info *info, + u16 *p_pool_index) +{ + return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs, + p_pool_index); +} + +static int +devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs, + enum devlink_sb_pool_type *p_pool_type) +{ + u8 val; + + if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE]) + return -EINVAL; + + val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]); + if (val != DEVLINK_SB_POOL_TYPE_INGRESS && + val != DEVLINK_SB_POOL_TYPE_EGRESS) + return -EINVAL; + *p_pool_type = val; + return 0; +} + +static int +devlink_sb_pool_type_get_from_info(struct genl_info *info, + enum devlink_sb_pool_type *p_pool_type) +{ + return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type); +} + +static int +devlink_sb_th_type_get_from_attrs(struct nlattr **attrs, + enum devlink_sb_threshold_type *p_th_type) +{ + u8 val; + + if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]) + 
return -EINVAL; + + val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]); + if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC && + val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC) + return -EINVAL; + *p_th_type = val; + return 0; +} + +static int +devlink_sb_th_type_get_from_info(struct genl_info *info, + enum devlink_sb_threshold_type *p_th_type) +{ + return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type); +} + +static int +devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb, + struct nlattr **attrs, + enum devlink_sb_pool_type pool_type, + u16 *p_tc_index) +{ + u16 val; + + if (!attrs[DEVLINK_ATTR_SB_TC_INDEX]) + return -EINVAL; + + val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]); + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS && + val >= devlink_sb->ingress_tc_count) + return -EINVAL; + if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS && + val >= devlink_sb->egress_tc_count) + return -EINVAL; + *p_tc_index = val; + return 0; +} + +static int +devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb, + struct genl_info *info, + enum devlink_sb_pool_type pool_type, + u16 *p_tc_index) +{ + return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs, + pool_type, p_tc_index); +} + +#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) +#define DEVLINK_NL_FLAG_NEED_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_SB BIT(2) +#define DEVLINK_NL_FLAG_LOCK_PORTS BIT(3) + /* port is not needed but we need to ensure they don't + * change in the middle of command + */ static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -132,8 +296,9 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); return PTR_ERR(devlink); } - info->user_ptr[0] = devlink; - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { + info->user_ptr[0] = devlink; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { struct devlink_port *devlink_port; mutex_lock(&devlink_port_mutex); @@ -143,7 +308,22 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); return PTR_ERR(devlink_port); } - info->user_ptr[1] = devlink_port; + info->user_ptr[0] = devlink_port; + } + if (ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) { + mutex_lock(&devlink_port_mutex); + } + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) { + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) { + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) + mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink_mutex); + return PTR_ERR(devlink_sb); + } + info->user_ptr[1] = devlink_sb; } return 0; } @@ -151,7 +331,8 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, static void devlink_nl_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT || + ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) mutex_unlock(&devlink_port_mutex); mutex_unlock(&devlink_mutex); } @@ -356,8 +537,8 @@ out: static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink *devlink = info->user_ptr[0]; - struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; @@ -436,8 +617,8 @@ static int 
devlink_port_type_set(struct devlink *devlink, static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink *devlink = info->user_ptr[0]; - struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; int err; if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) { @@ -497,12 +678,735 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, return devlink_port_unsplit(devlink, port_index); } +static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_sb *devlink_sb, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT, + devlink_sb->ingress_pools_count)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT, + devlink_sb->egress_pools_count)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT, + devlink_sb->ingress_tc_count)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT, + devlink_sb->egress_tc_count)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_fill(msg, devlink, devlink_sb, + DEVLINK_CMD_SB_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_sb_fill(msg, devlink, devlink_sb, + DEVLINK_CMD_SB_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) + goto out; + idx++; + } + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_sb *devlink_sb, + u16 pool_index, enum devlink_command cmd, + u32 portid, u32 seq, int flags) +{ + struct devlink_sb_pool_info pool_info; + void *hdr; + int err; + + err = devlink->ops->sb_pool_get(devlink, devlink_sb->index, + pool_index, &pool_info); + if (err) + return err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if 
(nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, + pool_info.threshold_type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + u16 pool_index; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!devlink->ops || !devlink->ops->sb_pool_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index, + DEVLINK_CMD_SB_POOL_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, + struct devlink *devlink, + struct devlink_sb *devlink_sb, + u32 portid, u32 seq) +{ + u16 pool_count = devlink_sb_pool_count(devlink_sb); + u16 pool_index; + int err; + + for (pool_index = 0; pool_index < pool_count; pool_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_pool_fill(msg, devlink, + devlink_sb, + pool_index, + DEVLINK_CMD_SB_POOL_NEW, + portid, seq, NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + return 0; +} + +static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || + !devlink->ops || !devlink->ops->sb_pool_get) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_pool_get_dumpit(msg, start, &idx, devlink, + devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err && err != -EOPNOTSUPP) + goto out; + } + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) + +{ + const struct devlink_ops *ops = devlink->ops; + + if (ops && ops->sb_pool_set) + return ops->sb_pool_set(devlink, sb_index, pool_index, + size, threshold_type); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + enum devlink_sb_threshold_type threshold_type; + u16 pool_index; + u32 size; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + err = devlink_sb_th_type_get_from_info(info, &threshold_type); + if (err) + return err; + + if (!info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]) + return -EINVAL; + + size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]); + return devlink_sb_pool_set(devlink, devlink_sb->index, + 
pool_index, size, threshold_type); +} + +static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_port *devlink_port, + struct devlink_sb *devlink_sb, + u16 pool_index, + enum devlink_command cmd, + u32 portid, u32 seq, int flags) +{ + const struct devlink_ops *ops = devlink->ops; + u32 threshold; + void *hdr; + int err; + + err = ops->sb_port_pool_get(devlink_port, devlink_sb->index, + pool_index, &threshold); + if (err) + return err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold)) + goto nla_put_failure; + + if (ops->sb_occ_port_pool_get) { + u32 cur; + u32 max; + + err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index, + pool_index, &cur, &max); + if (err && err != -EOPNOTSUPP) + return err; + if (!err) { + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max)) + goto nla_put_failure; + } + } + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + u16 pool_index; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!devlink->ops || !devlink->ops->sb_port_pool_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port, + devlink_sb, pool_index, + DEVLINK_CMD_SB_PORT_POOL_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, + struct devlink *devlink, + struct devlink_sb *devlink_sb, + u32 portid, u32 seq) +{ + struct devlink_port *devlink_port; + u16 pool_count = devlink_sb_pool_count(devlink_sb); + u16 pool_index; + int err; + + list_for_each_entry(devlink_port, &devlink->port_list, list) { + for (pool_index = 0; pool_index < pool_count; pool_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_port_pool_fill(msg, devlink, + devlink_port, + devlink_sb, + pool_index, + DEVLINK_CMD_SB_PORT_POOL_NEW, + portid, seq, + NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + } + return 0; +} + +static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + mutex_lock(&devlink_port_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || + !devlink->ops || !devlink->ops->sb_port_pool_get) + continue; + list_for_each_entry(devlink_sb, 
&devlink->sb_list, list) { + err = __sb_port_pool_get_dumpit(msg, start, &idx, + devlink, devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err && err != -EOPNOTSUPP) + goto out; + } + } +out: + mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 threshold) + +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + if (ops && ops->sb_port_pool_set) + return ops->sb_port_pool_set(devlink_port, sb_index, + pool_index, threshold); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + u16 pool_index; + u32 threshold; + int err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD]) + return -EINVAL; + + threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); + return devlink_sb_port_pool_set(devlink_port, devlink_sb->index, + pool_index, threshold); +} + +static int +devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_port *devlink_port, + struct devlink_sb *devlink_sb, u16 tc_index, + enum devlink_sb_pool_type pool_type, + enum devlink_command cmd, + u32 portid, u32 seq, int flags) +{ + const struct devlink_ops *ops = devlink->ops; + u16 pool_index; + u32 threshold; + void *hdr; + int err; + + err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index, + tc_index, pool_type, + &pool_index, &threshold); + if (err) + return err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index)) + goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type)) + goto nla_put_failure; + if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold)) + goto nla_put_failure; + + if (ops->sb_occ_tc_port_bind_get) { + u32 cur; + u32 max; + + err = ops->sb_occ_tc_port_bind_get(devlink_port, + devlink_sb->index, + tc_index, pool_type, + &cur, &max); + if (err && err != -EOPNOTSUPP) + return err; + if (!err) { + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max)) + goto nla_put_failure; + } + } + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink *devlink = devlink_port->devlink; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct sk_buff *msg; + enum devlink_sb_pool_type pool_type; + u16 tc_index; + int err; + + err = devlink_sb_pool_type_get_from_info(info, &pool_type); + if (err) + return err; + + err = devlink_sb_tc_index_get_from_info(devlink_sb, info, + pool_type, &tc_index); + if (err) + 
return err; + + if (!devlink->ops || !devlink->ops->sb_tc_pool_bind_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port, + devlink_sb, tc_index, pool_type, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + info->snd_portid, + info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, + int start, int *p_idx, + struct devlink *devlink, + struct devlink_sb *devlink_sb, + u32 portid, u32 seq) +{ + struct devlink_port *devlink_port; + u16 tc_index; + int err; + + list_for_each_entry(devlink_port, &devlink->port_list, list) { + for (tc_index = 0; + tc_index < devlink_sb->ingress_tc_count; tc_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, + devlink_port, + devlink_sb, + tc_index, + DEVLINK_SB_POOL_TYPE_INGRESS, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + portid, seq, + NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + for (tc_index = 0; + tc_index < devlink_sb->egress_tc_count; tc_index++) { + if (*p_idx < start) { + (*p_idx)++; + continue; + } + err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, + devlink_port, + devlink_sb, + tc_index, + DEVLINK_SB_POOL_TYPE_EGRESS, + DEVLINK_CMD_SB_TC_POOL_BIND_NEW, + portid, seq, + NLM_F_MULTI); + if (err) + return err; + (*p_idx)++; + } + } + return 0; +} + +static int +devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + struct devlink_sb *devlink_sb; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + mutex_lock(&devlink_port_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || + !devlink->ops || !devlink->ops->sb_tc_pool_bind_get) + continue; + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx, + devlink, + devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err && err != -EOPNOTSUPP) + goto out; + } + } +out: + mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold) + +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + if (ops && ops->sb_tc_pool_bind_set) + return ops->sb_tc_pool_bind_set(devlink_port, sb_index, + tc_index, pool_type, + pool_index, threshold); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + enum devlink_sb_pool_type pool_type; + u16 tc_index; + u16 pool_index; + u32 threshold; + int err; + + err = devlink_sb_pool_type_get_from_info(info, &pool_type); + if (err) + return err; + + err = devlink_sb_tc_index_get_from_info(devlink_sb, info, + pool_type, &tc_index); + if (err) + return err; + + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, + &pool_index); + if (err) + return err; + + if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD]) + return -EINVAL; + + threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); + return 
devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index, + tc_index, pool_type, + pool_index, threshold); +} + +static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + const struct devlink_ops *ops = devlink->ops; + + if (ops && ops->sb_occ_snapshot) + return ops->sb_occ_snapshot(devlink, devlink_sb->index); + return -EOPNOTSUPP; +} + +static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb = info->user_ptr[1]; + const struct devlink_ops *ops = devlink->ops; + + if (ops && ops->sb_occ_max_clear) + return ops->sb_occ_max_clear(devlink, devlink_sb->index); + return -EOPNOTSUPP; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 }, [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 }, [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 }, + [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -511,6 +1415,7 @@ static const struct genl_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_get_doit, .dumpit = devlink_nl_cmd_get_dumpit, .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -533,12 +1438,92 @@ static const struct genl_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_port_split_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_PORT_UNSPLIT, .doit = devlink_nl_cmd_port_unsplit_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_SB_GET, + .doit = devlink_nl_cmd_sb_get_doit, + .dumpit = devlink_nl_cmd_sb_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_POOL_GET, + .doit = devlink_nl_cmd_sb_pool_get_doit, + .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_POOL_SET, + .doit = devlink_nl_cmd_sb_pool_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB, + }, + { + .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, + .doit = devlink_nl_cmd_sb_port_pool_get_doit, + .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, + .doit = devlink_nl_cmd_sb_port_pool_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = 
DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + }, + { + .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, + .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit, + .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, + .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | + DEVLINK_NL_FLAG_NEED_SB, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, + .doit = devlink_nl_cmd_sb_occ_snapshot_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB | + DEVLINK_NL_FLAG_LOCK_PORTS, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, + .doit = devlink_nl_cmd_sb_occ_max_clear_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NEED_SB | + DEVLINK_NL_FLAG_LOCK_PORTS, }, }; @@ -561,6 +1546,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) devlink->ops = ops; devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); + INIT_LIST_HEAD(&devlink->sb_list); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -630,7 +1616,6 @@ int devlink_port_register(struct devlink *devlink, } devlink_port->devlink = devlink; devlink_port->index = port_index; - devlink_port->type = DEVLINK_PORT_TYPE_NOTSET; devlink_port->registered = true; list_add_tail(&devlink_port->list, &devlink->port_list); mutex_unlock(&devlink_port_mutex); @@ -717,6 +1702,51 @@ void devlink_port_split_set(struct devlink_port *devlink_port, } EXPORT_SYMBOL_GPL(devlink_port_split_set); +int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count) +{ + struct devlink_sb *devlink_sb; + int err = 0; + + mutex_lock(&devlink_mutex); + if (devlink_sb_index_exists(devlink, sb_index)) { + err = -EEXIST; + goto unlock; + } + + devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL); + if (!devlink_sb) { + err = -ENOMEM; + goto unlock; + } + devlink_sb->index = sb_index; + devlink_sb->size = size; + devlink_sb->ingress_pools_count = ingress_pools_count; + devlink_sb->egress_pools_count = egress_pools_count; + devlink_sb->ingress_tc_count = ingress_tc_count; + devlink_sb->egress_tc_count = egress_tc_count; + list_add_tail(&devlink_sb->list, &devlink->sb_list); +unlock: + mutex_unlock(&devlink_mutex); + return err; +} +EXPORT_SYMBOL_GPL(devlink_sb_register); + +void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) +{ + struct devlink_sb *devlink_sb; + + mutex_lock(&devlink_mutex); + devlink_sb = devlink_sb_get_by_index(devlink, sb_index); + WARN_ON(!devlink_sb); + list_del(&devlink_sb->list); + mutex_unlock(&devlink_mutex); + kfree(devlink_sb); +} +EXPORT_SYMBOL_GPL(devlink_sb_unregister); + static int __init devlink_module_init(void) { return genl_register_family_with_ops_groups(&devlink_nl_family, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f426c5ad6149..bdb4013581b1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -79,12 +79,16 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", [NETIF_F_GSO_ROBUST_BIT] = 
"tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", + [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", + [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", @@ -387,15 +391,17 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } -static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) +void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, + u32 legacy_u32) { bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); dst[0] = legacy_u32; } +EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode); /* return false if src had higher bits set. lower bits always updated. */ -static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, - const unsigned long *src) +bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, + const unsigned long *src) { bool retval = true; @@ -415,6 +421,7 @@ static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, *legacy_u32 = src[0]; return retval; } +EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); /* return false if legacy contained non-0 deprecated fields * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated @@ -437,13 +444,13 @@ convert_legacy_settings_to_link_ksettings( legacy_settings->maxrxpkt) retval = false; - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.supported, legacy_settings->supported); - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.advertising, legacy_settings->advertising); - convert_legacy_u32_to_link_mode( + ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.lp_advertising, legacy_settings->lp_advertising); link_ksettings->base.speed @@ -482,13 +489,13 @@ convert_link_ksettings_to_legacy_settings( * __u32 maxrxpkt; */ - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->supported, link_ksettings->link_modes.supported); - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->advertising, link_ksettings->link_modes.advertising); - retval &= convert_link_mode_to_legacy_u32( + retval &= ethtool_convert_link_mode_to_legacy_u32( &legacy_settings->lp_advertising, link_ksettings->link_modes.lp_advertising); ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed); diff --git a/net/core/filter.c b/net/core/filter.c index ca7f832b2980..218e5de8c402 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1149,8 +1149,7 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); -static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, - bool locked) +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) { struct sk_filter *fp, *old_fp; @@ -1166,8 +1165,10 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, return -ENOMEM; } - old_fp = rcu_dereference_protected(sk->sk_filter, locked); + old_fp = 
rcu_dereference_protected(sk->sk_filter, + lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_filter, fp); + if (old_fp) sk_filter_uncharge(sk, old_fp); @@ -1246,8 +1247,7 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. */ -int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, - bool locked) +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct bpf_prog *prog = __get_filter(fprog, sk); int err; @@ -1255,7 +1255,7 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk, locked); + err = __sk_attach_prog(prog, sk); if (err < 0) { __bpf_prog_release(prog); return err; @@ -1263,12 +1263,7 @@ int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, return 0; } -EXPORT_SYMBOL_GPL(__sk_attach_filter); - -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) -{ - return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); -} +EXPORT_SYMBOL_GPL(sk_attach_filter); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { @@ -1314,7 +1309,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); + err = __sk_attach_prog(prog, sk); if (err < 0) { bpf_prog_put(prog); return err; @@ -1414,16 +1409,19 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) unsigned int len = (unsigned int) r4; void *ptr; - if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK)) - return -EFAULT; + if (unlikely((u32) offset > 0xffff)) + goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); if (unlikely(!ptr)) - return -EFAULT; + goto err_clear; if (ptr != to) memcpy(to, ptr, len); return 0; +err_clear: + memset(to, 0, len); + return -EFAULT; } static const struct bpf_func_proto bpf_skb_load_bytes_proto = { @@ -1432,7 +1430,7 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_STACK, + .arg3_type = ARG_PTR_TO_RAW_STACK, .arg4_type = ARG_CONST_STACK_SIZE, }; @@ -1761,12 +1759,19 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); u8 compat[sizeof(struct bpf_tunnel_key)]; + void *to_orig = to; + int err; - if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) - return -EINVAL; - if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) - return -EPROTO; + if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) { + err = -EINVAL; + goto err_clear; + } + if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) { + err = -EPROTO; + goto err_clear; + } if (unlikely(size != sizeof(struct bpf_tunnel_key))) { + err = -EINVAL; switch (size) { case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): @@ -1776,12 +1781,12 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) * a common path later on. 
*/ if (ip_tunnel_info_af(info) != AF_INET) - return -EINVAL; + goto err_clear; set_compat: to = (struct bpf_tunnel_key *)compat; break; default: - return -EINVAL; + goto err_clear; } } @@ -1798,9 +1803,12 @@ set_compat: } if (unlikely(size != sizeof(struct bpf_tunnel_key))) - memcpy((void *)(long) r2, to, size); + memcpy(to_orig, to, size); return 0; +err_clear: + memset(to_orig, 0, size); + return err; } static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { @@ -1808,7 +1816,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_PTR_TO_RAW_STACK, .arg3_type = ARG_CONST_STACK_SIZE, .arg4_type = ARG_ANYTHING, }; @@ -1818,16 +1826,26 @@ static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) struct sk_buff *skb = (struct sk_buff *) (long) r1; u8 *to = (u8 *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); + int err; if (unlikely(!info || - !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) - return -ENOENT; - if (unlikely(size < info->options_len)) - return -ENOMEM; + !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) { + err = -ENOENT; + goto err_clear; + } + if (unlikely(size < info->options_len)) { + err = -ENOMEM; + goto err_clear; + } ip_tunnel_info_opts_get(to, info); + if (size > info->options_len) + memset(to + info->options_len, 0, size - info->options_len); return info->options_len; +err_clear: + memset(to, 0, size); + return err; } static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { @@ -1835,7 +1853,7 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_PTR_TO_RAW_STACK, .arg3_type = ARG_CONST_STACK_SIZE, }; @@ -2021,6 +2039,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_redirect_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; + case BPF_FUNC_perf_event_output: + return bpf_get_event_output_proto(); default: return sk_filter_func_proto(func_id); } @@ -2255,7 +2275,7 @@ static int __init register_sk_filter_ops(void) } late_initcall(register_sk_filter_ops); -int __sk_detach_filter(struct sock *sk, bool locked) +int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; struct sk_filter *filter; @@ -2263,7 +2283,8 @@ int __sk_detach_filter(struct sock *sk, bool locked) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - filter = rcu_dereference_protected(sk->sk_filter, locked); + filter = rcu_dereference_protected(sk->sk_filter, + lockdep_sock_is_held(sk)); if (filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); @@ -2272,12 +2293,7 @@ int __sk_detach_filter(struct sock *sk, bool locked) return ret; } -EXPORT_SYMBOL_GPL(__sk_detach_filter); - -int sk_detach_filter(struct sock *sk) -{ - return __sk_detach_filter(sk, sock_owned_by_user(sk)); -} +EXPORT_SYMBOL_GPL(sk_detach_filter); int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) @@ -2288,7 +2304,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, lock_sock(sk); filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (!filter) goto out; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a75f7e94b445..5ec059d52823 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -808,11 +808,6 @@ 
static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->rx_nohandler = b->rx_nohandler; } -static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) -{ - memcpy(v, b, sizeof(*b)); -} - /* All VF info */ static inline int rtnl_vfinfo_size(const struct net_device *dev, u32 ext_filter_mask) @@ -883,7 +878,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) - + nla_total_size(sizeof(struct rtnl_link_stats64)) + + nla_total_size_64bit(sizeof(struct rtnl_link_stats64)) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + nla_total_size(4) /* IFLA_TXQLEN */ @@ -1054,25 +1049,23 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb, struct net_device *dev) { - const struct rtnl_link_stats64 *stats; - struct rtnl_link_stats64 temp; + struct rtnl_link_stats64 *sp; struct nlattr *attr; - stats = dev_get_stats(dev, &temp); - - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); + attr = nla_reserve_64bit(skb, IFLA_STATS64, + sizeof(struct rtnl_link_stats64), IFLA_PAD); if (!attr) return -EMSGSIZE; - copy_rtnl_link_stats(nla_data(attr), stats); + sp = nla_data(attr); + dev_get_stats(dev, sp); - attr = nla_reserve(skb, IFLA_STATS64, - sizeof(struct rtnl_link_stats64)); + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); if (!attr) return -EMSGSIZE; - copy_rtnl_link_stats64(nla_data(attr), stats); + copy_rtnl_link_stats(nla_data(attr), sp); return 0; } @@ -3451,6 +3444,154 @@ out: return err; } +static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, + int type, u32 pid, u32 seq, u32 change, + unsigned int flags, unsigned int filter_mask) +{ + struct if_stats_msg *ifsm; + struct nlmsghdr *nlh; + struct nlattr *attr; + + ASSERT_RTNL(); + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags); + if (!nlh) + return -EMSGSIZE; + + ifsm = nlmsg_data(nlh); + ifsm->ifindex = dev->ifindex; + ifsm->filter_mask = filter_mask; + + if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_64)) { + struct rtnl_link_stats64 *sp; + + attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64, + sizeof(struct rtnl_link_stats64), + IFLA_STATS_UNSPEC); + if (!attr) + goto nla_put_failure; + + sp = nla_data(attr); + dev_get_stats(dev, sp); + } + + nlmsg_end(skb, nlh); + + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + + return -EMSGSIZE; +} + +static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = { + [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) }, +}; + +static size_t if_nlmsg_stats_size(const struct net_device *dev, + u32 filter_mask) +{ + size_t size = 0; + + if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_64)) + size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); + + return size; +} + +static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct if_stats_msg *ifsm; + struct net_device *dev = NULL; + struct sk_buff *nskb; + u32 filter_mask; + int err; + + ifsm = nlmsg_data(nlh); + if (ifsm->ifindex > 0) + dev = __dev_get_by_index(net, ifsm->ifindex); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + filter_mask = ifsm->filter_mask; + if (!filter_mask) + return -EINVAL; + + nskb = 
nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL); + if (!nskb) + return -ENOBUFS; + + err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS, + NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + 0, filter_mask); + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(nskb); + } else { + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); + } + + return err; +} + +static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct if_stats_msg *ifsm; + int h, s_h; + int idx = 0, s_idx; + struct net_device *dev; + struct hlist_head *head; + unsigned int flags = NLM_F_MULTI; + u32 filter_mask = 0; + int err; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + + cb->seq = net->dev_base_seq; + + ifsm = nlmsg_data(cb->nlh); + filter_mask = ifsm->filter_mask; + if (!filter_mask) + return -EINVAL; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + flags, filter_mask); + /* If we ran out of room on the first message, + * we're in trouble + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + + if (err < 0) + goto out; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } +out: + cb->args[1] = idx; + cb->args[0] = h; + + return skb->len; +} + /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -3600,4 +3741,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); + + rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, + NULL); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e561f9f07d6d..7ff7788b0151 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3076,8 +3076,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); + unsigned int partial_segs = 0; unsigned int headroom; - unsigned int len; + unsigned int len = head_skb->len; __be16 proto; bool csum; int sg = !!(features & NETIF_F_SG); @@ -3094,6 +3095,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, csum = !!can_checksum_protocol(features, proto); + /* GSO partial only requires that we trim off any excess that + * doesn't fit into an MSS sized block, so take care of that + * now. + */ + if (features & NETIF_F_GSO_PARTIAL) { + partial_segs = len / mss; + mss *= partial_segs; + } + headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3281,6 +3291,23 @@ perform_csum_check: */ segs->prev = tail; + /* Update GSO info on first skb in partial sequence. */ + if (partial_segs) { + int type = skb_shinfo(head_skb)->gso_type; + + /* Update type to add partial and then remove dodgy if set */ + type |= SKB_GSO_PARTIAL; + type &= ~SKB_GSO_DODGY; + + /* Update GSO info and prepare to start updating headers on + * our way back down the stack of protocols. 
+ */ + skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size; + skb_shinfo(segs)->gso_segs = partial_segs; + skb_shinfo(segs)->gso_type = type; + SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset; + } + /* Following permits correct backpressure, for protocols * using skb_set_owner_w(). * Idea is to tranfert ownership from head_skb to last segment. diff --git a/net/core/sock.c b/net/core/sock.c index 7e73c26b6bb4..e16a5db853c6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -405,9 +405,8 @@ static void sock_disable_timestamp(struct sock *sk, unsigned long flags) } -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { - int err; unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; @@ -417,10 +416,6 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return -ENOMEM; } - err = sk_filter(sk, skb); - if (err) - return err; - if (!sk_rmem_schedule(sk, skb, skb->truesize)) { atomic_inc(&sk->sk_drops); return -ENOBUFS; @@ -443,6 +438,18 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk->sk_data_ready(sk); return 0; } +EXPORT_SYMBOL(__sock_queue_rcv_skb); + +int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sk_filter(sk, skb); + if (err) + return err; + + return __sock_queue_rcv_skb(sk, skb); +} EXPORT_SYMBOL(sock_queue_rcv_skb); int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) @@ -835,7 +842,8 @@ set_rcvbuf: !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { - if (sk->sk_state != TCP_ESTABLISHED) { + if ((1 << sk->sk_state) & + (TCPF_CLOSE | TCPF_LISTEN)) { ret = -EINVAL; break; } @@ -1421,8 +1429,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, } EXPORT_SYMBOL(sk_alloc); -void sk_destruct(struct sock *sk) +/* Sockets having SOCK_RCU_FREE will call this function after one RCU + * grace period. This is the case for UDP sockets and TCP listeners. 
+ */ +static void __sk_destruct(struct rcu_head *head) { + struct sock *sk = container_of(head, struct sock, sk_rcu); struct sk_filter *filter; if (sk->sk_destruct) @@ -1451,6 +1463,14 @@ void sk_destruct(struct sock *sk) sk_prot_free(sk->sk_prot_creator, sk); } +void sk_destruct(struct sock *sk) +{ + if (sock_flag(sk, SOCK_RCU_FREE)) + call_rcu(&sk->sk_rcu, __sk_destruct); + else + __sk_destruct(&sk->sk_rcu); +} + static void __sk_free(struct sock *sk) { if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) @@ -1515,6 +1535,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_dst_cache = NULL; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; + atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; @@ -1869,27 +1890,51 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, } EXPORT_SYMBOL(sock_alloc_send_skb); +int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, + struct sockcm_cookie *sockc) +{ + u32 tsflags; + + switch (cmsg->cmsg_type) { + case SO_MARK: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->mark = *(u32 *)CMSG_DATA(cmsg); + break; + case SO_TIMESTAMPING: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + + tsflags = *(u32 *)CMSG_DATA(cmsg); + if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK) + return -EINVAL; + + sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK; + sockc->tsflags |= tsflags; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(__sock_cmsg_send); + int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc) { struct cmsghdr *cmsg; + int ret; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_SOCKET) continue; - switch (cmsg->cmsg_type) { - case SO_MARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) - return -EINVAL; - sockc->mark = *(u32 *)CMSG_DATA(cmsg); - break; - default: - return -EINVAL; - } + ret = __sock_cmsg_send(sk, msg, cmsg, sockc); + if (ret) + return ret; } return 0; } @@ -2145,6 +2190,15 @@ void __sk_mem_reclaim(struct sock *sk, int amount) } EXPORT_SYMBOL(__sk_mem_reclaim); +int sk_set_peek_off(struct sock *sk, int val) +{ + if (val < 0) + return -EINVAL; + + sk->sk_peek_off = val; + return 0; +} +EXPORT_SYMBOL_GPL(sk_set_peek_off); /* * Set of default routines for initialising struct proto_ops when @@ -2432,11 +2486,6 @@ EXPORT_SYMBOL(lock_sock_nested); void release_sock(struct sock *sk) { - /* - * The sk_lock has mutex_unlock() semantics: - */ - mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a996ce8c8fb2..ca9e35bbe13c 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -67,6 +67,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); return nla_put(skb, attrtype, sizeof(mem), &mem); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9c67a961ba53..f6d183f8f332 100644 --- a/net/dccp/ipv4.c +++ 
b/net/dccp/ipv4.c @@ -62,7 +62,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = daddr = usin->sin_addr.s_addr; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet_opt != NULL && inet_opt->opt.srr) { if (daddr == 0) return -EINVAL; @@ -764,6 +764,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; const struct iphdr *iph; + bool refcounted; struct sock *sk; int min_cov; @@ -801,7 +802,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) lookup: sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), - dh->dccph_sport, dh->dccph_dport); + dh->dccph_sport, dh->dccph_dport, &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); @@ -830,6 +831,7 @@ lookup: goto lookup; } sock_hold(sk); + refcounted = true; nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); @@ -886,7 +888,8 @@ discard_it: return 0; discard_and_relse: - sock_put(sk); + if (refcounted) + sock_put(sk); goto discard_it; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4663a01d5039..8ceb3cebcad4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -642,6 +642,7 @@ discard: static int dccp_v6_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; + bool refcounted; struct sock *sk; int min_cov; @@ -670,7 +671,7 @@ static int dccp_v6_rcv(struct sk_buff *skb) lookup: sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport, - inet6_iif(skb)); + inet6_iif(skb), &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); @@ -699,6 +700,7 @@ lookup: goto lookup; } sock_hold(sk); + refcounted = true; nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); @@ -752,7 +754,8 @@ discard_it: return 0; discard_and_relse: - sock_put(sk); + if (refcounted) + sock_put(sk); goto discard_it; } @@ -865,7 +868,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index c28c47463b7e..d61ceed912be 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -51,11 +51,12 @@ void unregister_switch_driver(struct dsa_switch_driver *drv) EXPORT_SYMBOL_GPL(unregister_switch_driver); static struct dsa_switch_driver * -dsa_switch_probe(struct device *host_dev, int sw_addr, char **_name) +dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, + const char **_name, void **priv) { struct dsa_switch_driver *ret; struct list_head *list; - char *name; + const char *name; ret = NULL; name = NULL; @@ -66,7 +67,7 @@ dsa_switch_probe(struct device *host_dev, int sw_addr, char **_name) drv = list_entry(list, struct dsa_switch_driver, list); - name = drv->probe(host_dev, sw_addr); + name = drv->probe(parent, host_dev, sw_addr, priv); if (name != NULL) { ret = drv; break; @@ -245,7 +246,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) } else if (!strcmp(name, "dsa")) { ds->dsa_port_mask |= 1 << i; } else { - ds->phys_port_mask |= 1 << i; + ds->enabled_port_mask |= 1 << i; } valid_name_found = true; } @@ -258,7 +259,7 @@ static int 
dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) /* Make the built-in MII bus mask match the number of ports, * switch drivers can override this later */ - ds->phys_mii_mask = ds->phys_port_mask; + ds->phys_mii_mask = ds->enabled_port_mask; /* * If the CPU connects to this switch, set the switch tree @@ -266,7 +267,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * switch. */ if (dst->cpu_switch == index) { - switch (ds->tag_protocol) { + switch (drv->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case DSA_TAG_PROTO_DSA: dst->rcv = dsa_netdev_ops.rcv; @@ -294,7 +295,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) goto out; } - dst->tag_protocol = ds->tag_protocol; + dst->tag_protocol = drv->tag_protocol; } /* @@ -324,7 +325,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * Create network devices for physical switch ports. */ for (i = 0; i < DSA_MAX_PORTS; i++) { - if (!(ds->phys_port_mask & (1 << i))) + if (!(ds->enabled_port_mask & (1 << i))) continue; ret = dsa_slave_create(ds, parent, i, pd->port_names[i]); @@ -382,12 +383,13 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct dsa_switch_driver *drv; struct dsa_switch *ds; int ret; - char *name; + const char *name; + void *priv; /* * Probe for switch model. */ - drv = dsa_switch_probe(host_dev, pd->sw_addr, &name); + drv = dsa_switch_probe(parent, host_dev, pd->sw_addr, &name, &priv); if (drv == NULL) { netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", index); @@ -400,7 +402,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Allocate and initialise switch state. */ - ds = devm_kzalloc(parent, sizeof(*ds) + drv->priv_size, GFP_KERNEL); + ds = devm_kzalloc(parent, sizeof(*ds), GFP_KERNEL); if (ds == NULL) return ERR_PTR(-ENOMEM); @@ -408,7 +410,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->index = index; ds->pd = pd; ds->drv = drv; - ds->tag_protocol = drv->tag_protocol; + ds->priv = priv; ds->master_dev = host_dev; ret = dsa_switch_setup_one(ds, parent); @@ -432,7 +434,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) /* Destroy network devices for physical switch ports. */ for (port = 0; port < DSA_MAX_PORTS; port++) { - if (!(ds->phys_port_mask & (1 << port))) + if (!(ds->enabled_port_mask & (1 << port))) continue; if (!ds->ports[port]) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 1d1a54687e4a..dfa33779d49c 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -22,11 +22,6 @@ struct dsa_device_ops { }; struct dsa_slave_priv { - /* - * The linux network interface corresponding to this - * switch port. 
- */ - struct net_device *dev; struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a575f0350d5a..3b6750f5e68b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -104,8 +104,8 @@ static int dsa_slave_open(struct net_device *dev) goto clear_promisc; } - if (ds->drv->port_stp_update) - ds->drv->port_stp_update(ds, p->port, stp_state); + if (ds->drv->port_stp_state_set) + ds->drv->port_stp_state_set(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -147,8 +147,8 @@ static int dsa_slave_close(struct net_device *dev) if (ds->drv->port_disable) ds->drv->port_disable(ds, p->port, p->phy); - if (ds->drv->port_stp_update) - ds->drv->port_stp_update(ds, p->port, BR_STATE_DISABLED); + if (ds->drv->port_stp_state_set) + ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -207,21 +207,16 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - int err; if (switchdev_trans_ph_prepare(trans)) { if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) return -EOPNOTSUPP; - err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); - if (err) - return err; - } else { - err = ds->drv->port_vlan_add(ds, p->port, vlan, trans); - if (err) - return err; + return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); } + ds->drv->port_vlan_add(ds, p->port, vlan, trans); + return 0; } @@ -256,17 +251,17 @@ static int dsa_slave_port_fdb_add(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - int ret; - if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) - return -EOPNOTSUPP; + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) + return -EOPNOTSUPP; - if (switchdev_trans_ph_prepare(trans)) - ret = ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); - else - ret = ds->drv->port_fdb_add(ds, p->port, fdb, trans); + return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); + } - return ret; + ds->drv->port_fdb_add(ds, p->port, fdb, trans); + + return 0; } static int dsa_slave_port_fdb_del(struct net_device *dev, @@ -305,16 +300,19 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } -static int dsa_slave_stp_update(struct net_device *dev, u8 state) +static int dsa_slave_stp_state_set(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - int ret = -EOPNOTSUPP; - if (ds->drv->port_stp_update) - ret = ds->drv->port_stp_update(ds, p->port, state); + if (switchdev_trans_ph_prepare(trans)) + return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP; - return ret; + ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state); + + return 0; } static int dsa_slave_vlan_filtering(struct net_device *dev, @@ -339,17 +337,11 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; int ret; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - if (switchdev_trans_ph_prepare(trans)) - ret = ds->drv->port_stp_update ? 
0 : -EOPNOTSUPP; - else - ret = ds->drv->port_stp_update(ds, p->port, - attr->u.stp_state); + ret = dsa_slave_stp_state_set(dev, attr, trans); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: ret = dsa_slave_vlan_filtering(dev, attr, trans); @@ -468,7 +460,8 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - dsa_slave_stp_update(dev, BR_STATE_FORWARDING); + if (ds->drv->port_stp_state_set) + ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, @@ -680,10 +673,10 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - data[0] = p->dev->stats.tx_packets; - data[1] = p->dev->stats.tx_bytes; - data[2] = p->dev->stats.rx_packets; - data[3] = p->dev->stats.rx_bytes; + data[0] = dev->stats.tx_packets; + data[1] = dev->stats.tx_bytes; + data[2] = dev->stats.rx_packets; + data[3] = dev->stats.rx_bytes; if (ds->drv->get_ethtool_stats != NULL) ds->drv->get_ethtool_stats(ds, p->port, data + 4); } @@ -1070,7 +1063,6 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->vlan_features = master->vlan_features; p = netdev_priv(slave_dev); - p->dev = slave_dev; p->parent = ds; p->port = port; diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig index 0d3d709052ca..4b683fd0abf1 100644 --- a/net/hsr/Kconfig +++ b/net/hsr/Kconfig @@ -18,8 +18,9 @@ config HSR earlier. This code is a "best effort" to comply with the HSR standard as - described in IEC 62439-3:2010 (HSRv0), but no compliancy tests have - been made. + described in IEC 62439-3:2010 (HSRv0) and IEC 62439-3:2012 (HSRv1), + but no compliancy tests have been made. Use iproute2 to select + the version you desire. You need to perform any and all necessary tests yourself before relying on this code in a safety critical system! diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c7d1adca30d8..386cbce7bc51 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -90,7 +90,8 @@ static void hsr_check_announce(struct net_device *hsr_dev, hsr = netdev_priv(hsr_dev); - if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) { + if ((hsr_dev->operstate == IF_OPER_UP) + && (old_operstate != IF_OPER_UP)) { /* Went up */ hsr->announce_count = 0; hsr->announce_timer.expires = jiffies + @@ -250,31 +251,22 @@ static const struct header_ops hsr_header_ops = { .parse = eth_header_parse, }; - -/* HSR:2010 supervision frames should be padded so that the whole frame, - * including headers and FCS, is 64 bytes (without VLAN). 
- */ -static int hsr_pad(int size) -{ - const int min_size = ETH_ZLEN - HSR_HLEN - ETH_HLEN; - - if (size >= min_size) - return size; - return min_size; -} - -static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) +static void send_hsr_supervision_frame(struct hsr_port *master, + u8 type, u8 hsrVer) { struct sk_buff *skb; int hlen, tlen; + struct hsr_tag *hsr_tag; struct hsr_sup_tag *hsr_stag; struct hsr_sup_payload *hsr_sp; unsigned long irqflags; hlen = LL_RESERVED_SPACE(master->dev); tlen = master->dev->needed_tailroom; - skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen, - GFP_ATOMIC); + skb = dev_alloc_skb( + sizeof(struct hsr_tag) + + sizeof(struct hsr_sup_tag) + + sizeof(struct hsr_sup_payload) + hlen + tlen); if (skb == NULL) return; @@ -282,32 +274,48 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) skb_reserve(skb, hlen); skb->dev = master->dev; - skb->protocol = htons(ETH_P_PRP); + skb->protocol = htons(hsrVer ? ETH_P_HSR : ETH_P_PRP); skb->priority = TC_PRIO_CONTROL; - if (dev_hard_header(skb, skb->dev, ETH_P_PRP, + if (dev_hard_header(skb, skb->dev, (hsrVer ? ETH_P_HSR : ETH_P_PRP), master->hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) goto out; skb_reset_mac_header(skb); - hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(*hsr_stag)); + if (hsrVer > 0) { + hsr_tag = (typeof(hsr_tag)) skb_put(skb, sizeof(struct hsr_tag)); + hsr_tag->encap_proto = htons(ETH_P_PRP); + set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE); + } - set_hsr_stag_path(hsr_stag, 0xf); - set_hsr_stag_HSR_Ver(hsr_stag, 0); + hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(struct hsr_sup_tag)); + set_hsr_stag_path(hsr_stag, (hsrVer ? 0x0 : 0xf)); + set_hsr_stag_HSR_Ver(hsr_stag, hsrVer); + /* From HSRv1 on we have separate supervision sequence numbers. */ spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); - hsr_stag->sequence_nr = htons(master->hsr->sequence_nr); - master->hsr->sequence_nr++; + if (hsrVer > 0) { + hsr_stag->sequence_nr = htons(master->hsr->sup_sequence_nr); + hsr_tag->sequence_nr = htons(master->hsr->sequence_nr); + master->hsr->sup_sequence_nr++; + master->hsr->sequence_nr++; + } else { + hsr_stag->sequence_nr = htons(master->hsr->sequence_nr); + master->hsr->sequence_nr++; + } spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); hsr_stag->HSR_TLV_Type = type; - hsr_stag->HSR_TLV_Length = 12; + /* TODO: Why 12 in HSRv0? */ + hsr_stag->HSR_TLV_Length = hsrVer ? 
sizeof(struct hsr_sup_payload) : 12; /* Payload: MacAddressA */ - hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp)); + hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr); + skb_put_padto(skb, ETH_ZLEN + HSR_HLEN); + hsr_forward_skb(skb, master); return; @@ -329,19 +337,20 @@ static void hsr_announce(unsigned long data) rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - if (hsr->announce_count < 3) { - send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE); + if (hsr->announce_count < 3 && hsr->protVersion == 0) { + send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE, + hsr->protVersion); hsr->announce_count++; - } else { - send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK); - } - if (hsr->announce_count < 3) hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - else + } else { + send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK, + hsr->protVersion); + hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + } if (is_admin_up(master->dev)) add_timer(&hsr->announce_timer); @@ -428,7 +437,7 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { }; int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec) + unsigned char multicast_spec, u8 protocol_version) { struct hsr_priv *hsr; struct hsr_port *port; @@ -450,6 +459,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], spin_lock_init(&hsr->seqnr_lock); /* Overflow soon to find bugs easier: */ hsr->sequence_nr = HSR_SEQNR_START; + hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; init_timer(&hsr->announce_timer); hsr->announce_timer.function = hsr_announce; @@ -462,6 +472,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; + hsr->protVersion = protocol_version; + /* FIXME: should I modify the value of these? * * - hsr_dev->flags - i.e. diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 108a5d59d2a6..9975e31bbb82 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -17,7 +17,7 @@ void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec); + unsigned char multicast_spec, u8 protocol_version); void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); bool is_hsr_master(struct net_device *dev); int hsr_get_max_mtu(struct hsr_priv *hsr); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 7871ed6d3825..5ee1d43f1310 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -50,21 +50,40 @@ struct hsr_frame_info { */ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) { - struct hsr_ethhdr_sp *hdr; + struct ethhdr *ethHdr; + struct hsr_sup_tag *hsrSupTag; + struct hsrv1_ethhdr_sp *hsrV1Hdr; WARN_ON_ONCE(!skb_mac_header_was_set(skb)); - hdr = (struct hsr_ethhdr_sp *) skb_mac_header(skb); + ethHdr = (struct ethhdr *) skb_mac_header(skb); - if (!ether_addr_equal(hdr->ethhdr.h_dest, + /* Correct addr? */ + if (!ether_addr_equal(ethHdr->h_dest, hsr->sup_multicast_addr)) return false; - if (get_hsr_stag_path(&hdr->hsr_sup) != 0x0f) + /* Correct ether type?. 
*/ + if (!(ethHdr->h_proto == htons(ETH_P_PRP) + || ethHdr->h_proto == htons(ETH_P_HSR))) return false; - if ((hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_ANNOUNCE) && - (hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) + + /* Get the supervision header from correct location. */ + if (ethHdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */ + hsrV1Hdr = (struct hsrv1_ethhdr_sp *) skb_mac_header(skb); + if (hsrV1Hdr->hsr.encap_proto != htons(ETH_P_PRP)) + return false; + + hsrSupTag = &hsrV1Hdr->hsr_sup; + } else { + hsrSupTag = &((struct hsrv0_ethhdr_sp *) skb_mac_header(skb))->hsr_sup; + } + + if ((hsrSupTag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) && + (hsrSupTag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) return false; - if (hdr->hsr_sup.HSR_TLV_Length != 12) + if ((hsrSupTag->HSR_TLV_Length != 12) && + (hsrSupTag->HSR_TLV_Length != + sizeof(struct hsr_sup_payload))) return false; return true; @@ -110,7 +129,7 @@ static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, - struct hsr_port *port) + struct hsr_port *port, u8 protoVersion) { struct hsr_ethhdr *hsr_ethhdr; int lane_id; @@ -131,7 +150,8 @@ static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; - hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP); + hsr_ethhdr->ethhdr.h_proto = htons(protoVersion ? + ETH_P_HSR : ETH_P_PRP); } static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, @@ -160,7 +180,7 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, memmove(dst, src, movelen); skb_reset_mac_header(skb); - hsr_fill_tag(skb, frame, port); + hsr_fill_tag(skb, frame, port, port->hsr->protVersion); return skb; } @@ -320,7 +340,8 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame, /* FIXME: */ WARN_ONCE(1, "HSR: VLAN not yet supported"); } - if (ethhdr->h_proto == htons(ETH_P_PRP)) { + if (ethhdr->h_proto == htons(ETH_P_PRP) + || ethhdr->h_proto == htons(ETH_P_HSR)) { frame->skb_std = NULL; frame->skb_hsr = skb; frame->sequence_nr = hsr_get_skb_sequence_nr(skb); diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index bace124d14ef..7ea925816f79 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -177,17 +177,17 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, return node; } - if (!is_sup) - return NULL; /* Only supervision frame may create node entry */ + /* Everyone may create a node entry, connected node to a HSR device. */ - if (ethhdr->h_proto == htons(ETH_P_PRP)) { + if (ethhdr->h_proto == htons(ETH_P_PRP) + || ethhdr->h_proto == htons(ETH_P_HSR)) { /* Use the existing sequence_nr from the tag as starting point * for filtering duplicate frames. 
*/ seq_out = hsr_get_skb_sequence_nr(skb) - 1; } else { WARN_ONCE(1, "%s: Non-HSR frame\n", __func__); - seq_out = 0; + seq_out = HSR_SEQNR_START; } return hsr_add_node(node_db, ethhdr->h_source, seq_out); @@ -200,17 +200,25 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, struct hsr_port *port_rcv) { + struct ethhdr *ethhdr; struct hsr_node *node_real; struct hsr_sup_payload *hsr_sp; struct list_head *node_db; int i; - skb_pull(skb, sizeof(struct hsr_ethhdr_sp)); - hsr_sp = (struct hsr_sup_payload *) skb->data; + ethhdr = (struct ethhdr *) skb_mac_header(skb); - if (ether_addr_equal(eth_hdr(skb)->h_source, hsr_sp->MacAddressA)) - /* Not sent from MacAddressB of a PICS_SUBS capable node */ - goto done; + /* Leave the ethernet header. */ + skb_pull(skb, sizeof(struct ethhdr)); + + /* And leave the HSR tag. */ + if (ethhdr->h_proto == htons(ETH_P_HSR)) + skb_pull(skb, sizeof(struct hsr_tag)); + + /* And leave the HSR sup tag. */ + skb_pull(skb, sizeof(struct hsr_sup_tag)); + + hsr_sp = (struct hsr_sup_payload *) skb->data; /* Merge node_curr (registered on MacAddressB) into node_real */ node_db = &port_rcv->hsr->node_db; @@ -225,7 +233,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, /* Node has already been merged */ goto done; - ether_addr_copy(node_real->MacAddressB, eth_hdr(skb)->h_source); + ether_addr_copy(node_real->MacAddressB, ethhdr->h_source); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && time_after(node_curr->time_in[i], node_real->time_in[i])) { @@ -241,7 +249,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, kfree_rcu(node_curr, rcu_head); done: - skb_push(skb, sizeof(struct hsr_ethhdr_sp)); + skb_push(skb, sizeof(struct hsrv1_ethhdr_sp)); } diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 5a9c69962ded..9b9909e89e9e 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -30,6 +30,7 @@ */ #define MAX_SLAVE_DIFF 3000 /* ms */ #define HSR_SEQNR_START (USHRT_MAX - 1024) +#define HSR_SUP_SEQNR_START (HSR_SEQNR_START / 2) /* How often shall we check for broken ring and remove node entries older than @@ -58,6 +59,8 @@ struct hsr_tag { #define HSR_HLEN 6 +#define HSR_V1_SUP_LSDUSIZE 52 + /* The helper functions below assumes that 'path' occupies the 4 most * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or * equivalently, the 4 most significant bits of HSR tag byte 14). @@ -131,8 +134,14 @@ static inline void set_hsr_stag_HSR_Ver(struct hsr_sup_tag *hst, u16 HSR_Ver) set_hsr_tag_LSDU_size((struct hsr_tag *) hst, HSR_Ver); } -struct hsr_ethhdr_sp { +struct hsrv0_ethhdr_sp { + struct ethhdr ethhdr; + struct hsr_sup_tag hsr_sup; +} __packed; + +struct hsrv1_ethhdr_sp { struct ethhdr ethhdr; + struct hsr_tag hsr; struct hsr_sup_tag hsr_sup; } __packed; @@ -162,6 +171,8 @@ struct hsr_priv { struct timer_list prune_timer; int announce_count; u16 sequence_nr; + u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ + u8 protVersion; /* Indicate if HSRv0 or HSRv1. 
*/ spinlock_t seqnr_lock; /* locking for sequence_nr */ unsigned char sup_multicast_addr[ETH_ALEN]; }; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index a2c7e4c0ac1e..d4d1617f43a8 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,7 +23,8 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, - [IFLA_HSR_SUPERVISION_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IFLA_HSR_VERSION] = { .type = NLA_U8 }, + [IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN }, [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, }; @@ -35,7 +36,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_device *link[2]; - unsigned char multicast_spec; + unsigned char multicast_spec, hsr_version; if (!data) { netdev_info(dev, "HSR: No slave devices specified\n"); @@ -62,7 +63,12 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, else multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); - return hsr_dev_finalize(dev, link, multicast_spec); + if (!data[IFLA_HSR_VERSION]) + hsr_version = 0; + else + hsr_version = nla_get_u8(data[IFLA_HSR_VERSION]); + + return hsr_dev_finalize(dev, link, multicast_spec, hsr_version); } static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) @@ -115,10 +121,9 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { /* attribute policy */ -/* NLA_BINARY missing in libnl; use NLA_UNSPEC in userspace instead. */ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { - [HSR_A_NODE_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, - [HSR_A_NODE_ADDR_B] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [HSR_A_NODE_ADDR] = { .len = ETH_ALEN }, + [HSR_A_NODE_ADDR_B] = { .len = ETH_ALEN }, [HSR_A_IFINDEX] = { .type = NLA_U32 }, [HSR_A_IF1_AGE] = { .type = NLA_U32 }, [HSR_A_IF2_AGE] = { .type = NLA_U32 }, diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 7d37366cc695..f5b60388d02f 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -22,6 +22,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct hsr_port *port; + u16 protocol; if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: skb invalid", __func__); @@ -37,7 +38,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) goto finish_consume; } - if (eth_hdr(skb)->h_proto != htons(ETH_P_PRP)) + protocol = eth_hdr(skb)->h_proto; + if (protocol != htons(ETH_P_PRP) && protocol != htons(ETH_P_HSR)) goto finish_pass; skb_push(skb, ETH_HLEN); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9e481992dbae..2e6e65fc4d20 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -948,6 +948,7 @@ const struct proto_ops inet_dgram_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, + .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, @@ -1106,7 +1107,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct ip_options_rcu *inet_opt; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; @@ -1194,12 +1195,12 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static struct sk_buff *inet_gso_segment(struct sk_buff *skb, 
netdev_features_t features) { + bool udpfrag = false, fixedid = false, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; - bool udpfrag, encap; struct iphdr *iph; - int proto; + int proto, tot_len; int nhoff; int ihl; int id; @@ -1216,7 +1217,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TUNNEL_REMCSUM | + SKB_GSO_PARTIAL | 0))) goto out; @@ -1247,11 +1250,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); - if (skb->encapsulation && - skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) - udpfrag = proto == IPPROTO_UDP && encap; - else - udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + if (!skb->encapsulation || encap) { + udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); + fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); + + /* fixed ID is invalid if DF bit is not set */ + if (fixedid && !(iph->frag_off & htons(IP_DF))) + goto out; + } ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) @@ -1264,15 +1270,25 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); if (udpfrag) { - iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; + tot_len = skb->len - nhoff; + } else if (skb_is_gso(skb)) { + if (!fixedid) { + iph->id = htons(id); + id += skb_shinfo(skb)->gso_segs; + } + tot_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)iph; } else { - iph->id = htons(id++); + if (!fixedid) + iph->id = htons(id++); + tot_len = skb->len - nhoff; } - iph->tot_len = htons(skb->len - nhoff); + iph->tot_len = htons(tot_len); ip_send_check(iph); if (encap) skb_reset_inner_headers(skb); @@ -1324,6 +1340,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, for (p = *head; p; p = p->next) { struct iphdr *iph2; + u16 flush_id; if (!NAPI_GRO_CB(p)->same_flow) continue; @@ -1347,16 +1364,36 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, (iph->tos ^ iph2->tos) | ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); - /* Save the IP ID check to be included later when we get to - * the transport layer so only the inner most IP ID is checked. - * This is because some GSO/TSO implementations do not - * correctly increment the IP ID for the outer hdrs. - */ - NAPI_GRO_CB(p)->flush_id = - ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; + + /* We need to store of the IP ID check to be included later + * when we can verify that this packet does in fact belong + * to a given flow. + */ + flush_id = (u16)(id - ntohs(iph2->id)); + + /* This bit of code makes it much easier for us to identify + * the cases where we are doing atomic vs non-atomic IP ID + * checks. Specifically an atomic check can return IP ID + * values 0 - 0xFFFF, while a non-atomic check can only + * return 0 or 0xFFFF. + */ + if (!NAPI_GRO_CB(p)->is_atomic || + !(iph->frag_off & htons(IP_DF))) { + flush_id ^= NAPI_GRO_CB(p)->count; + flush_id = flush_id ? 0xFFFF : 0; + } + + /* If the previous IP ID value was based on an atomic + * datagram we can overwrite the value and ignore it. 
+ */ + if (NAPI_GRO_CB(skb)->is_atomic) + NAPI_GRO_CB(p)->flush_id = flush_id; + else + NAPI_GRO_CB(p)->flush_id |= flush_id; } + NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF)); NAPI_GRO_CB(skb)->flush |= flush; skb_set_network_header(skb, off); /* The above will be needed by the transport layer if there is one diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index bdb2a07ec363..40d6b87713a1 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1933,7 +1933,8 @@ int cipso_v4_sock_setattr(struct sock *sk, sk_inet = inet_sk(sk); - old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk)); + old = rcu_dereference_protected(sk_inet->inet_opt, + lockdep_sock_is_held(sk)); if (sk_inet->is_icsk) { sk_conn = inet_csk(sk); if (old) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d97268e8ff10..ab64d9f2eef9 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1559,21 +1559,45 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) } #ifdef CONFIG_IP_ROUTE_MULTIPATH +static bool fib_good_nh(const struct fib_nh *nh) +{ + int state = NUD_REACHABLE; + + if (nh->nh_scope == RT_SCOPE_LINK) { + struct neighbour *n; + + rcu_read_lock_bh(); + + n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw); + if (n) + state = n->nud_state; + + rcu_read_unlock_bh(); + } + + return !!(state & NUD_VALID); +} void fib_select_multipath(struct fib_result *res, int hash) { struct fib_info *fi = res->fi; + struct net *net = fi->fib_net; + bool first = false; for_nexthops(fi) { if (hash > atomic_read(&nh->nh_upper_bound)) continue; - res->nh_sel = nhsel; - return; + if (!net->ipv4.sysctl_fib_multipath_use_neigh || + fib_good_nh(nh)) { + res->nh_sel = nhsel; + return; + } + if (!first) { + res->nh_sel = nhsel; + first = true; + } } endfor_nexthops(fi); - - /* Race condition: route has just become dead. 
*/ - res->nh_sel = 0; } #endif diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index a39068b4a4d9..7ac5ec87b004 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -22,7 +22,6 @@ struct fou { u8 flags; __be16 port; u16 type; - struct udp_offload udp_offloads; struct list_head list; struct rcu_head rcu; }; @@ -186,13 +185,13 @@ drop: return 0; } -static struct sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff) +static struct sk_buff **fou_gro_receive(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff **pp = NULL; - u8 proto = NAPI_GRO_CB(skb)->proto; + u8 proto = fou_from_sock(sk)->protocol; const struct net_offload **offloads; /* We can clear the encap_mark for FOU as we are essentially doing @@ -220,11 +219,11 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff, - struct udp_offload *uoff) +static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, + int nhoff) { const struct net_offload *ops; - u8 proto = NAPI_GRO_CB(skb)->proto; + u8 proto = fou_from_sock(sk)->protocol; int err = -ENOSYS; const struct net_offload **offloads; @@ -267,9 +266,9 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return guehdr; } -static struct sk_buff **gue_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff) +static struct sk_buff **gue_gro_receive(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) { const struct net_offload **offloads; const struct net_offload *ops; @@ -280,7 +279,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, void *data; u16 doffset = 0; int flush = 1; - struct fou *fou = container_of(uoff, struct fou, udp_offloads); + struct fou *fou = fou_from_sock(sk); struct gro_remcsum grc; skb_gro_remcsum_init(&grc); @@ -392,8 +391,7 @@ out: return pp; } -static int gue_gro_complete(struct sk_buff *skb, int nhoff, - struct udp_offload *uoff) +static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { const struct net_offload **offloads; struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); @@ -441,10 +439,7 @@ static int fou_add_to_port_list(struct net *net, struct fou *fou) static void fou_release(struct fou *fou) { struct socket *sock = fou->sock; - struct sock *sk = sock->sk; - if (sk->sk_family == AF_INET) - udp_del_offload(&fou->udp_offloads); list_del(&fou->list); udp_tunnel_sock_release(sock); @@ -454,11 +449,9 @@ static void fou_release(struct fou *fou) static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) { udp_sk(sk)->encap_rcv = fou_udp_recv; - fou->protocol = cfg->protocol; - fou->udp_offloads.callbacks.gro_receive = fou_gro_receive; - fou->udp_offloads.callbacks.gro_complete = fou_gro_complete; - fou->udp_offloads.port = cfg->udp_config.local_udp_port; - fou->udp_offloads.ipproto = cfg->protocol; + udp_sk(sk)->gro_receive = fou_gro_receive; + udp_sk(sk)->gro_complete = fou_gro_complete; + fou_from_sock(sk)->protocol = cfg->protocol; return 0; } @@ -466,9 +459,8 @@ static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) { udp_sk(sk)->encap_rcv = gue_udp_recv; - fou->udp_offloads.callbacks.gro_receive = gue_gro_receive; - fou->udp_offloads.callbacks.gro_complete = gue_gro_complete; - fou->udp_offloads.port = cfg->udp_config.local_udp_port; + udp_sk(sk)->gro_receive = gue_gro_receive; + 
udp_sk(sk)->gro_complete = gue_gro_complete; return 0; } @@ -527,12 +519,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_allocation = GFP_ATOMIC; - if (cfg->udp_config.family == AF_INET) { - err = udp_add_offload(net, &fou->udp_offloads); - if (err) - goto error; - } - err = fou_add_to_port_list(net, fou); if (err) goto error; @@ -816,11 +802,11 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; __be16 sport; + int err; - skb = iptunnel_handle_offloads(skb, type); - - if (IS_ERR(skb)) - return PTR_ERR(skb); + err = iptunnel_handle_offloads(skb, type); + if (err) + return err; sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), skb, 0, 0, false); @@ -840,6 +826,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, __be16 sport; void *data; bool need_priv = false; + int err; if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && skb->ip_summed == CHECKSUM_PARTIAL) { @@ -850,10 +837,9 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, optlen += need_priv ? GUE_LEN_PRIV : 0; - skb = iptunnel_handle_offloads(skb, type); - - if (IS_ERR(skb)) - return PTR_ERR(skb); + err = iptunnel_handle_offloads(skb, type); + if (err) + return err; /* Get source port (based on flow hash) before skb_push */ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6a5bd4317866..e88190a8699a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -32,10 +32,12 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | - SKB_GSO_SIT))) + SKB_GSO_SIT | + SKB_GSO_PARTIAL))) goto out; if (!skb->encapsulation) @@ -86,7 +88,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb = segs; do { struct gre_base_hdr *greh; - __be32 *pcsum; + __sum16 *pcsum; /* Set up inner headers if we are offloading inner checksum */ if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -106,10 +108,25 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, continue; greh = (struct gre_base_hdr *)skb_transport_header(skb); - pcsum = (__be32 *)(greh + 1); + pcsum = (__sum16 *)(greh + 1); + + if (skb_is_gso(skb)) { + unsigned int partial_adj; + + /* Adjust checksum to account for the fact that + * the partial checksum is based on actual size + * whereas headers should be based on MSS size. 
+ */ + partial_adj = skb->len + skb_headroom(skb) - + SKB_GSO_CB(skb)->data_offset - + skb_shinfo(skb)->gso_size; + *pcsum = ~csum_fold((__force __wsum)htonl(partial_adj)); + } else { + *pcsum = 0; + } - *pcsum = 0; - *(__sum16 *)pcsum = gso_make_checksum(skb, 0); + *(pcsum + 1) = 0; + *pcsum = gso_make_checksum(skb, 0); } while ((skb = skb->next)); out: return segs; @@ -275,6 +292,18 @@ static const struct net_offload gre_offload = { static int __init gre_offload_init(void) { - return inet_add_offload(&gre_offload, IPPROTO_GRE); + int err; + + err = inet_add_offload(&gre_offload, IPPROTO_GRE); +#if IS_ENABLED(CONFIG_IPV6) + if (err) + return err; + + err = inet6_add_offload(&gre_offload, IPPROTO_GRE); + if (err) + inet_del_offload(&gre_offload, IPPROTO_GRE); +#endif + + return err; } device_initcall(gre_offload_init); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bc5196ea1bdf..ab69da2d2a77 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -661,6 +661,9 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); newsk->sk_write_space = sk_stream_write_space; + /* listeners have SOCK_RCU_FREE, not the children */ + sock_reset_flag(newsk, SOCK_RCU_FREE); + newsk->sk_mark = inet_rsk(req)->ir_mark; atomic64_set(&newsk->sk_cookie, atomic64_read(&inet_rsk(req)->ir_cookie)); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5fdb02f5598e..ad7956fa659a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -66,7 +66,7 @@ static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) mutex_unlock(&inet_diag_table_mutex); } -static void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) +void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) { r->idiag_family = sk->sk_family; @@ -89,6 +89,7 @@ static void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) r->id.idiag_dst[0] = sk->sk_daddr; } } +EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); static size_t inet_sk_attr_size(void) { @@ -104,13 +105,50 @@ static size_t inet_sk_attr_size(void) + 64; } +int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns) +{ + const struct inet_sock *inet = inet_sk(sk); + + if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto errout; + + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, + * hence this needs to be included regardless of socket family. 
+ */ + if (ext & (1 << (INET_DIAG_TOS - 1))) + if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) + goto errout; + +#if IS_ENABLED(CONFIG_IPV6) + if (r->idiag_family == AF_INET6) { + if (ext & (1 << (INET_DIAG_TCLASS - 1))) + if (nla_put_u8(skb, INET_DIAG_TCLASS, + inet6_sk(sk)->tclass) < 0) + goto errout; + + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) + goto errout; + } +#endif + + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); + r->idiag_inode = sock_i_ino(sk); + + return 0; +errout: + return 1; +} +EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { - const struct inet_sock *inet = inet_sk(sk); const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; int ext = req->idiag_ext; @@ -135,32 +173,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 0; r->idiag_retrans = 0; - if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) goto errout; - /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, - * hence this needs to be included regardless of socket family. - */ - if (ext & (1 << (INET_DIAG_TOS - 1))) - if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) - goto errout; - -#if IS_ENABLED(CONFIG_IPV6) - if (r->idiag_family == AF_INET6) { - if (ext & (1 << (INET_DIAG_TCLASS - 1))) - if (nla_put_u8(skb, INET_DIAG_TCLASS, - inet6_sk(sk)->tclass) < 0) - goto errout; - - if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && - nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) - goto errout; - } -#endif - - r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); - r->idiag_inode = sock_i_ino(sk); - if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), @@ -182,27 +197,27 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto out; } -#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - if (icsk->icsk_pending == ICSK_TIME_RETRANS || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + r->idiag_expires = + jiffies_to_msecs(icsk->icsk_timeout - jiffies); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + r->idiag_expires = + jiffies_to_msecs(icsk->icsk_timeout - jiffies); } else if (timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); + r->idiag_expires = + jiffies_to_msecs(sk->sk_timer.expires - jiffies); } else { r->idiag_timer = 0; r->idiag_expires = 0; } -#undef EXPIRES_IN_MS if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { attr = nla_reserve(skb, INET_DIAG_INFO, @@ -356,6 +371,7 @@ struct sock *inet_diag_find_one_icsk(struct net *net, { struct sock *sk; + rcu_read_lock(); if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], @@ -376,9 +392,11 @@ struct sock 
*inet_diag_find_one_icsk(struct net *net, req->id.idiag_if); } #endif - else + else { + rcu_read_unlock(); return ERR_PTR(-EINVAL); - + } + rcu_read_unlock(); if (!sk) return ERR_PTR(-ENOENT); @@ -772,13 +790,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; - struct hlist_nulls_node *node; struct sock *sk; num = 0; ilb = &hashinfo->listening_hash[i]; spin_lock_bh(&ilb->lock); - sk_nulls_for_each(sk, node, &ilb->head) { + sk_for_each(sk, &ilb->head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bc68eced0105..fcadb670f50b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -198,13 +198,13 @@ static inline int compute_score(struct sock *sk, struct net *net, } /* - * Don't inline this cruft. Here are some nice properties to exploit here. The - * BSD API does not allow a listening sock to specify the remote port nor the + * Here are some nice properties to exploit here. The BSD API + * does not allow a listening sock to specify the remote port nor the * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ - +/* called with rcu_read_lock() : No refcount taken on the socket */ struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -212,38 +212,27 @@ struct sock *__inet_lookup_listener(struct net *net, const __be32 daddr, const unsigned short hnum, const int dif) { - struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore, matches = 0, reuseport = 0; - bool select_ok = true; + int score, hiscore = 0, matches = 0, reuseport = 0; + struct sock *sk, *result = NULL; u32 phash = 0; - rcu_read_lock(); -begin: - result = NULL; - hiscore = 0; - sk_nulls_for_each_rcu(sk, node, &ilb->head) { + sk_for_each_rcu(sk, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { - result = sk; - hiscore = score; reuseport = sk->sk_reuseport; if (reuseport) { phash = inet_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - sk2 = reuseport_select_sock(sk, phash, - skb, doff); - if (sk2) { - result = sk2; - goto found; - } - } + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; matches = 1; } + result = sk; + hiscore = score; } else if (score == hiscore && reuseport) { matches++; if (reciprocal_scale(phash, matches) == 0) @@ -251,25 +240,6 @@ begin: phash = next_pseudo_random32(phash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. 
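With listener sockets now freed only after an RCU grace period (SOCK_RCU_FREE, set in the __inet_hash() hunk further down), the restart-on-nulls dance being deleted here is no longer needed, and equal-scoring SO_REUSEPORT listeners are still spread per flow in the rewritten walk above: for the Nth equally good candidate the current pick is replaced when reciprocal_scale(hash, N) lands on zero, roughly a 1/N chance, which amounts to reservoir sampling keyed by the flow hash. A standalone sketch, with the helper written out the way the kernel defines it and an arbitrary hash value; the kernel additionally advances the hash between candidates.

#include <stdint.h>
#include <stdio.h>

/* Map a 32-bit value uniformly onto [0, ep_ro), as the kernel helper does. */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
    return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

int main(void)
{
    uint32_t phash = 0x9e3779b9;    /* assumed per-flow hash */
    int pick = 1;                   /* first equal-score socket seen */

    for (uint32_t matches = 2; matches <= 5; matches++) {
        /* Replace the current pick with probability of about 1/matches. */
        if (reciprocal_scale(phash, matches) == 0)
            pick = (int)matches;
    }
    printf("flow hash 0x%08x selects candidate #%d\n", phash, pick);
    return 0;
}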
- */ - if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) - goto begin; - if (result) { -found: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) - result = NULL; - else if (unlikely(compute_score(result, net, hnum, daddr, - dif) < hiscore)) { - sock_put(result); - select_ok = false; - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -312,7 +282,6 @@ struct sock *__inet_lookup_established(struct net *net, unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - rcu_read_lock(); begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) @@ -339,7 +308,6 @@ begin: out: sk = NULL; found: - rcu_read_unlock(); return sk; } EXPORT_SYMBOL_GPL(__inet_lookup_established); @@ -471,10 +439,9 @@ static int inet_reuseport_add_sock(struct sock *sk, bool match_wildcard)) { struct sock *sk2; - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); - sk_nulls_for_each_rcu(sk2, node, &ilb->head) { + sk_for_each_rcu(sk2, &ilb->head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && @@ -512,7 +479,8 @@ int __inet_hash(struct sock *sk, struct sock *osk, if (err) goto unlock; } - __sk_nulls_add_node_rcu(sk, &ilb->head); + hlist_add_head_rcu(&sk->sk_node, &ilb->head); + sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: spin_unlock(&ilb->lock); @@ -539,20 +507,25 @@ void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; spinlock_t *lock; + bool listener = false; int done; if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) + if (sk->sk_state == TCP_LISTEN) { lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; - else + listener = true; + } else { lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - + } spin_lock_bh(lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - done = __sk_nulls_del_node_init_rcu(sk); + if (listener) + done = __sk_del_node_init(sk); + else + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); @@ -688,9 +661,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); - INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, - i + LISTENING_NULLS_BASE); - } + INIT_HLIST_HEAD(&h->listening_hash[i].head); + } } EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index af5d1f38217f..eedd829a2f87 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -500,8 +500,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } -static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, - bool csum) +static int gre_handle_offloads(struct sk_buff *skb, bool csum) { return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } @@ -568,11 +567,8 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) } /* Push Tunnel header. 
*/ - skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)); - if (IS_ERR(skb)) { - skb = NULL; + if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) goto err_free_rt; - } flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), @@ -640,16 +636,14 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, tnl_params = &tunnel->parms.iph; } - skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); - if (IS_ERR(skb)) - goto out; + if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + goto free_skb; __gre_xmit(skb, dev, tnl_params, skb->protocol); return NETDEV_TX_OK; free_skb: kfree_skb(skb); -out: dev->stats.tx_dropped++; return NETDEV_TX_OK; } @@ -664,9 +658,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); - if (IS_ERR(skb)) - goto out; + if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + goto free_skb; if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; @@ -676,7 +669,6 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, free_skb: kfree_skb(skb); -out: dev->stats.tx_dropped++; return NETDEV_TX_OK; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 035ad645a8d9..279471c4e58f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -106,7 +106,8 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, return; if (offset != 0) - csum = csum_sub(csum, csum_partial(skb->data, offset, 0)); + csum = csum_sub(csum, csum_partial(skb_transport_header(skb), + offset, 0)); put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); } @@ -219,11 +220,12 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, } EXPORT_SYMBOL(ip_cmsg_recv_offset); -int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, +int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6) { int err, val; struct cmsghdr *cmsg; + struct net *net = sock_net(sk); for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) @@ -244,6 +246,12 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, continue; } #endif + if (cmsg->cmsg_level == SOL_SOCKET) { + if (__sock_cmsg_send(sk, msg, cmsg, &ipc->sockc)) + return -EINVAL; + continue; + } + if (cmsg->cmsg_level != SOL_IP) continue; switch (cmsg->cmsg_type) { @@ -635,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (err) break; old = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet->is_icsk) { struct inet_connection_sock *icsk = inet_csk(sk); #if IS_ENABLED(CONFIG_IPV6) @@ -1295,7 +1303,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, struct ip_options_rcu *inet_opt; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); opt->optlen = 0; if (inet_opt) memcpy(optbuf, &inet_opt->opt, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6165f30c4d72..f46c5c873831 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -86,15 +86,15 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(iptunnel_xmit); -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, - bool xnet) +int __iptunnel_pull_header(struct sk_buff *skb, int 
hdr_len, + __be16 inner_proto, bool raw_proto, bool xnet) { if (unlikely(!pskb_may_pull(skb, hdr_len))) return -ENOMEM; skb_pull_rcsum(skb, hdr_len); - if (inner_proto == htons(ETH_P_TEB)) { + if (!raw_proto && inner_proto == htons(ETH_P_TEB)) { struct ethhdr *eh; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) @@ -117,7 +117,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, return iptunnel_pull_offloads(skb); } -EXPORT_SYMBOL_GPL(iptunnel_pull_header); +EXPORT_SYMBOL_GPL(__iptunnel_pull_header); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags) @@ -146,8 +146,8 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, } EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); -struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, - int gso_type_mask) +int iptunnel_handle_offloads(struct sk_buff *skb, + int gso_type_mask) { int err; @@ -159,9 +159,9 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, if (skb_is_gso(skb)) { err = skb_unclone(skb, GFP_ATOMIC); if (unlikely(err)) - goto error; + return err; skb_shinfo(skb)->gso_type |= gso_type_mask; - return skb; + return 0; } if (skb->ip_summed != CHECKSUM_PARTIAL) { @@ -174,10 +174,7 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, skb->encapsulation = 0; } - return skb; -error: - kfree_skb(skb); - return ERR_PTR(err); + return 0; } EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); @@ -247,10 +244,10 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]); if (tb[LWTUNNEL_IP_DST]) - tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]); + tun_info->key.u.ipv4.dst = nla_get_in_addr(tb[LWTUNNEL_IP_DST]); if (tb[LWTUNNEL_IP_SRC]) - tun_info->key.u.ipv4.src = nla_get_be32(tb[LWTUNNEL_IP_SRC]); + tun_info->key.u.ipv4.src = nla_get_in_addr(tb[LWTUNNEL_IP_SRC]); if (tb[LWTUNNEL_IP_TTL]) tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]); @@ -275,8 +272,8 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || - nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || - nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || + nla_put_in_addr(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || + nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ec51d02166de..92827483ee3d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -219,9 +219,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP); - if (IS_ERR(skb)) - goto out; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); @@ -230,7 +229,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) tx_error: kfree_skb(skb); -out: + dev->stats.tx_errors++; return NETDEV_TX_OK; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 395e2814a46d..21a38e296fe2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2104,7 +2104,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, mfcs.mfcs_packets = 
c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0) return -EMSGSIZE; rtm->rtm_type = RTN_MULTICAST; @@ -2237,7 +2237,7 @@ static size_t mroute_msgsize(bool unresolved, int maxvif) + nla_total_size(0) /* RTA_MULTIPATH */ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ - + nla_total_size(sizeof(struct rta_mfc_stats)) + + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; return len; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index cf9700b1a106..66ddcb60519a 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -737,6 +737,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* no remote port */ } + ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; @@ -744,10 +745,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.ttl = 0; ipc.tos = -1; - sock_tx_timestamp(sk, &ipc.tx_flags); - if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); + err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { kfree(ipc.opt); return err; @@ -768,6 +767,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rcu_read_unlock(); } + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); + saddr = ipc.addr; ipc.addr = faddr = daddr; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 8d22de74080c..438f50c1a676 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -339,8 +339,8 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, struct msghdr *msg, size_t length, - struct rtable **rtp, - unsigned int flags) + struct rtable **rtp, unsigned int flags, + const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); @@ -379,7 +379,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb->transport_header = skb->network_header; err = -EFAULT; @@ -540,6 +540,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) daddr = inet->inet_daddr; } + ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; @@ -548,7 +549,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(net, msg, &ipc, false); + err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { kfree(ipc.opt); goto out; @@ -638,10 +639,10 @@ back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, - &rt, msg->msg_flags); + &rt, msg->msg_flags, &ipc.sockc); else { - sock_tx_timestamp(sk, &ipc.tx_flags); + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); if (!ipc.addr) ipc.addr = fl4.daddr; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1e1fe6086dd9..bb0419582b8d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -960,6 +960,17 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_IP_ROUTE_MULTIPATH + { + .procname = "fib_multipath_use_neigh", + .data = &init_net.ipv4.sysctl_fib_multipath_use_neigh, + 
.maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 08b8b960a8ed..4d73858991af 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -428,14 +428,16 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb) +static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) { - if (sk->sk_tsflags) { + if (sk->sk_tsflags || tsflags) { struct skb_shared_info *shinfo = skb_shinfo(skb); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - sock_tx_timestamp(sk, &shinfo->tx_flags); + sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; + tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP); } } @@ -957,7 +959,7 @@ new_segment: offset += copy; size -= copy; if (!size) { - tcp_tx_timestamp(sk, skb); + tcp_tx_timestamp(sk, sk->sk_tsflags, skb); goto out; } @@ -1077,6 +1079,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; bool sg; @@ -1119,6 +1122,15 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) /* 'common' sending to sendq */ } + sockc.tsflags = sk->sk_tsflags; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) { + err = -EINVAL; + goto out_err; + } + } + /* This should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -1237,7 +1249,7 @@ new_segment: copied += copy; if (!msg_data_left(msg)) { - tcp_tx_timestamp(sk, skb); + tcp_tx_timestamp(sk, sockc.tsflags, skb); goto out; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c124c3c12f7c..75e8336f6ecd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2253,16 +2253,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) } } -/* CWND moderation, preventing bursts due to too big ACKs - * in dubious situations. - */ -static inline void tcp_moderate_cwnd(struct tcp_sock *tp) -{ - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp) + tcp_max_burst(tp)); - tp->snd_cwnd_stamp = tcp_time_stamp; -} - static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when) { return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && @@ -2411,7 +2401,6 @@ static bool tcp_try_undo_recovery(struct sock *sk) /* Hold old state until something *above* high_seq * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */ - tcp_moderate_cwnd(tp); if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; return true; @@ -3094,7 +3083,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, const struct skb_shared_info *shinfo; /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */ - if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))) + if (likely(!TCP_SKB_CB(skb)->txstamp_ack)) return; shinfo = skb_shinfo(skb); @@ -4320,6 +4309,12 @@ static bool tcp_try_coalesce(struct sock *sk, return true; } +static void tcp_drop(struct sock *sk, struct sk_buff *skb) +{ + sk_drops_add(sk, skb); + __kfree_skb(skb); +} + /* This one checks to see if we can put data from the * out_of_order queue into the receive_queue. 
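Stepping back to the tcp_sendmsg() change earlier in this file: the sock_cmsg_send() call (and ip_cmsg_send() on the datagram paths) lets an application request transmit timestamps for one particular write by attaching a SOL_SOCKET/SO_TIMESTAMPING control message instead of flipping the socket-wide setting. A rough userspace sketch; per this series only the TX recording flags are meant to be accepted this way, and the reporting flags are assumed to have been enabled beforehand with setsockopt(SO_TIMESTAMPING).

#include <string.h>
#include <stdint.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/net_tstamp.h>    /* SOF_TIMESTAMPING_* */

#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37       /* value from asm-generic/socket.h */
#endif

/* Send one buffer, asking for software TX timestamps for this call only. */
static ssize_t send_with_tx_tstamp(int fd, const void *buf, size_t len)
{
    uint32_t tsflags = SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE;
    char control[CMSG_SPACE(sizeof(tsflags))];
    struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
    struct msghdr msg = {
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = control,
        .msg_controllen = sizeof(control),
    };
    struct cmsghdr *cm;

    memset(control, 0, sizeof(control));
    cm = CMSG_FIRSTHDR(&msg);
    cm->cmsg_level = SOL_SOCKET;
    cm->cmsg_type = SO_TIMESTAMPING;
    cm->cmsg_len = CMSG_LEN(sizeof(tsflags));
    memcpy(CMSG_DATA(cm), &tsflags, sizeof(tsflags));

    return sendmsg(fd, &msg, 0);
}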
*/ @@ -4344,7 +4339,7 @@ static void tcp_ofo_queue(struct sock *sk) __skb_unlink(skb, &tp->out_of_order_queue); if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { SOCK_DEBUG(sk, "ofo packet was already received\n"); - __kfree_skb(skb); + tcp_drop(sk, skb); continue; } SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n", @@ -4396,7 +4391,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); - __kfree_skb(skb); + tcp_drop(sk, skb); return; } @@ -4460,7 +4455,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb); + tcp_drop(sk, skb); skb = NULL; tcp_dsack_set(sk, seq, end_seq); goto add_sack; @@ -4499,7 +4494,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb1); + tcp_drop(sk, skb1); } add_sack: @@ -4582,12 +4577,13 @@ err: static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - int eaten = -1; bool fragstolen = false; + int eaten = -1; - if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) - goto drop; - + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { + __kfree_skb(skb); + return; + } skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); @@ -4669,7 +4665,7 @@ out_of_window: tcp_enter_quickack_mode(sk); inet_csk_schedule_ack(sk); drop: - __kfree_skb(skb); + tcp_drop(sk, skb); return; } @@ -5246,7 +5242,7 @@ syn_challenge: return true; discard: - __kfree_skb(skb); + tcp_drop(sk, skb); return false; } @@ -5464,7 +5460,7 @@ csum_error: TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); discard: - __kfree_skb(skb); + tcp_drop(sk, skb); } EXPORT_SYMBOL(tcp_rcv_established); @@ -5695,7 +5691,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, TCP_DELACK_MAX, TCP_RTO_MAX); discard: - __kfree_skb(skb); + tcp_drop(sk, skb); return 0; } else { tcp_send_ack(sk); @@ -5802,8 +5798,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) int queued = 0; bool acceptable; - tp->rx_opt.saw_tstamp = 0; - switch (sk->sk_state) { case TCP_CLOSE: goto discard; @@ -5844,6 +5838,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) goto discard; case TCP_SYN_SENT: + tp->rx_opt.saw_tstamp = 0; queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; @@ -5855,6 +5850,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; } + tp->rx_opt.saw_tstamp = 0; req = tp->fastopen_rsk; if (req) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && @@ -6056,7 +6052,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (!queued) { discard: - __kfree_skb(skb); + tcp_drop(sk, skb); } return 0; } @@ -6333,7 +6329,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } if (fastopen_sk) { af_ops->send_synack(fastopen_sk, dst, &fl, req, - &foc, false); + &foc, TCP_SYNACK_FASTOPEN); /* Add the child socket directly into the accept queue */ inet_csk_reqsk_queue_add(sk, req, fastopen_sk); sk->sk_data_ready(sk); @@ -6343,10 +6339,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; if (!want_cookie) 
inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - af_ops->send_synack(sk, dst, &fl, req, - &foc, !want_cookie); - if (want_cookie) - goto drop_and_free; + af_ops->send_synack(sk, dst, &fl, req, &foc, + !want_cookie ? TCP_SYNACK_NORMAL : + TCP_SYNACK_COOKIE); + if (want_cookie) { + reqsk_free(req); + return 0; + } } reqsk_put(req); return 0; @@ -6356,7 +6355,7 @@ drop_and_release: drop_and_free: reqsk_free(req); drop: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return 0; } EXPORT_SYMBOL(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad450509029b..d2a5763e5abc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -157,7 +157,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = daddr = usin->sin_addr.s_addr; inet_opt = rcu_dereference_protected(inet->inet_opt, - sock_owned_by_user(sk)); + lockdep_sock_is_held(sk)); if (inet_opt && inet_opt->opt.srr) { if (!daddr) return -EINVAL; @@ -329,7 +329,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) * errors returned from accept(). */ inet_csk_reqsk_queue_drop(req->rsk_listener, req); - NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); + tcp_listendrop(req->rsk_listener); } reqsk_put(req); } @@ -628,6 +628,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG + rcu_read_lock(); hash_location = tcp_parse_md5sig_option(th); if (sk && sk_fullsock(sk)) { key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) @@ -646,16 +647,18 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ntohs(th->source), inet_iif(skb)); /* don't send rst if it can't find key */ if (!sk1) - return; - rcu_read_lock(); + goto out; + key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) &ip_hdr(skb)->saddr, AF_INET); if (!key) - goto release_sk1; + goto out; + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) - goto release_sk1; + goto out; + } if (key) { @@ -698,11 +701,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); #ifdef CONFIG_TCP_MD5SIG -release_sk1: - if (sk1) { - rcu_read_unlock(); - sock_put(sk1); - } +out: + rcu_read_unlock(); #endif } @@ -830,7 +830,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req) + enum tcp_synack_type synack_type) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -841,7 +841,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, foc, attach_req); + skb = tcp_make_synack(sk, dst, req, foc, synack_type); if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -882,8 +882,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, - sock_owned_by_user(sk) || - lockdep_is_held((spinlock_t *)&sk->sk_lock.slock)); + lockdep_sock_is_held(sk)); if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) @@ -928,8 +927,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk) 
|| - lockdep_is_held(&sk->sk_lock.slock)); + lockdep_sock_is_held(sk)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) @@ -1246,7 +1244,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) &tcp_request_sock_ipv4_ops, sk, skb); drop: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return 0; } EXPORT_SYMBOL(tcp_v4_conn_request); @@ -1348,7 +1346,7 @@ exit_overflow: exit_nonewsk: dst_release(dst); exit: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return NULL; put_and_exit: inet_csk_prepare_forced_close(newsk); @@ -1538,11 +1536,12 @@ EXPORT_SYMBOL(tcp_prequeue); int tcp_v4_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct tcphdr *th; + bool refcounted; struct sock *sk; int ret; - struct net *net = dev_net(skb->dev); if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1588,7 +1587,7 @@ int tcp_v4_rcv(struct sk_buff *skb) lookup: sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, - th->dest); + th->dest, &refcounted); if (!sk) goto no_tcp_socket; @@ -1609,7 +1608,11 @@ process: inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + /* We own a reference on the listener, increase it again + * as we might lose it too soon. + */ sock_hold(sk); + refcounted = true; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1665,7 +1668,8 @@ process: bh_unlock_sock(sk); put_and_return: - sock_put(sk); + if (refcounted) + sock_put(sk); return ret; @@ -1688,7 +1692,9 @@ discard_it: return 0; discard_and_relse: - sock_put(sk); + sk_drops_add(sk, skb); + if (refcounted) + sock_put(sk); goto discard_it; do_time_wait: @@ -1712,6 +1718,7 @@ do_time_wait: if (sk2) { inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; + refcounted = false; goto process; } /* Fall through to ACK */ @@ -1845,17 +1852,17 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); */ static void *listening_get_next(struct seq_file *seq, void *cur) { - struct inet_connection_sock *icsk; - struct hlist_nulls_node *node; - struct sock *sk = cur; - struct inet_listen_hashbucket *ilb; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); + struct inet_listen_hashbucket *ilb; + struct inet_connection_sock *icsk; + struct sock *sk = cur; if (!sk) { +get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); - sk = sk_nulls_head(&ilb->head); + sk = sk_head(&ilb->head); st->offset = 0; goto get_sk; } @@ -1863,28 +1870,20 @@ static void *listening_get_next(struct seq_file *seq, void *cur) ++st->num; ++st->offset; - sk = sk_nulls_next(sk); + sk = sk_next(sk); get_sk: - sk_nulls_for_each_from(sk, node) { + sk_for_each_from(sk) { if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_family == st->family) { - cur = sk; - goto out; - } + if (sk->sk_family == st->family) + return sk; icsk = inet_csk(sk); } spin_unlock_bh(&ilb->lock); st->offset = 0; - if (++st->bucket < INET_LHTABLE_SIZE) { - ilb = &tcp_hashinfo.listening_hash[st->bucket]; - spin_lock_bh(&ilb->lock); - sk = sk_nulls_head(&ilb->head); - goto get_sk; - } - cur = NULL; -out: - return cur; + if (++st->bucket < INET_LHTABLE_SIZE) + goto get_head; + return NULL; } static void *listening_get_idx(struct seq_file *seq, loff_t *pos) @@ -2383,6 +2382,7 @@ static int __net_init tcp_sk_init(struct net *net) IPPROTO_TCP, net); if (res) goto fail; + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } diff --git 
a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index acb366dd61e6..4c53e7c86586 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -704,7 +704,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) { /* Out of window: send ACK and drop. */ - if (!(flg & TCP_FLAG_RST)) + if (!(flg & TCP_FLAG_RST) && + !tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSYNRECV, + &tcp_rsk(req)->last_oow_ack_time)) req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 773083b7f1e9..02737b607aa7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -89,6 +89,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, ~(SKB_GSO_TCPV4 | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | @@ -98,7 +99,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | 0) || - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + !(type & (SKB_GSO_TCPV4 | + SKB_GSO_TCPV6)))) goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); @@ -107,6 +109,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, goto out; } + /* GSO partial only requires splitting the frame into an MSS + * multiple and possibly a remainder. So update the mss now. + */ + if (features & NETIF_F_GSO_PARTIAL) + mss = skb->len - (skb->len % mss); + copy_destructor = gso_skb->destructor == tcp_wfree; ooo_okay = gso_skb->ooo_okay; /* All segments but the first should have ooo_okay cleared */ @@ -131,7 +139,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); - do { + while (skb->next) { th->fin = th->psh = 0; th->check = newcheck; @@ -151,7 +159,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->seq = htonl(seq); th->cwr = 0; - } while (skb->next); + } /* Following permits TCP Small Queues to work well with GSO : * The callback to TCP stack will be called at the time last frag @@ -237,7 +245,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) found: /* Include the IP ID check below from the inner most IP hdr */ - flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; + flush = NAPI_GRO_CB(p)->flush; flush |= (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); @@ -246,6 +254,17 @@ found: flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); + /* When we receive our second frame we can make a decision on whether we + * continue this flow as an atomic flow with a fixed ID or if we use + * an incrementing ID.
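Assuming flush_id holds the difference between the arriving segment's IP ID and the ID of the segment already held (that computation lives in the IP-layer GRO code, not in this hunk), the decision on the second frame reads roughly as follows: a delta of 0 means a fixed-ID ("atomic", DF) flow, a delta of 1 means a traditional incrementing flow, and anything else breaks the pattern and forces a flush. A simplified standalone model, not the kernel structures:

#include <stdio.h>

int main(void)
{
    unsigned short held_id = 0x1234;               /* ID of the frame already held */
    unsigned short arriving[] = { 0x1234, 0x1235, 0x1300 };

    for (int i = 0; i < 3; i++) {
        unsigned short delta = (unsigned short)(arriving[i] - held_id);

        if (delta == 0)
            printf("0x%04x: fixed ID, keep merging as an atomic flow\n", arriving[i]);
        else if (delta == 1)
            printf("0x%04x: incrementing ID, keep merging, drop the atomic assumption\n", arriving[i]);
        else
            printf("0x%04x: unexpected ID jump, flush\n", arriving[i]);
    }
    return 0;
}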
+ */ + if (NAPI_GRO_CB(p)->flush_id != 1 || + NAPI_GRO_CB(p)->count != 1 || + !NAPI_GRO_CB(p)->is_atomic) + flush |= NAPI_GRO_CB(p)->flush_id; + else + NAPI_GRO_CB(p)->is_atomic = false; + mss = skb_shinfo(p)->gso_size; flush |= (len - 1) >= mss; @@ -314,6 +333,9 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff) iph->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (NAPI_GRO_CB(skb)->is_atomic) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + return tcp_gro_complete(skb); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 441ae9da3a23..96182a2aabc9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2960,7 +2960,7 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req) + enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); const struct tcp_sock *tp = tcp_sk(sk); @@ -2980,14 +2980,22 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* Reserve space for headers. */ skb_reserve(skb, MAX_TCP_HEADER); - if (attach_req) { + switch (synack_type) { + case TCP_SYNACK_NORMAL: skb_set_owner_w(skb, req_to_sk(req)); - } else { + break; + case TCP_SYNACK_COOKIE: + /* Under synflood, we do not attach skb to a socket, + * to avoid false sharing. + */ + break; + case TCP_SYNACK_FASTOPEN: /* sk is a const pointer, because we want to express multiple * cpu might call us concurrently. * sk->sk_wmem_alloc in an atomic, we can promote to rw. */ skb_set_owner_w(skb, (struct sock *)sk); + break; } skb_dst_set(skb, dst); @@ -3532,7 +3540,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) int res; tcp_rsk(req)->txhash = net_tx_rndhash(); - res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true); + res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a2e7f55a1f61..76ea0a8be090 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -143,10 +143,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, unsigned int log) { struct sock *sk2; - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); - sk_nulls_for_each(sk2, node, &hslot->head) { + sk_for_each(sk2, &hslot->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && (bitmap || udp_sk(sk2)->udp_port_hash == num) && @@ -177,12 +176,11 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, bool match_wildcard)) { struct sock *sk2; - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); int res = 0; spin_lock(&hslot2->lock); - udp_portaddr_for_each_entry(sk2, node, &hslot2->head) { + udp_portaddr_for_each_entry(sk2, &hslot2->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && (udp_sk(sk2)->udp_port_hash == num) && @@ -207,11 +205,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot, bool match_wildcard)) { struct net *net = sock_net(sk); - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); struct sock *sk2; - sk_nulls_for_each(sk2, node, &hslot->head) { + sk_for_each(sk2, &hslot->head) { if (net_eq(sock_net(sk2), net) && sk2 != sk && sk2->sk_family == sk->sk_family && @@ -333,22 +330,23 @@ found: goto fail_unlock; } - sk_nulls_add_node_rcu(sk, &hslot->head); + sk_add_node_rcu(sk, &hslot->head); hslot->count++; 
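Referring back to the tcp_gso_segment() hunk above: for GSO partial the stack only needs to hand the device a chunk that is an exact multiple of the MSS, plus at most one remainder segment, which is what the mss = skb->len - (skb->len % mss) adjustment expresses. The arithmetic, with made-up sizes:

#include <stdio.h>

int main(void)
{
    unsigned int payload = 10000, mss = 1448;           /* assumed sizes */
    unsigned int partial = payload - (payload % mss);   /* largest MSS multiple */

    printf("partial chunk: %u bytes (%u segments), remainder: %u bytes\n",
           partial, partial / mss, payload - partial);
    return 0;
}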
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock(&hslot2->lock); if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && - sk->sk_family == AF_INET6) - hlist_nulls_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node, - &hslot2->head); + sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); else - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, - &hslot2->head); + hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); hslot2->count++; spin_unlock(&hslot2->lock); } + sock_set_flag(sk, SOCK_RCU_FREE); error = 0; fail_unlock: spin_unlock_bh(&hslot->lock); @@ -502,37 +500,27 @@ static struct sock *udp4_lib_lookup2(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; -begin: result = NULL; badness = 0; - udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + badness = score; + result = sk; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -540,23 +528,6 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. 
- */ - if (get_nulls_value(node) != slot2) - goto begin; - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score2(result, net, saddr, sport, - daddr, hnum, dif) < badness)) { - sock_put(result); - goto begin; - } - } return result; } @@ -568,15 +539,12 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int dif, struct udp_table *udptable, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; - rcu_read_lock(); if (hslot->count > 10) { hash2 = udp4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; @@ -598,35 +566,27 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, htonl(INADDR_ANY), hnum, dif, hslot2, slot2, skb); } - rcu_read_unlock(); return result; } begin: result = NULL; badness = 0; - sk_nulls_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + result = sk; + badness = score; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -634,25 +594,6 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begin; - - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score(result, net, saddr, hnum, sport, - daddr, dport, dif) < badness)) { - sock_put(result); - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(__udp4_lib_lookup); @@ -668,13 +609,37 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, udptable, skb); } +struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct net_device *dev = + skb_dst(skb) ? skb_dst(skb)->dev : skb->dev; + + return __udp4_lib_lookup(dev_net(dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + &udp_table, skb); +} +EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb); + +/* Must be called under rcu_read_lock(). + * Does increment socket refcount. 
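The lookup helpers above now run under RCU without taking a reference, so a caller that wants to keep the socket (as udp4_lib_lookup() does just below) may only bump the refcount while it is still non-zero; a zero count means the object is already being torn down and must be treated as not found. A standalone model of that conditional get, using a plain C11 atomic in place of the kernel's atomic_inc_not_zero():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
    atomic_int refcnt;
};

/* Take a reference only while the count is still non-zero. */
static bool get_ref_if_live(struct obj *o)
{
    int c = atomic_load(&o->refcnt);

    while (c != 0) {
        if (atomic_compare_exchange_weak(&o->refcnt, &c, c + 1))
            return true;    /* bumped from a non-zero value */
        /* c now holds the current count; retry unless it reached zero */
    }
    return false;
}

int main(void)
{
    struct obj live = { .refcnt = 2 }, dying = { .refcnt = 0 };

    printf("live: %s, dying: %s\n",
           get_ref_if_live(&live) ? "got ref" : "miss",
           get_ref_if_live(&dying) ? "got ref" : "miss");
    return 0;
}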
+ */ +#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ + IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, - &udp_table, NULL); + struct sock *sk; + + sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, + dif, &udp_table, NULL); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + return sk; } EXPORT_SYMBOL_GPL(udp4_lib_lookup); +#endif static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, @@ -776,7 +741,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk->sk_err = err; sk->sk_error_report(sk); out: - sock_put(sk); + return; } void udp_err(struct sk_buff *skb, u32 info) @@ -1032,15 +997,13 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) */ connected = 1; } - ipc.addr = inet->inet_saddr; + ipc.sockc.tsflags = sk->sk_tsflags; + ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - sock_tx_timestamp(sk, &ipc.tx_flags); - if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc, - sk->sk_family == AF_INET6); + err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); if (unlikely(err)) { kfree(ipc.opt); return err; @@ -1065,6 +1028,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) saddr = ipc.addr; ipc.addr = faddr = daddr; + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); + if (ipc.opt && ipc.opt->opt.srr) { if (!daddr) return -EINVAL; @@ -1316,14 +1281,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) { unsigned int amount = first_packet_length(sk); - if (amount) - /* - * We will only return the amount - * of this packet since that is all - * that will be read. - */ - amount -= sizeof(struct udphdr); - return put_user(amount, (int __user *)arg); } @@ -1347,7 +1304,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; unsigned int ulen, copied; - int peeked, off = 0; + int peeked, peeking, off; int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; @@ -1357,15 +1314,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, return ip_recv_error(sk, msg, len, addr_len); try_again: + peeking = off = sk_peek_offset(sk, flags); skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &off, &err); if (!skb) - goto out; + return err; - ulen = skb->len - sizeof(struct udphdr); + ulen = skb->len; copied = len; - if (copied > ulen) - copied = ulen; + if (copied > ulen - off) + copied = ulen - off; else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; @@ -1375,18 +1333,16 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. 
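In the udp_recvmsg() hunk above the UDP header has already been pulled on the receive path, so skb->len is pure payload, and a peek offset (MSG_PEEK with SO_PEEK_OFF) may skip part of it; the copy length is clamped to what remains and MSG_TRUNC is signalled when the caller's buffer is the limiting factor. The clamping, with assumed numbers:

#include <stdio.h>

int main(void)
{
    unsigned int ulen = 1200;   /* payload bytes left in the datagram */
    unsigned int off = 200;     /* peek offset already consumed */
    unsigned int len = 512;     /* caller's buffer size */
    unsigned int copied = len;
    int truncated = 0;

    if (copied > ulen - off)
        copied = ulen - off;    /* cannot copy more than remains */
    else if (copied < ulen)
        truncated = 1;          /* MSG_TRUNC: datagram was larger */

    printf("copied=%u truncated=%d\n", copied, truncated);
    return 0;
}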
*/ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) { checksum_valid = !udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), - msg, copied); + err = skb_copy_datagram_msg(skb, off, msg, copied); else { - err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), - msg); + err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; @@ -1399,7 +1355,8 @@ try_again: UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - goto out_free; + skb_free_datagram_locked(sk, skb); + return err; } if (!peeked) @@ -1417,15 +1374,13 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); + ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr) + off); err = copied; if (flags & MSG_TRUNC) err = ulen; -out_free: - skb_free_datagram_locked(sk, skb); -out: + __skb_free_datagram_locked(sk, skb, peeking ? -err : err); return err; csum_copy_err: @@ -1479,13 +1434,13 @@ void udp_lib_unhash(struct sock *sk) spin_lock_bh(&hslot->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - if (sk_nulls_del_node_init_rcu(sk)) { + if (sk_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_lock(&hslot2->lock); - hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); hslot2->count--; spin_unlock(&hslot2->lock); } @@ -1518,12 +1473,12 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) if (hslot2 != nhslot2) { spin_lock(&hslot2->lock); - hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); hslot2->count--; spin_unlock(&hslot2->lock); spin_lock(&nhslot2->lock); - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, &nhslot2->head); nhslot2->count++; spin_unlock(&nhslot2->lock); @@ -1553,7 +1508,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_incoming_cpu_update(sk); } - rc = sock_queue_rcv_skb(sk, skb); + rc = __sock_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1669,10 +1624,14 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter) && - udp_lib_checksum_complete(skb)) - goto csum_error; + if (rcu_access_pointer(sk->sk_filter)) { + if (udp_lib_checksum_complete(skb)) + goto csum_error; + if (sk_filter(sk, skb)) + goto drop; + } + udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); @@ -1702,35 +1661,6 @@ drop: return -1; } -static void flush_stack(struct sock **stack, unsigned int count, - struct sk_buff *skb, unsigned int final) -{ - unsigned int i; - struct sk_buff *skb1 = NULL; - struct sock *sk; - - for (i = 0; i < count; i++) { - sk = stack[i]; - if (likely(!skb1)) - skb1 = (i == final) ? 
skb : skb_clone(skb, GFP_ATOMIC); - - if (!skb1) { - atomic_inc(&sk->sk_drops); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); - } - - if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) - skb1 = NULL; - - sock_put(sk); - } - if (unlikely(skb1)) - kfree_skb(skb1); -} - /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ @@ -1754,14 +1684,14 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udp_table *udptable, int proto) { - struct sock *sk, *stack[256 / sizeof(struct sock *)]; - struct hlist_nulls_node *node; + struct sock *sk, *first = NULL; unsigned short hnum = ntohs(uh->dest); struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - int dif = skb->dev->ifindex; - unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - bool inner_flushed = false; + unsigned int offset = offsetof(typeof(*sk), sk_node); + int dif = skb->dev->ifindex; + struct hlist_node *node; + struct sk_buff *nskb; if (use_hash2) { hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & @@ -1772,23 +1702,28 @@ start_lookup: offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } - spin_lock(&hslot->lock); - sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { - if (__udp_is_mcast_sock(net, sk, - uh->dest, daddr, - uh->source, saddr, - dif, hnum)) { - if (unlikely(count == ARRAY_SIZE(stack))) { - flush_stack(stack, count, skb, ~0); - inner_flushed = true; - count = 0; - } - stack[count++] = sk; - sock_hold(sk); + sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { + if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr, + uh->source, saddr, dif, hnum)) + continue; + + if (!first) { + first = sk; + continue; } - } + nskb = skb_clone(skb, GFP_ATOMIC); - spin_unlock(&hslot->lock); + if (unlikely(!nskb)) { + atomic_inc(&sk->sk_drops); + UDP_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + continue; + } + if (udp_queue_rcv_skb(sk, nskb) > 0) + consume_skb(nskb); + } /* Also lookup *:port if we are using hash2 and haven't done so yet. 
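The rewritten __udp4_lib_mcast_deliver() above replaces the fixed on-stack socket array with a single pass that remembers the first matching socket and clones the datagram for every further match; once the walk (continued just below) finishes, the original buffer goes to that first receiver, saving one clone. The same shape in plain C, with a trivial buffer type standing in for the skb:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf { char data[32]; };

static struct buf *clone_buf(const struct buf *b)
{
    struct buf *n = malloc(sizeof(*n));

    if (n)
        memcpy(n, b, sizeof(*n));
    return n;
}

static void deliver(int receiver, struct buf *b)
{
    printf("receiver %d got \"%s\"\n", receiver, b->data);
    free(b);
}

int main(void)
{
    struct buf *orig = clone_buf(&(struct buf){ .data = "datagram" });
    int receivers[] = { 3, 7, 9 };    /* sockets that matched the lookup */
    int first = -1;

    if (!orig)
        return 1;

    for (int i = 0; i < 3; i++) {
        struct buf *copy;

        if (first < 0) {
            first = receivers[i];     /* defer: the original goes here */
            continue;
        }
        copy = clone_buf(orig);
        if (copy)
            deliver(receivers[i], copy);
        /* on allocation failure the kernel just counts a drop */
    }
    deliver(first, orig);             /* original consumed last */
    return 0;
}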
*/ if (use_hash2 && hash2 != hash2_any) { @@ -1796,16 +1731,13 @@ start_lookup: goto start_lookup; } - /* - * do the slow work with no lock held - */ - if (count) { - flush_stack(stack, count, skb, count - 1); + if (first) { + if (udp_queue_rcv_skb(first, skb) > 0) + consume_skb(skb); } else { - if (!inner_flushed) - UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); - consume_skb(skb); + kfree_skb(skb); + UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); } return 0; } @@ -1902,7 +1834,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, inet_compute_pseudo); ret = udp_queue_rcv_skb(sk, skb); - sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 @@ -1963,49 +1894,24 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, int dif) { struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(loc_port); - unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); + unsigned int slot = udp_hashfn(net, hnum, udp_table.mask); struct udp_hslot *hslot = &udp_table.hash[slot]; /* Do not bother scanning a too big list */ if (hslot->count > 10) return NULL; - rcu_read_lock(); -begin: - count = 0; result = NULL; - sk_nulls_for_each_rcu(sk, node, &hslot->head) { - if (__udp_is_mcast_sock(net, sk, - loc_port, loc_addr, - rmt_port, rmt_addr, - dif, hnum)) { + sk_for_each_rcu(sk, &hslot->head) { + if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr, + rmt_port, rmt_addr, dif, hnum)) { + if (result) + return NULL; result = sk; - ++count; - } - } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. 
- */ - if (get_nulls_value(node) != slot) - goto begin; - - if (result) { - if (count != 1 || - unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(!__udp_is_mcast_sock(net, result, - loc_port, loc_addr, - rmt_port, rmt_addr, - dif, hnum))) { - sock_put(result); - result = NULL; } } - rcu_read_unlock(); + return result; } @@ -2018,37 +1924,22 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, __be16 rmt_port, __be32 rmt_addr, int dif) { - struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(loc_port); unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); + struct sock *sk; - rcu_read_lock(); - result = NULL; - udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { - if (INET_MATCH(sk, net, acookie, - rmt_addr, loc_addr, ports, dif)) - result = sk; + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { + if (INET_MATCH(sk, net, acookie, rmt_addr, + loc_addr, ports, dif)) + return sk; /* Only check first socket in chain */ break; } - - if (result) { - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(!INET_MATCH(sk, net, acookie, - rmt_addr, loc_addr, - ports, dif))) { - sock_put(result); - result = NULL; - } - } - rcu_read_unlock(); - return result; + return NULL; } void udp_v4_early_demux(struct sk_buff *skb) @@ -2056,7 +1947,7 @@ void udp_v4_early_demux(struct sk_buff *skb) struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct udphdr *uh; - struct sock *sk; + struct sock *sk = NULL; struct dst_entry *dst; int dif = skb->dev->ifindex; int ours; @@ -2088,11 +1979,9 @@ void udp_v4_early_demux(struct sk_buff *skb) } else if (skb->pkt_type == PACKET_HOST) { sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); - } else { - return; } - if (!sk) + if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) return; skb->sk = sk; @@ -2392,14 +2281,13 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) for (state->bucket = start; state->bucket <= state->udp_table->mask; ++state->bucket) { - struct hlist_nulls_node *node; struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; - if (hlist_nulls_empty(&hslot->head)) + if (hlist_empty(&hslot->head)) continue; spin_lock_bh(&hslot->lock); - sk_nulls_for_each(sk, node, &hslot->head) { + sk_for_each(sk, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == state->family) @@ -2418,7 +2306,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) struct net *net = seq_file_net(seq); do { - sk = sk_nulls_next(sk); + sk = sk_next(sk); } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { @@ -2627,12 +2515,12 @@ void __init udp_table_init(struct udp_table *table, const char *name) table->hash2 = table->hash + (table->mask + 1); for (i = 0; i <= table->mask; i++) { - INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); + INIT_HLIST_HEAD(&table->hash[i].head); table->hash[i].count = 0; spin_lock_init(&table->hash[i].lock); } for (i = 0; i <= table->mask; i++) { - INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i); + INIT_HLIST_HEAD(&table->hash2[i].head); table->hash2[i].count = 0; spin_lock_init(&table->hash2[i].lock); } diff --git a/net/ipv4/udp_diag.c 
b/net/ipv4/udp_diag.c index df1966f3b6ec..3d5ccf4b1412 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -36,10 +36,11 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, const struct inet_diag_req_v2 *req) { int err = -EINVAL; - struct sock *sk; + struct sock *sk = NULL; struct sk_buff *rep; struct net *net = sock_net(in_skb->sk); + rcu_read_lock(); if (req->sdiag_family == AF_INET) sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, @@ -54,9 +55,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_dport, req->id.idiag_if, tbl, NULL); #endif - else - goto out_nosk; - + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + rcu_read_unlock(); err = -ENOENT; if (!sk) goto out_nosk; @@ -96,24 +97,23 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { - int num, s_num, slot, s_slot; struct net *net = sock_net(skb->sk); + int num, s_num, slot, s_slot; s_slot = cb->args[0]; num = s_num = cb->args[1]; for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) { - struct sock *sk; - struct hlist_nulls_node *node; struct udp_hslot *hslot = &table->hash[slot]; + struct sock *sk; num = 0; - if (hlist_nulls_empty(&hslot->head)) + if (hlist_empty(&hslot->head)) continue; spin_lock_bh(&hslot->lock); - sk_nulls_for_each(sk, node, &hslot->head) { + sk_for_each(sk, &hslot->head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0ed2dafb7cc4..097060def7f0 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -14,18 +14,6 @@ #include <net/udp.h> #include <net/protocol.h> -static DEFINE_SPINLOCK(udp_offload_lock); -static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; - -#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock)) - -struct udp_offload_priv { - struct udp_offload *offload; - possible_net_t net; - struct rcu_head rcu; - struct udp_offload_priv __rcu *next; -}; - static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, @@ -51,8 +39,11 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * 16 bit length field due to the header being added outside of an * IP or IPv6 frame that was already limited to 64K - 1. */ - partial = csum_sub(csum_unfold(uh->check), - (__force __wsum)htonl(skb->len)); + if (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) + partial = (__force __wsum)uh->len; + else + partial = (__force __wsum)htonl(skb->len); + partial = csum_sub(csum_unfold(uh->check), partial); /* setup inner skb. */ skb->encapsulation = 0; @@ -101,7 +92,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, udp_offset = outer_hlen - tnl_hlen; skb = segs; do { - __be16 len; + unsigned int len; if (remcsum) skb->ip_summed = CHECKSUM_NONE; @@ -119,14 +110,26 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); - len = htons(skb->len - udp_offset); + len = skb->len - udp_offset; uh = udp_hdr(skb); - uh->len = len; + + /* If we are only performing partial GSO the inner header + * will be using a length value equal to only one MSS sized + * segment instead of the entire frame. 
+ */ + if (skb_is_gso(skb)) { + uh->len = htons(skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)uh); + } else { + uh->len = htons(len); + } if (!need_csum) continue; - uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len)); + uh->check = ~csum_fold(csum_add(partial, + (__force __wsum)htonl(len))); if (skb->encapsulation || !offload_csum) { uh->check = gso_make_checksum(skb, ~uh->check); @@ -179,6 +182,7 @@ out_unlock: return segs; } +EXPORT_SYMBOL(skb_udp_tunnel_segment); static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) @@ -253,64 +257,14 @@ out: return segs; } -int udp_add_offload(struct net *net, struct udp_offload *uo) -{ - struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); - - if (!new_offload) - return -ENOMEM; - - write_pnet(&new_offload->net, net); - new_offload->offload = uo; - - spin_lock(&udp_offload_lock); - new_offload->next = udp_offload_base; - rcu_assign_pointer(udp_offload_base, new_offload); - spin_unlock(&udp_offload_lock); - - return 0; -} -EXPORT_SYMBOL(udp_add_offload); - -static void udp_offload_free_routine(struct rcu_head *head) -{ - struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu); - kfree(ou_priv); -} - -void udp_del_offload(struct udp_offload *uo) -{ - struct udp_offload_priv __rcu **head = &udp_offload_base; - struct udp_offload_priv *uo_priv; - - spin_lock(&udp_offload_lock); - - uo_priv = udp_deref_protected(*head); - for (; uo_priv != NULL; - uo_priv = udp_deref_protected(*head)) { - if (uo_priv->offload == uo) { - rcu_assign_pointer(*head, - udp_deref_protected(uo_priv->next)); - goto unlock; - } - head = &uo_priv->next; - } - pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port)); -unlock: - spin_unlock(&udp_offload_lock); - if (uo_priv) - call_rcu(&uo_priv->rcu, udp_offload_free_routine); -} -EXPORT_SYMBOL(udp_del_offload); - struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, - struct udphdr *uh) + struct udphdr *uh, udp_lookup_t lookup) { - struct udp_offload_priv *uo_priv; struct sk_buff *p, **pp = NULL; struct udphdr *uh2; unsigned int off = skb_gro_offset(skb); int flush = 1; + struct sock *sk; if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && @@ -322,13 +276,10 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, NAPI_GRO_CB(skb)->encap_mark = 1; rcu_read_lock(); - uo_priv = rcu_dereference(udp_offload_base); - for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && - uo_priv->offload->port == uh->dest && - uo_priv->offload->callbacks.gro_receive) - goto unflush; - } + sk = (*lookup)(skb, uh->source, uh->dest); + + if (sk && udp_sk(sk)->gro_receive) + goto unflush; goto out_unlock; unflush: @@ -352,9 +303,7 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb, - uo_priv->offload); + pp = udp_sk(sk)->gro_receive(sk, head, skb); out_unlock: rcu_read_unlock(); @@ -362,6 +311,7 @@ out: NAPI_GRO_CB(skb)->flush |= flush; return pp; } +EXPORT_SYMBOL(udp_gro_receive); static struct sk_buff **udp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) @@ -383,39 +333,28 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff 
**head, inet_gro_compute_pseudo); skip: NAPI_GRO_CB(skb)->is_ipv6 = 0; - return udp_gro_receive(head, skb, uh); + return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb); flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; } -int udp_gro_complete(struct sk_buff *skb, int nhoff) +int udp_gro_complete(struct sk_buff *skb, int nhoff, + udp_lookup_t lookup) { - struct udp_offload_priv *uo_priv; __be16 newlen = htons(skb->len - nhoff); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); int err = -ENOSYS; + struct sock *sk; uh->len = newlen; rcu_read_lock(); - - uo_priv = rcu_dereference(udp_offload_base); - for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && - uo_priv->offload->port == uh->dest && - uo_priv->offload->callbacks.gro_complete) - break; - } - - if (uo_priv) { - NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - err = uo_priv->offload->callbacks.gro_complete(skb, - nhoff + sizeof(struct udphdr), - uo_priv->offload); - } - + sk = (*lookup)(skb, uh->source, uh->dest); + if (sk && udp_sk(sk)->gro_complete) + err = udp_sk(sk)->gro_complete(sk, skb, + nhoff + sizeof(struct udphdr)); rcu_read_unlock(); if (skb->remcsum_offload) @@ -426,6 +365,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) return err; } +EXPORT_SYMBOL(udp_gro_complete); static int udp4_gro_complete(struct sk_buff *skb, int nhoff) { @@ -440,7 +380,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; } - return udp_gro_complete(skb, nhoff); + return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb); } static const struct net_offload udpv4_offload = { diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 96599d1a1318..47f12c73d959 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -69,6 +69,8 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->encap_type = cfg->encap_type; udp_sk(sk)->encap_rcv = cfg->encap_rcv; udp_sk(sk)->encap_destroy = cfg->encap_destroy; + udp_sk(sk)->gro_receive = cfg->gro_receive; + udp_sk(sk)->gro_complete = cfg->gro_complete; udp_tunnel_encap_enable(sock); } diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2fbd90bf8d33..5e9d6bf4aaca 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,9 +8,10 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ - exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o + exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ + udp_offload.o -ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o +ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 23cec53b568a..f5a77a9dd34e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -359,7 +359,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; ndev->cnf.mtu6 = dev->mtu; - ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (!ndev->nd_parms) { kfree(ndev); @@ -5013,15 +5012,13 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) { struct inet6_ifaddr *ifp; struct net_device *dev = idev->dev; - bool update_rs = false; + bool clear_token, 
update_rs = false; struct in6_addr ll_addr; ASSERT_RTNL(); if (!token) return -EINVAL; - if (ipv6_addr_any(token)) - return -EINVAL; if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) return -EINVAL; if (!ipv6_accept_ra(idev)) @@ -5036,10 +5033,13 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) write_unlock_bh(&idev->lock); + clear_token = ipv6_addr_any(token); + if (clear_token) + goto update_lft; + if (!idev->dead && (idev->if_flags & IF_READY) && !ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | IFA_F_OPTIMISTIC)) { - /* If we're not ready, then normal ifup will take care * of this. Otherwise, we need to request our rs here. */ @@ -5047,6 +5047,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) update_rs = true; } +update_lft: write_lock_bh(&idev->lock); if (update_rs) { @@ -5636,376 +5637,366 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } -static struct addrconf_sysctl_table -{ - struct ctl_table_header *sysctl_header; - struct ctl_table addrconf_vars[DEVCONF_MAX+1]; -} addrconf_sysctl __read_mostly = { - .sysctl_header = NULL, - .addrconf_vars = { - { - .procname = "forwarding", - .data = &ipv6_devconf.forwarding, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_forward, - }, - { - .procname = "hop_limit", - .data = &ipv6_devconf.hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_hop_limit, - }, - { - .procname = "mtu", - .data = &ipv6_devconf.mtu6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_mtu, - }, - { - .procname = "accept_ra", - .data = &ipv6_devconf.accept_ra, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_redirects", - .data = &ipv6_devconf.accept_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "autoconf", - .data = &ipv6_devconf.autoconf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "dad_transmits", - .data = &ipv6_devconf.dad_transmits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "router_solicitations", - .data = &ipv6_devconf.rtr_solicits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "router_solicitation_interval", - .data = &ipv6_devconf.rtr_solicit_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "router_solicitation_delay", - .data = &ipv6_devconf.rtr_solicit_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "force_mld_version", - .data = &ipv6_devconf.force_mld_version, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "mldv1_unsolicited_report_interval", - .data = - &ipv6_devconf.mldv1_unsolicited_report_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - { - .procname = "mldv2_unsolicited_report_interval", - .data = - &ipv6_devconf.mldv2_unsolicited_report_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - { - .procname = "use_tempaddr", - .data = &ipv6_devconf.use_tempaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "temp_valid_lft", - .data = &ipv6_devconf.temp_valid_lft, - .maxlen = 
sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "temp_prefered_lft", - .data = &ipv6_devconf.temp_prefered_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "regen_max_retry", - .data = &ipv6_devconf.regen_max_retry, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "max_desync_factor", - .data = &ipv6_devconf.max_desync_factor, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "max_addresses", - .data = &ipv6_devconf.max_addresses, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_defrtr", - .data = &ipv6_devconf.accept_ra_defrtr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_min_hop_limit", - .data = &ipv6_devconf.accept_ra_min_hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_pinfo", - .data = &ipv6_devconf.accept_ra_pinfo, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, +static const struct ctl_table addrconf_sysctl[] = { + { + .procname = "forwarding", + .data = &ipv6_devconf.forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_forward, + }, + { + .procname = "hop_limit", + .data = &ipv6_devconf.hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_hop_limit, + }, + { + .procname = "mtu", + .data = &ipv6_devconf.mtu6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_mtu, + }, + { + .procname = "accept_ra", + .data = &ipv6_devconf.accept_ra, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_redirects", + .data = &ipv6_devconf.accept_redirects, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "autoconf", + .data = &ipv6_devconf.autoconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "dad_transmits", + .data = &ipv6_devconf.dad_transmits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "router_solicitations", + .data = &ipv6_devconf.rtr_solicits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "router_solicitation_interval", + .data = &ipv6_devconf.rtr_solicit_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "router_solicitation_delay", + .data = &ipv6_devconf.rtr_solicit_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "force_mld_version", + .data = &ipv6_devconf.force_mld_version, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "mldv1_unsolicited_report_interval", + .data = + &ipv6_devconf.mldv1_unsolicited_report_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { + .procname = "mldv2_unsolicited_report_interval", + .data = + &ipv6_devconf.mldv2_unsolicited_report_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { + .procname = "use_tempaddr", + .data = &ipv6_devconf.use_tempaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname 
= "temp_valid_lft", + .data = &ipv6_devconf.temp_valid_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "temp_prefered_lft", + .data = &ipv6_devconf.temp_prefered_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "regen_max_retry", + .data = &ipv6_devconf.regen_max_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "max_desync_factor", + .data = &ipv6_devconf.max_desync_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "max_addresses", + .data = &ipv6_devconf.max_addresses, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_defrtr", + .data = &ipv6_devconf.accept_ra_defrtr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_min_hop_limit", + .data = &ipv6_devconf.accept_ra_min_hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "accept_ra_pinfo", + .data = &ipv6_devconf.accept_ra_pinfo, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IPV6_ROUTER_PREF - { - .procname = "accept_ra_rtr_pref", - .data = &ipv6_devconf.accept_ra_rtr_pref, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "router_probe_interval", - .data = &ipv6_devconf.rtr_probe_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, + { + .procname = "accept_ra_rtr_pref", + .data = &ipv6_devconf.accept_ra_rtr_pref, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "router_probe_interval", + .data = &ipv6_devconf.rtr_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, #ifdef CONFIG_IPV6_ROUTE_INFO - { - .procname = "accept_ra_rt_info_max_plen", - .data = &ipv6_devconf.accept_ra_rt_info_max_plen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, + { + .procname = "accept_ra_rt_info_max_plen", + .data = &ipv6_devconf.accept_ra_rt_info_max_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #endif - { - .procname = "proxy_ndp", - .data = &ipv6_devconf.proxy_ndp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_proxy_ndp, - }, - { - .procname = "accept_source_route", - .data = &ipv6_devconf.accept_source_route, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, + { + .procname = "proxy_ndp", + .data = &ipv6_devconf.proxy_ndp, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_proxy_ndp, + }, + { + .procname = "accept_source_route", + .data = &ipv6_devconf.accept_source_route, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - { - .procname = "optimistic_dad", - .data = &ipv6_devconf.optimistic_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, - { - .procname = "use_optimistic", - .data = &ipv6_devconf.use_optimistic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, + { + .procname = "optimistic_dad", + .data = &ipv6_devconf.optimistic_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "use_optimistic", + 
.data = &ipv6_devconf.use_optimistic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #ifdef CONFIG_IPV6_MROUTE - { - .procname = "mc_forwarding", - .data = &ipv6_devconf.mc_forwarding, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, + { + .procname = "mc_forwarding", + .data = &ipv6_devconf.mc_forwarding, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, #endif - { - .procname = "disable_ipv6", - .data = &ipv6_devconf.disable_ipv6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_disable, - }, - { - .procname = "accept_dad", - .data = &ipv6_devconf.accept_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "force_tllao", - .data = &ipv6_devconf.force_tllao, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "ndisc_notify", - .data = &ipv6_devconf.ndisc_notify, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "suppress_frag_ndisc", - .data = &ipv6_devconf.suppress_frag_ndisc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "accept_ra_from_local", - .data = &ipv6_devconf.accept_ra_from_local, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "accept_ra_mtu", - .data = &ipv6_devconf.accept_ra_mtu, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "stable_secret", - .data = &ipv6_devconf.stable_secret, - .maxlen = IPV6_MAX_STRLEN, - .mode = 0600, - .proc_handler = addrconf_sysctl_stable_secret, - }, - { - .procname = "use_oif_addrs_only", - .data = &ipv6_devconf.use_oif_addrs_only, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ignore_routes_with_linkdown", - .data = &ipv6_devconf.ignore_routes_with_linkdown, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, - }, - { - .procname = "drop_unicast_in_l2_multicast", - .data = &ipv6_devconf.drop_unicast_in_l2_multicast, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "drop_unsolicited_na", - .data = &ipv6_devconf.drop_unsolicited_na, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "keep_addr_on_down", - .data = &ipv6_devconf.keep_addr_on_down, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, - { - /* sentinel */ - } + { + .procname = "disable_ipv6", + .data = &ipv6_devconf.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_disable, + }, + { + .procname = "accept_dad", + .data = &ipv6_devconf.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "force_tllao", + .data = &ipv6_devconf.force_tllao, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ndisc_notify", + .data = &ipv6_devconf.ndisc_notify, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec }, + { + .procname = "suppress_frag_ndisc", + .data = &ipv6_devconf.suppress_frag_ndisc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "accept_ra_from_local", + .data = &ipv6_devconf.accept_ra_from_local, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = 
proc_dointvec, + }, + { + .procname = "accept_ra_mtu", + .data = &ipv6_devconf.accept_ra_mtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "stable_secret", + .data = &ipv6_devconf.stable_secret, + .maxlen = IPV6_MAX_STRLEN, + .mode = 0600, + .proc_handler = addrconf_sysctl_stable_secret, + }, + { + .procname = "use_oif_addrs_only", + .data = &ipv6_devconf.use_oif_addrs_only, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ignore_routes_with_linkdown", + .data = &ipv6_devconf.ignore_routes_with_linkdown, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, + }, + { + .procname = "drop_unicast_in_l2_multicast", + .data = &ipv6_devconf.drop_unicast_in_l2_multicast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "drop_unsolicited_na", + .data = &ipv6_devconf.drop_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "keep_addr_on_down", + .data = &ipv6_devconf.keep_addr_on_down, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, + { + /* sentinel */ + } }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, struct inet6_dev *idev, struct ipv6_devconf *p) { int i; - struct addrconf_sysctl_table *t; + struct ctl_table *table; char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; - t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); - if (!t) + table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL); + if (!table) goto out; - for (i = 0; t->addrconf_vars[i].data; i++) { - t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf; - t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ - t->addrconf_vars[i].extra2 = net; + for (i = 0; table[i].data; i++) { + table[i].data += (char *)p - (char *)&ipv6_devconf; + table[i].extra1 = idev; /* embedded; no ref */ + table[i].extra2 = net; } snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); - t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); - if (!t->sysctl_header) + p->sysctl_header = register_net_sysctl(net, path, table); + if (!p->sysctl_header) goto free; - p->sysctl = t; return 0; free: - kfree(t); + kfree(table); out: return -ENOBUFS; } static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) { - struct addrconf_sysctl_table *t; + struct ctl_table *table; - if (!p->sysctl) + if (!p->sysctl_header) return; - t = p->sysctl; - p->sysctl = NULL; - unregister_net_sysctl_table(t->sysctl_header); - kfree(t); + table = p->sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(p->sysctl_header); + p->sysctl_header = NULL; + kfree(table); } static int addrconf_sysctl_register(struct inet6_dev *idev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b11c37cfd67c..bfa86f040c16 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -64,6 +64,8 @@ #include <asm/uaccess.h> #include <linux/mroute6.h> +#include "ip6_offload.h" + MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); @@ -561,6 +563,7 @@ const struct proto_ops inet6_dgram_ops = { .recvmsg = inet_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, @@ -958,6 +961,10 @@ static int __init 
inet6_init(void) if (err) goto udplitev6_fail; + err = udpv6_offload_init(); + if (err) + goto udpv6_offload_fail; + err = tcpv6_init(); if (err) goto tcpv6_fail; @@ -987,6 +994,8 @@ pingv6_fail: ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: + udpv6_offload_exit(); +udpv6_offload_fail: udplitev6_exit(); udplitev6_fail: udpv6_exit(); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9dd3882fe6bf..3962b6c810fc 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -728,7 +728,8 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, - int *hlimit, int *tclass, int *dontfrag) + int *hlimit, int *tclass, int *dontfrag, + struct sockcm_cookie *sockc) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -745,6 +746,12 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } + if (cmsg->cmsg_level == SOL_SOCKET) { + if (__sock_cmsg_send(sk, msg, cmsg, sockc)) + return -EINVAL; + continue; + } + if (cmsg->cmsg_level != SOL_IPV6) continue; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0a37ddc7af51..6b573ebe49de 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; + struct sockcm_cookie sockc_unused = {0}; int iif = 0; int addr_type = 0; int len; @@ -527,7 +528,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, (struct rt6_info *)dst, - MSG_DONTWAIT, np->dontfrag); + MSG_DONTWAIT, np->dontfrag, &sockc_unused); if (err) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); @@ -566,6 +567,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) int hlimit; u8 tclass; u32 mark = IP6_REPLY_MARK(net, skb->mark); + struct sockcm_cookie sockc_unused = {0}; saddr = &ipv6_hdr(skb)->daddr; @@ -617,7 +619,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, - np->dontfrag); + np->dontfrag, &sockc_unused); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 295ca29a23c3..0b03533453e4 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -501,7 +501,8 @@ static int ila_nl_dump_start(struct netlink_callback *cb) struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; - return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter); + return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter, + GFP_KERNEL); } static int ila_nl_dump_done(struct netlink_callback *cb) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 70f2628be6fa..f1678388fb0d 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -69,7 +69,6 @@ struct sock *__inet6_lookup_established(struct net *net, struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - rcu_read_lock(); begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) @@ -90,7 +89,6 @@ begin: out: sk = NULL; found: - rcu_read_unlock(); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); @@ -122,6 +120,7 @@ static inline 
int compute_score(struct sock *sk, struct net *net, return score; } +/* called with rcu_read_lock() */ struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -129,39 +128,27 @@ struct sock *inet6_lookup_listener(struct net *net, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { - struct sock *sk; - const struct hlist_nulls_node *node; - struct sock *result; - int score, hiscore, matches = 0, reuseport = 0; - bool select_ok = true; - u32 phash = 0; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + int score, hiscore = 0, matches = 0, reuseport = 0; + struct sock *sk, *result = NULL; + u32 phash = 0; - rcu_read_lock(); -begin: - result = NULL; - hiscore = 0; - sk_nulls_for_each(sk, node, &ilb->head) { + sk_for_each(sk, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { - hiscore = score; - result = sk; reuseport = sk->sk_reuseport; if (reuseport) { phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - sk2 = reuseport_select_sock(sk, phash, - skb, doff); - if (sk2) { - result = sk2; - goto found; - } - } + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; matches = 1; } + result = sk; + hiscore = score; } else if (score == hiscore && reuseport) { matches++; if (reciprocal_scale(phash, matches) == 0) @@ -169,25 +156,6 @@ begin: phash = next_pseudo_random32(phash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) - goto begin; - if (result) { -found: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) - result = NULL; - else if (unlikely(compute_score(result, net, hnum, daddr, - dif) < hiscore)) { - sock_put(result); - select_ok = false; - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(inet6_lookup_listener); @@ -199,12 +167,12 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, const int dif) { struct sock *sk; + bool refcounted; - local_bh_disable(); sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - ntohs(dport), dif); - local_bh_enable(); - + ntohs(dport), dif, &refcounted); + if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; return sk; } EXPORT_SYMBOL_GPL(inet6_lookup); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index dc2db4f7b182..35d3ddc328f8 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -372,6 +372,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, if (olen > 0) { struct msghdr msg; struct flowi6 flowi6; + struct sockcm_cookie sockc_junk; int junk; err = -ENOMEM; @@ -390,7 +391,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, memset(&flowi6, 0, sizeof(flowi6)); err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, - &junk, &junk, &junk); + &junk, &junk, &junk, &sockc_junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4e636e60a360..ca5a2c5675c5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -598,6 +598,18 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) opt->ops.opt_nflen = 8; } +static __sum16 
gre6_checksum(struct sk_buff *skb) +{ + __wsum csum; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + csum = lco_csum(skb); + else + csum = skb_checksum(skb, sizeof(struct ipv6hdr), + skb->len - sizeof(struct ipv6hdr), 0); + return csum_fold(csum); +} + static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, @@ -609,7 +621,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct net *net = tunnel->net; struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ - unsigned int max_headroom = 0; /* The extra header space needed */ + unsigned int min_headroom = 0; /* The extra header space needed */ int gre_hlen; struct ipv6_tel_txoption opt; int mtu; @@ -617,7 +629,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct net_device_stats *stats = &tunnel->dev->stats; int err = -1; u8 proto; - struct sk_buff *new_skb; __be16 protocol; if (dev->type == ARPHRD_ETHER) @@ -660,14 +671,14 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, mtu = dst_mtu(dst) - sizeof(*ipv6h); if (encap_limit >= 0) { - max_headroom += 8; + min_headroom += 8; mtu -= 8; } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; @@ -685,20 +696,19 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); - max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + min_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; - if (skb_headroom(skb) < max_headroom || skb_shared(skb) || - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - new_skb = skb_realloc_headroom(skb, max_headroom); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; - if (!new_skb) - goto tx_err_dst_release; + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - consume_skb(skb); - skb = new_skb; + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (min_headroom > dev->needed_headroom) + dev->needed_headroom = min_headroom; + if (unlikely(err)) + goto tx_err_dst_release; } if (!fl6->flowi6_mark && ndst) @@ -711,10 +721,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + err = iptunnel_handle_offloads(skb, + (tunnel->parms.o_flags & GRE_CSUM) ? 
+ SKB_GSO_GRE_CSUM : SKB_GSO_GRE); + if (err) + goto tx_err_dst_release; skb_push(skb, gre_hlen); skb_reset_network_header(skb); @@ -748,10 +759,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, *ptr = tunnel->parms.o_key; ptr--; } - if (tunnel->parms.o_flags&GRE_CSUM) { + if ((tunnel->parms.o_flags & GRE_CSUM) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { *ptr = 0; - *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1), - skb->len - sizeof(struct ipv6hdr)); + *(__sum16 *)ptr = gre6_checksum(skb); } } @@ -987,6 +999,8 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) dev->mtu = rt->dst.dev->mtu - addend; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1505,6 +1519,11 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; +#define GRE6_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + static void ip6gre_tap_setup(struct net_device *dev) { @@ -1538,9 +1557,21 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->net = dev_net(dev); ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= GRE6_FEATURES; + dev->hw_features |= GRE6_FEATURES; + + if (!(nt->parms.o_flags & GRE_SEQ)) { + /* TCP segmentation offload is not supported when we + * generate output sequences. + */ + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + + /* Can use a lockless transmit, unless we generate + * output sequences + */ dev->features |= NETIF_F_LLTX; + } err = register_netdevice(dev); if (err) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 82e9f3076028..f5eb184e1093 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -63,6 +63,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int proto; struct frag_hdr *fptr; unsigned int unfrag_ip6hlen; + unsigned int payload_len; u8 *prevhdr; int offset = 0; bool encap, udpfrag; @@ -73,6 +74,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_TCP_FIXEDID | + SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | @@ -80,7 +83,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_TCPV6 | + SKB_GSO_PARTIAL | 0))) goto out; @@ -117,7 +120,13 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); + if (skb_is_gso(skb)) + payload_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)(ipv6h + 1); + else + payload_len = skb->len - nhoff - sizeof(*ipv6h); + ipv6h->payload_len = htons(payload_len); skb->network_header = (u8 *)ipv6h - skb->head; if (udpfrag) { @@ -239,10 +248,14 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); NAPI_GRO_CB(p)->flush |= flush; - /* Clear flush_id, there's really no concept of ID in IPv6. 
*/ - NAPI_GRO_CB(p)->flush_id = 0; + /* If the previous IP ID value was based on an atomic + * datagram we can overwrite the value and ignore it. + */ + if (NAPI_GRO_CB(skb)->is_atomic) + NAPI_GRO_CB(p)->flush_id = 0; } + NAPI_GRO_CB(skb)->is_atomic = true; NAPI_GRO_CB(skb)->flush |= flush; skb_gro_postpull_rcsum(skb, iph, nlen); @@ -325,8 +338,6 @@ static int __init ipv6_offload_init(void) if (tcpv6_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); - if (udp_offload_init() < 0) - pr_crit("%s: Cannot add UDP protocol offload\n", __func__); if (ipv6_exthdrs_offload_init() < 0) pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h index 2e155c651b35..96b40e41ac53 100644 --- a/net/ipv6/ip6_offload.h +++ b/net/ipv6/ip6_offload.h @@ -12,7 +12,8 @@ #define __ip6_offload_h int ipv6_exthdrs_offload_init(void); -int udp_offload_init(void); +int udpv6_offload_init(void); +int udpv6_offload_exit(void); int tcpv6_offload_init(void); #endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bc972e7152c7..171518e3ca21 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1258,7 +1258,8 @@ static int __ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - unsigned int flags, int dontfrag) + unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc) { struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; @@ -1329,7 +1330,7 @@ emsgsize: csummode = CHECKSUM_PARTIAL; if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { - sock_tx_timestamp(sk, &tx_flags); + sock_tx_timestamp(sk, sockc->tsflags, &tx_flags); if (tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = sk->sk_tskey++; @@ -1565,7 +1566,8 @@ int ip6_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag) + struct rt6_info *rt, unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -1593,7 +1595,8 @@ int ip6_append_data(struct sock *sk, return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, &np->cork, sk_page_frag(sk), getfrag, - from, length, transhdrlen, flags, dontfrag); + from, length, transhdrlen, flags, dontfrag, + sockc); } EXPORT_SYMBOL_GPL(ip6_append_data); @@ -1752,7 +1755,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, - int dontfrag) + int dontfrag, const struct sockcm_cookie *sockc) { struct inet_cork_full cork; struct inet6_cork v6_cork; @@ -1779,7 +1782,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, - flags, dontfrag); + flags, dontfrag, sockc); if (err) { __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); return ERR_PTR(err); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index a10e77103c88..bf678324fd52 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2268,7 +2268,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, mfcs.mfcs_packets = 
c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0) return -EMSGSIZE; rtm->rtm_type = RTN_MULTICAST; @@ -2411,7 +2411,7 @@ static int mr6_msgsize(bool unresolved, int maxvif) + nla_total_size(0) /* RTA_MULTIPATH */ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ - + nla_total_size(sizeof(struct rta_mfc_stats)) + + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; return len; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4449ad1f8114..4ff4b29894eb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -407,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, + lockdep_sock_is_held(sk)); opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); @@ -471,6 +472,7 @@ sticky_done: struct ipv6_txoptions *opt = NULL; struct msghdr msg; struct flowi6 fl6; + struct sockcm_cookie sockc_junk; int junk; memset(&fl6, 0, sizeof(fl6)); @@ -503,7 +505,7 @@ sticky_done: msg.msg_control = (void *)(opt+1); retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, - &junk, &junk); + &junk, &junk, &sockc_junk); if (retv) goto done; update: @@ -1123,7 +1125,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct ipv6_txoptions *opt; lock_sock(sk); - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, + lockdep_sock_is_held(sk)); len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 3deed5860a42..5d778dd11f66 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -20,15 +20,16 @@ #include <net/netfilter/nf_conntrack_synproxy.h> static struct ipv6hdr * -synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr, - const struct in6_addr *daddr) +synproxy_build_ip(struct net *net, struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr) { struct ipv6hdr *iph; skb_reset_network_header(skb); iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph)); ip6_flow_hdr(iph, 0, 0); - iph->hop_limit = 64; //XXX + iph->hop_limit = net->ipv6.devconf_all->hop_limit; iph->nexthdr = IPPROTO_TCP; iph->saddr = *saddr; iph->daddr = *daddr; @@ -37,13 +38,12 @@ synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr, } static void -synproxy_send_tcp(const struct synproxy_net *snet, +synproxy_send_tcp(struct net *net, const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct ipv6hdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { - struct net *net = nf_ct_net(snet->tmpl); struct dst_entry *dst; struct flowi6 fl6; @@ -84,7 +84,7 @@ free_nskb: } static void -synproxy_send_client_synack(const struct synproxy_net *snet, +synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -103,7 +103,7 @@ synproxy_send_client_synack(const struct synproxy_net *snet, return; 
skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr); + niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -121,15 +121,16 @@ synproxy_send_client_synack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_syn(const struct synproxy_net *snet, +synproxy_send_server_syn(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *nskb; struct ipv6hdr *iph, *niph; struct tcphdr *nth; @@ -144,7 +145,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr); + niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -165,12 +166,12 @@ synproxy_send_server_syn(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_ack(const struct synproxy_net *snet, +synproxy_send_server_ack(struct net *net, const struct ip_ct_tcp *state, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) @@ -189,7 +190,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr); + niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -205,11 +206,11 @@ synproxy_send_server_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void -synproxy_send_client_ack(const struct synproxy_net *snet, +synproxy_send_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -227,7 +228,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr); + niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -243,15 +244,16 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool -synproxy_recv_client_ack(const struct synproxy_net *snet, +synproxy_recv_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); int mss; mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1); @@ -267,7 +269,7 @@ synproxy_recv_client_ack(const struct 
synproxy_net *snet, if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); - synproxy_send_server_syn(snet, skb, th, opts, recv_seq); + synproxy_send_server_syn(net, skb, th, opts, recv_seq); return true; } @@ -275,7 +277,8 @@ static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(par->net); + struct net *net = par->net; + struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; @@ -304,12 +307,12 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); - synproxy_send_client_synack(snet, skb, th, &opts); + synproxy_send_client_synack(net, skb, th, &opts); return NF_DROP; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ - synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); + synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); return NF_DROP; } @@ -320,7 +323,8 @@ static unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(nhs->net); + struct net *net = nhs->net; + struct synproxy_net *snet = synproxy_pernet(net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; @@ -384,7 +388,7 @@ static unsigned int ipv6_synproxy_hook(void *priv, * therefore we need to add 1 to make the SYN sequence * number match the one of first SYN. */ - if (synproxy_recv_client_ack(snet, skb, th, &opts, + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq) + 1)) this_cpu_inc(snet->stats->cookie_retrans); @@ -410,12 +414,12 @@ static unsigned int ipv6_synproxy_hook(void *priv, XT_SYNPROXY_OPT_SACK_PERM); swap(opts.tsval, opts.tsecr); - synproxy_send_server_ack(snet, state, skb, th, &opts); + synproxy_send_server_ack(net, state, skb, th, &opts); nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); swap(opts.tsval, opts.tsecr); - synproxy_send_client_ack(snet, skb, th, &opts); + synproxy_send_client_ack(net, skb, th, &opts); consume_skb(skb); return NF_STOLEN; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 4709f657b7b6..a5400223fd74 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -158,7 +158,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.fl6_dport = otcph->source; security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { + if (dst->error) { dst_release(dst); return; } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index c382db7a2e73..da1cff79e447 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -62,6 +62,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct dst_entry *dst; struct rt6_info *rt; struct pingfakehdr pfh; + struct sockcm_cookie junk = {0}; pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); @@ -144,7 +145,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) err = ip6_append_data(sk, ping_getfrag, &pfh, len, 0, hlimit, np->tclass, NULL, &fl6, rt, - MSG_DONTWAIT, np->dontfrag); + MSG_DONTWAIT, np->dontfrag, &junk); if (err) { ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fa59dd7a427e..b07ce21983aa 
100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -745,6 +745,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct dst_entry *dst = NULL; struct raw6_frag_vec rfv; struct flowi6 fl6; + struct sockcm_cookie sockc; int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; @@ -821,13 +822,15 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (fl6.flowi6_oif == 0) fl6.flowi6_oif = sk->sk_bound_dev_if; + sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + &hlimit, &tclass, &dontfrag, + &sockc); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -897,7 +900,7 @@ back_from_confirm: lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, dontfrag, &sockc); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 83384308d032..a13d8c114ccb 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -913,10 +913,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT); - if (IS_ERR(skb)) { + if (iptunnel_handle_offloads(skb, SKB_GSO_SIT)) { ip_rt_put(rt); - goto out; + goto tx_error; } if (df) { @@ -992,7 +991,6 @@ tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); -out: dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -1002,15 +1000,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP); - if (IS_ERR(skb)) - goto out; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); return NETDEV_TX_OK; -out: +tx_error: + kfree_skb(skb); dev->stats.tx_errors++; return NETDEV_TX_OK; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 711d209f9124..800265c7fd3f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -234,7 +234,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -439,7 +439,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req) + enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -452,7 +452,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, IPPROTO_TCP)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, foc, attach_req); + skb = tcp_make_synack(sk, dst, req, foc, synack_type); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, @@ -858,6 +858,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) return; #ifdef CONFIG_TCP_MD5SIG + rcu_read_lock(); hash_location = tcp_parse_md5sig_option(th); if (sk && 
sk_fullsock(sk)) { key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); @@ -875,16 +876,15 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) th->source, &ipv6h->daddr, ntohs(th->source), tcp_v6_iif(skb)); if (!sk1) - return; + goto out; - rcu_read_lock(); key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr); if (!key) - goto release_sk1; + goto out; genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) - goto release_sk1; + goto out; } #endif @@ -898,11 +898,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG -release_sk1: - if (sk1) { - rcu_read_unlock(); - sock_put(sk1); - } +out: + rcu_read_unlock(); #endif } @@ -967,7 +964,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) &tcp_request_sock_ipv6_ops, sk, skb); drop: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return 0; /* don't send reset */ } @@ -1172,7 +1169,7 @@ out_overflow: out_nonewsk: dst_release(dst); out: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + tcp_listendrop(sk); return NULL; } @@ -1351,6 +1348,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; const struct ipv6hdr *hdr; + bool refcounted; struct sock *sk; int ret; struct net *net = dev_net(skb->dev); @@ -1381,7 +1379,8 @@ static int tcp_v6_rcv(struct sk_buff *skb) lookup: sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), - th->source, th->dest, inet6_iif(skb)); + th->source, th->dest, inet6_iif(skb), + &refcounted); if (!sk) goto no_tcp_socket; @@ -1404,6 +1403,7 @@ process: goto lookup; } sock_hold(sk); + refcounted = true; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1460,7 +1460,8 @@ process: bh_unlock_sock(sk); put_and_return: - sock_put(sk); + if (refcounted) + sock_put(sk); return ret ? 
-1 : 0; no_tcp_socket: @@ -1483,7 +1484,9 @@ discard_it: return 0; discard_and_relse: - sock_put(sk); + sk_drops_add(sk, skb); + if (refcounted) + sock_put(sk); goto discard_it; do_time_wait: @@ -1514,6 +1517,7 @@ do_time_wait: inet_twsk_deschedule_put(tw); sk = sk2; tcp_v6_restore_cb(skb); + refcounted = false; goto process; } /* Fall through to ACK */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6bc5c664fa46..8d8b2cd8ec5b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -213,37 +213,28 @@ static struct sock *udp6_lib_lookup2(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; -begin: result = NULL; badness = -1; - udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; } + result = sk; + badness = score; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -251,27 +242,10 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot2) - goto begin; - - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score2(result, net, saddr, sport, - daddr, hnum, dif) < badness)) { - sock_put(result); - goto begin; - } - } return result; } +/* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, @@ -279,15 +253,12 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness, matches = 0, reuseport = 0; - bool select_ok = true; u32 hash = 0; - rcu_read_lock(); if (hslot->count > 10) { hash2 = udp6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; @@ -309,34 +280,26 @@ struct sock *__udp6_lib_lookup(struct net *net, &in6addr_any, hnum, dif, hslot2, slot2, skb); } - rcu_read_unlock(); return result; } begin: result = NULL; badness = -1; - sk_nulls_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { - result = sk; - badness = score; reuseport = sk->sk_reuseport; if (reuseport) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - - sk2 = reuseport_select_sock(sk, hash, skb, + result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - select_ok = false; - goto found; - } - } + if (result) + return result; matches = 1; 
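Aside: the udp6_lib_lookup2() / __udp6_lib_lookup() rewrite above (and continued below) drops the nulls-list walk, the restart-on-chain-move logic and the per-candidate refcount bump in favour of a plain RCU list walk: the best-scoring socket is simply remembered, and as soon as a reuseport group matches, reuseport_select_sock() is returned directly. A condensed sketch of the resulting loop shape follows; it is illustrative only and omits the equal-score tie-break (matches / reciprocal_scale) path that the real hunk keeps.

	/* Sketch of the simplified RCU lookup loop; not part of the patch. */
	result = NULL;
	badness = -1;
	sk_for_each_rcu(sk, &hslot2->head) {
		score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif);
		if (score <= badness)
			continue;
		if (sk->sk_reuseport) {
			hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
			result = reuseport_select_sock(sk, hash, skb,
						       sizeof(struct udphdr));
			if (result)
				return result;	/* group picked a socket */
		}
		result = sk;
		badness = score;
	}
	/* caller holds rcu_read_lock(); no reference is taken here */
	return result;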
} + result = sk; + badness = score; } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -344,25 +307,6 @@ begin: hash = next_pseudo_random32(hash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begin; - - if (result) { -found: - if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) - result = NULL; - else if (unlikely(compute_score(result, net, hnum, saddr, sport, - daddr, dport, dif) < badness)) { - sock_put(result); - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(__udp6_lib_lookup); @@ -382,12 +326,37 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, udptable, skb); } +struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + const struct net_device *dev = + skb_dst(skb) ? skb_dst(skb)->dev : skb->dev; + + return __udp6_lib_lookup(dev_net(dev), &iph->saddr, sport, + &iph->daddr, dport, inet6_iif(skb), + &udp_table, skb); +} +EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); + +/* Must be called under rcu_read_lock(). + * Does increment socket refcount. + */ +#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ + IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { - return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); + struct sock *sk; + + sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, + dif, &udp_table, NULL); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + return sk; } EXPORT_SYMBOL_GPL(udp6_lib_lookup); +#endif /* * This should be easy, if there is something there we @@ -401,7 +370,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; - int peeked, off = 0; + int peeked, peeking, off; int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; @@ -415,15 +384,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: + peeking = off = sk_peek_offset(sk, flags); skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &off, &err); if (!skb) - goto out; + return err; - ulen = skb->len - sizeof(struct udphdr); + ulen = skb->len; copied = len; - if (copied > ulen) - copied = ulen; + if (copied > ulen - off) + copied = ulen - off; else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; @@ -435,17 +405,16 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. 
*/ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) { checksum_valid = !udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), - msg, copied); + err = skb_copy_datagram_msg(skb, off, msg, copied); else { - err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg); + err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; } @@ -462,7 +431,8 @@ try_again: UDP_MIB_INERRORS, is_udplite); } - goto out_free; + skb_free_datagram_locked(sk, skb); + return err; } if (!peeked) { if (is_udp4) @@ -510,9 +480,7 @@ try_again: if (flags & MSG_TRUNC) err = ulen; -out_free: - skb_free_datagram_locked(sk, skb); -out: + __skb_free_datagram_locked(sk, skb, peeking ? -err : err); return err; csum_copy_err: @@ -585,7 +553,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk->sk_err = err; sk->sk_error_report(sk); out: - sock_put(sk); + return; } static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -598,7 +566,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_incoming_cpu_update(sk); } - rc = sock_queue_rcv_skb(sk, skb); + rc = __sock_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -692,8 +660,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rcu_access_pointer(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) goto csum_error; + if (sk_filter(sk, skb)) + goto drop; } + udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); @@ -747,33 +718,6 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, return true; } -static void flush_stack(struct sock **stack, unsigned int count, - struct sk_buff *skb, unsigned int final) -{ - struct sk_buff *skb1 = NULL; - struct sock *sk; - unsigned int i; - - for (i = 0; i < count; i++) { - sk = stack[i]; - if (likely(!skb1)) - skb1 = (i == final) ? 
skb : skb_clone(skb, GFP_ATOMIC); - if (!skb1) { - atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); - } - - if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0) - skb1 = NULL; - sock_put(sk); - } - if (unlikely(skb1)) - kfree_skb(skb1); -} - static void udp6_csum_zero_error(struct sk_buff *skb) { /* RFC 2460 section 8.1 says that we SHOULD log @@ -792,15 +736,15 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, struct udp_table *udptable, int proto) { - struct sock *sk, *stack[256 / sizeof(struct sock *)]; + struct sock *sk, *first = NULL; const struct udphdr *uh = udp_hdr(skb); - struct hlist_nulls_node *node; unsigned short hnum = ntohs(uh->dest); struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - int dif = inet6_iif(skb); - unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); + unsigned int offset = offsetof(typeof(*sk), sk_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - bool inner_flushed = false; + int dif = inet6_iif(skb); + struct hlist_node *node; + struct sk_buff *nskb; if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & @@ -811,27 +755,32 @@ start_lookup: offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } - spin_lock(&hslot->lock); - sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { - if (__udp_v6_is_mcast_sock(net, sk, - uh->dest, daddr, - uh->source, saddr, - dif, hnum) && - /* If zero checksum and no_check is not on for - * the socket then skip it. - */ - (uh->check || udp_sk(sk)->no_check6_rx)) { - if (unlikely(count == ARRAY_SIZE(stack))) { - flush_stack(stack, count, skb, ~0); - inner_flushed = true; - count = 0; - } - stack[count++] = sk; - sock_hold(sk); + sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { + if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr, + uh->source, saddr, dif, hnum)) + continue; + /* If zero checksum and no_check is not on for + * the socket then skip it. + */ + if (!uh->check && !udp_sk(sk)->no_check6_rx) + continue; + if (!first) { + first = sk; + continue; + } + nskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!nskb)) { + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + continue; } - } - spin_unlock(&hslot->lock); + if (udpv6_queue_rcv_skb(sk, nskb) > 0) + consume_skb(nskb); + } /* Also lookup *:port if we are using hash2 and haven't done so yet. 
*/ if (use_hash2 && hash2 != hash2_any) { @@ -839,13 +788,13 @@ start_lookup: goto start_lookup; } - if (count) { - flush_stack(stack, count, skb, count - 1); + if (first) { + if (udpv6_queue_rcv_skb(first, skb) > 0) + consume_skb(skb); } else { - if (!inner_flushed) - UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); - consume_skb(skb); + kfree_skb(skb); + UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); } return 0; } @@ -853,10 +802,10 @@ start_lookup: int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { + const struct in6_addr *saddr, *daddr; struct net *net = dev_net(skb->dev); - struct sock *sk; struct udphdr *uh; - const struct in6_addr *saddr, *daddr; + struct sock *sk; u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) @@ -910,7 +859,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int ret; if (!uh->check && !udp_sk(sk)->no_check6_rx) { - sock_put(sk); udp6_csum_zero_error(skb); goto csum_error; } @@ -920,7 +868,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ip6_compute_pseudo); ret = udpv6_queue_rcv_skb(sk, skb); - sock_put(sk); /* a return value > 0 means to resubmit the input */ if (ret > 0) @@ -1128,6 +1075,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int connected = 0; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + struct sockcm_cookie sockc; /* destination address check */ if (sin6) { @@ -1247,6 +1195,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; @@ -1254,7 +1203,8 @@ do_udp_sendmsg: opt->tot_len = sizeof(*opt); err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + &hlimit, &tclass, &dontfrag, + &sockc); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1321,7 +1271,7 @@ back_from_confirm: skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, dontfrag, &sockc); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) err = udp_v6_send_skb(skb, &fl6); @@ -1348,7 +1298,8 @@ do_append_data: err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); + corkreq ? 
msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag, + &sockc); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 2b0fbe6929e8..5429f6bcf047 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -153,7 +153,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head, skip: NAPI_GRO_CB(skb)->is_ipv6 = 1; - return udp_gro_receive(head, skb, uh); + return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb); flush: NAPI_GRO_CB(skb)->flush = 1; @@ -173,7 +173,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; } - return udp_gro_complete(skb, nhoff); + return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb); } static const struct net_offload udpv6_offload = { @@ -184,7 +184,12 @@ static const struct net_offload udpv6_offload = { }, }; -int __init udp_offload_init(void) +int udpv6_offload_init(void) { return inet6_add_offload(&udpv6_offload, IPPROTO_UDP); } + +int udpv6_offload_exit(void) +{ + return inet6_del_offload(&udpv6_offload, IPPROTO_UDP); +} diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index cd479903d943..46e07267e503 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -494,6 +494,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; struct flowi6 fl6; + struct sockcm_cookie sockc_unused = {0}; int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; @@ -564,9 +565,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); - if (err < 0) { + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag, + &sockc_unused); + if (err < 0) { fl6_sock_release(flowlabel); return err; } @@ -627,7 +629,7 @@ back_from_confirm: err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, dontfrag, &sockc_unused); if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 1a3c7e0f5d0d..29c509c54bb2 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -195,7 +195,7 @@ static int llc_seq_core_show(struct seq_file *seq, void *v) timer_pending(&llc->pf_cycle_timer.timer), timer_pending(&llc->rej_sent_timer.timer), timer_pending(&llc->busy_state_timer.timer), - !!sk->sk_backlog.tail, !!sock_owned_by_user(sk)); + !!sk->sk_backlog.tail, !!sk->sk_lock.owned); out: return 0; } diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 4932e9f243a2..42fa81031dfa 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -935,6 +935,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, size_t len) { struct tid_ampdu_tx *tid_tx; + struct ieee80211_txq *txq; u16 capab, tid; u8 buf_size; bool amsdu; @@ -945,6 +946,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); + txq = sta->sta.txq[tid]; + if (!amsdu && txq) + set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags); + mutex_lock(&sta->ampdu_mlme.mtx); tid_tx = 
rcu_dereference_protected_tid_tx(sta, tid); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fe1704c4e8fb..0c12e4001f19 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -65,11 +65,13 @@ static int ieee80211_change_iface(struct wiphy *wiphy, return ret; if (type == NL80211_IFTYPE_AP_VLAN && - params && params->use_4addr == 0) + params && params->use_4addr == 0) { RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); - else if (type == NL80211_IFTYPE_STATION && - params && params->use_4addr >= 0) + ieee80211_check_fast_rx_iface(sdata); + } else if (type == NL80211_IFTYPE_STATION && + params && params->use_4addr >= 0) { sdata->u.mgd.use_4addr = params->use_4addr; + } if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) { struct ieee80211_local *local = sdata->local; @@ -732,6 +734,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->vif.bss_conf.dtim_period = params->dtim_period; sdata->vif.bss_conf.enable_beacon = true; + sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p; sdata->vif.bss_conf.ssid_len = params->ssid_len; if (params->ssid_len) @@ -1046,7 +1049,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, int ret = 0; struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); u32 mask, set; sband = local->hw.wiphy->bands[band]; @@ -1202,6 +1205,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, params->opmode_notif, band); } + if (params->support_p2p_ps >= 0) + sta->sta.support_p2p_ps = params->support_p2p_ps; + if (ieee80211_vif_is_mesh(&sdata->vif)) sta_apply_mesh_params(local, sta, params); @@ -1363,6 +1369,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, rcu_assign_pointer(vlansdata->u.vlan.sta, sta); new_4addr = true; + __ieee80211_check_fast_rx_iface(vlansdata); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && @@ -1499,7 +1506,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, memset(pinfo, 0, sizeof(*pinfo)); - pinfo->generation = mesh_paths_generation; + pinfo->generation = mpath->sdata->u.mesh.mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | MPATH_INFO_SN | @@ -1577,7 +1584,7 @@ static void mpp_set_pinfo(struct mesh_path *mpath, u8 *mpp, memset(pinfo, 0, sizeof(*pinfo)); memcpy(mpp, mpath->mpp, ETH_ALEN); - pinfo->generation = mpp_paths_generation; + pinfo->generation = mpath->sdata->u.mesh.mpp_paths_generation; } static int ieee80211_get_mpp(struct wiphy *wiphy, struct net_device *dev, @@ -1841,7 +1848,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, struct bss_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - enum ieee80211_band band; + enum nl80211_band band; u32 changed = 0; if (!sdata_dereference(sdata->u.ap.beacon, sdata)) @@ -1860,7 +1867,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (!sdata->vif.bss_conf.use_short_slot && - band == IEEE80211_BAND_5GHZ) { + band == NL80211_BAND_5GHZ) { sdata->vif.bss_conf.use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } @@ -1885,6 +1892,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; else sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; + ieee80211_check_fast_rx_iface(sdata); } if (params->ht_opmode >= 0) { @@ -2089,12 +2097,12 @@ static int 
ieee80211_leave_ocb(struct wiphy *wiphy, struct net_device *dev) } static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, - int rate[IEEE80211_NUM_BANDS]) + int rate[NUM_NL80211_BANDS]) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(sdata->vif.bss_conf.mcast_rate, rate, - sizeof(int) * IEEE80211_NUM_BANDS); + sizeof(int) * NUM_NL80211_BANDS); return 0; } @@ -2499,7 +2507,7 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return ret; } - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; @@ -3127,7 +3135,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_tx_info *info; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; - enum ieee80211_band band; + enum nl80211_band band; int ret; /* the lock is needed to assign the cookie later */ diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 4ab5c522ceee..b251b2f7f8dd 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -127,6 +127,9 @@ static const char *hw_flag_names[] = { FLAG(BEACON_TX_STATUS), FLAG(NEEDS_UNIQUE_STA_ADDR), FLAG(SUPPORTS_REORDERING_BUFFER), + FLAG(USES_RSS), + FLAG(TX_AMSDU), + FLAG(TX_FRAG_LIST), #undef FLAG }; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 37ea30e0754c..a5ba739cd2a7 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -169,21 +169,21 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \ IEEE80211_IF_FILE_R(name) /* common attributes */ -IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ], +IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[NL80211_BAND_2GHZ], HEX); -IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], +IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[NL80211_BAND_5GHZ], HEX); IEEE80211_IF_FILE(rc_rateidx_mcs_mask_2ghz, - rc_rateidx_mcs_mask[IEEE80211_BAND_2GHZ], HEXARRAY); + rc_rateidx_mcs_mask[NL80211_BAND_2GHZ], HEXARRAY); IEEE80211_IF_FILE(rc_rateidx_mcs_mask_5ghz, - rc_rateidx_mcs_mask[IEEE80211_BAND_5GHZ], HEXARRAY); + rc_rateidx_mcs_mask[NL80211_BAND_5GHZ], HEXARRAY); static ssize_t ieee80211_if_fmt_rc_rateidx_vht_mcs_mask_2ghz( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { int i, len = 0; - const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[IEEE80211_BAND_2GHZ]; + const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[NL80211_BAND_2GHZ]; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) len += scnprintf(buf + len, buflen - len, "%04x ", mask[i]); @@ -199,7 +199,7 @@ static ssize_t ieee80211_if_fmt_rc_rateidx_vht_mcs_mask_5ghz( char *buf, int buflen) { int i, len = 0; - const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[IEEE80211_BAND_5GHZ]; + const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[NL80211_BAND_5GHZ]; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) len += scnprintf(buf + len, buflen - len, "%04x ", mask[i]); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a39512f09f9e..33dfcbc2bf9c 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -3,6 +3,7 @@ * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the 
terms of the GNU General Public License version 2 as @@ -51,31 +52,54 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_FILE(aid, sta.aid, D); +static const char * const sta_flag_names[] = { +#define FLAG(F) [WLAN_STA_##F] = #F + FLAG(AUTH), + FLAG(ASSOC), + FLAG(PS_STA), + FLAG(AUTHORIZED), + FLAG(SHORT_PREAMBLE), + FLAG(WDS), + FLAG(CLEAR_PS_FILT), + FLAG(MFP), + FLAG(BLOCK_BA), + FLAG(PS_DRIVER), + FLAG(PSPOLL), + FLAG(TDLS_PEER), + FLAG(TDLS_PEER_AUTH), + FLAG(TDLS_INITIATOR), + FLAG(TDLS_CHAN_SWITCH), + FLAG(TDLS_OFF_CHANNEL), + FLAG(TDLS_WIDER_BW), + FLAG(UAPSD), + FLAG(SP), + FLAG(4ADDR_EVENT), + FLAG(INSERTED), + FLAG(RATE_CONTROL), + FLAG(TOFFSET_KNOWN), + FLAG(MPSP_OWNER), + FLAG(MPSP_RECIPIENT), + FLAG(PS_DELIVER), +#undef FLAG +}; + static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[121]; + char buf[16 * NUM_WLAN_STA_FLAGS], *pos = buf; + char *end = buf + sizeof(buf) - 1; struct sta_info *sta = file->private_data; + unsigned int flg; + + BUILD_BUG_ON(ARRAY_SIZE(sta_flag_names) != NUM_WLAN_STA_FLAGS); + + for (flg = 0; flg < NUM_WLAN_STA_FLAGS; flg++) { + if (test_sta_flag(sta, flg)) + pos += scnprintf(pos, end - pos, "%s\n", + sta_flag_names[flg]); + } -#define TEST(flg) \ - test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : "" - - int res = scnprintf(buf, sizeof(buf), - "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - TEST(AUTH), TEST(ASSOC), TEST(PS_STA), - TEST(PS_DRIVER), TEST(AUTHORIZED), - TEST(SHORT_PREAMBLE), - sta->sta.wme ? "WME\n" : "", - TEST(WDS), TEST(CLEAR_PS_FILT), - TEST(MFP), TEST(BLOCK_BA), TEST(PSPOLL), - TEST(UAPSD), TEST(SP), TEST(TDLS_PEER), - TEST(TDLS_PEER_AUTH), TEST(TDLS_INITIATOR), - TEST(TDLS_CHAN_SWITCH), TEST(TDLS_OFF_CHANNEL), - TEST(4ADDR_EVENT), TEST(INSERTED), - TEST(RATE_CONTROL), TEST(TOFFSET_KNOWN), - TEST(MPSP_OWNER), TEST(MPSP_RECIPIENT)); -#undef TEST - return simple_read_from_buffer(userbuf, count, ppos, buf, res); + return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); } STA_OPS(flags); @@ -151,11 +175,12 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { - char _buf[12] = {}, *buf = _buf; + char _buf[25] = {}, *buf = _buf; struct sta_info *sta = file->private_data; bool start, tx; unsigned long tid; - int ret; + char *pos; + int ret, timeout = 5000; if (count > sizeof(_buf)) return -EINVAL; @@ -164,37 +189,48 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu return -EFAULT; buf[sizeof(_buf) - 1] = '\0'; + pos = buf; + buf = strsep(&pos, " "); + if (!buf) + return -EINVAL; - if (strncmp(buf, "tx ", 3) == 0) { - buf += 3; + if (!strcmp(buf, "tx")) tx = true; - } else if (strncmp(buf, "rx ", 3) == 0) { - buf += 3; + else if (!strcmp(buf, "rx")) tx = false; - } else + else return -EINVAL; - if (strncmp(buf, "start ", 6) == 0) { - buf += 6; + buf = strsep(&pos, " "); + if (!buf) + return -EINVAL; + if (!strcmp(buf, "start")) { start = true; if (!tx) return -EINVAL; - } else if (strncmp(buf, "stop ", 5) == 0) { - buf += 5; + } else if (!strcmp(buf, "stop")) { start = false; - } else + } else { return -EINVAL; + } - ret = kstrtoul(buf, 0, &tid); - if (ret) - return ret; + buf = strsep(&pos, " "); + if (!buf) + return -EINVAL; + if (sscanf(buf, "timeout=%d", &timeout) == 1) { + buf = strsep(&pos, " "); + if (!buf || !tx || !start) + return -EINVAL; + } - if (tid >= 
IEEE80211_NUM_TIDS) + ret = kstrtoul(buf, 0, &tid); + if (ret || tid >= IEEE80211_NUM_TIDS) return -EINVAL; if (tx) { if (start) - ret = ieee80211_start_tx_ba_session(&sta->sta, tid, 5000); + ret = ieee80211_start_tx_ba_session(&sta->sta, tid, + timeout); else ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); } else { @@ -322,14 +358,14 @@ STA_OPS(vht_capa); #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ - sta->debugfs.dir, sta, &sta_ ##name## _ops); + sta->debugfs_dir, sta, &sta_ ##name## _ops); #define DEBUGFS_ADD_COUNTER(name, field) \ if (sizeof(sta->field) == sizeof(u32)) \ - debugfs_create_u32(#name, 0400, sta->debugfs.dir, \ + debugfs_create_u32(#name, 0400, sta->debugfs_dir, \ (u32 *) &sta->field); \ else \ - debugfs_create_u64(#name, 0400, sta->debugfs.dir, \ + debugfs_create_u64(#name, 0400, sta->debugfs_dir, \ (u64 *) &sta->field); void ieee80211_sta_debugfs_add(struct sta_info *sta) @@ -339,8 +375,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) struct dentry *stations_dir = sta->sdata->debugfs.subdir_stations; u8 mac[3*ETH_ALEN]; - sta->debugfs.add_has_run = true; - if (!stations_dir) return; @@ -355,8 +389,8 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) * destroyed quickly enough the old station's debugfs * dir might still be around. */ - sta->debugfs.dir = debugfs_create_dir(mac, stations_dir); - if (!sta->debugfs.dir) + sta->debugfs_dir = debugfs_create_dir(mac, stations_dir); + if (!sta->debugfs_dir) return; DEBUGFS_ADD(flags); @@ -372,14 +406,14 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, - sta->debugfs.dir, + sta->debugfs_dir, (u32 *)&sta->driver_buffered_tids); else debugfs_create_x64("driver_buffered_tids", 0400, - sta->debugfs.dir, + sta->debugfs_dir, (u64 *)&sta->driver_buffered_tids); - drv_sta_add_debugfs(local, sdata, &sta->sta, sta->debugfs.dir); + drv_sta_add_debugfs(local, sdata, &sta->sta, sta->debugfs_dir); } void ieee80211_sta_debugfs_remove(struct sta_info *sta) @@ -387,7 +421,7 @@ void ieee80211_sta_debugfs_remove(struct sta_info *sta) struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - drv_sta_remove_debugfs(local, sdata, &sta->sta, sta->debugfs.dir); - debugfs_remove_recursive(sta->debugfs.dir); - sta->debugfs.dir = NULL; + drv_sta_remove_debugfs(local, sdata, &sta->sta, sta->debugfs_dir); + debugfs_remove_recursive(sta->debugfs_dir); + sta->debugfs_dir = NULL; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 18b0d65baff0..184473c257eb 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1,3 +1,8 @@ +/* +* Portions of this file +* Copyright(c) 2016 Intel Deutschland GmbH +*/ + #ifndef __MAC80211_DRIVER_OPS #define __MAC80211_DRIVER_OPS @@ -29,6 +34,16 @@ static inline void drv_tx(struct ieee80211_local *local, local->ops->tx(&local->hw, control, skb); } +static inline void drv_sync_rx_queues(struct ieee80211_local *local, + struct sta_info *sta) +{ + if (local->ops->sync_rx_queues) { + trace_drv_sync_rx_queues(local, sta->sdata, &sta->sta); + local->ops->sync_rx_queues(&local->hw); + trace_drv_return_void(local); + } +} + static inline void drv_get_et_strings(struct ieee80211_sub_if_data *sdata, u32 sset, u8 *data) { diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index fc3238376b39..a31d30713d08 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -126,7 +126,7 @@ ieee80211_ibss_build_presp(struct 
ieee80211_sub_if_data *sdata, } } - if (sband->band == IEEE80211_BAND_2GHZ) { + if (sband->band == NL80211_BAND_2GHZ) { *pos++ = WLAN_EID_DS_PARAMS; *pos++ = 1; *pos++ = ieee80211_frequency_to_channel( @@ -348,11 +348,11 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, * * HT follows these specifications (IEEE 802.11-2012 20.3.18) */ - sdata->vif.bss_conf.use_short_slot = chan->band == IEEE80211_BAND_5GHZ; + sdata->vif.bss_conf.use_short_slot = chan->band == NL80211_BAND_5GHZ; bss_change |= BSS_CHANGED_ERP_SLOT; /* cf. IEEE 802.11 9.2.12 */ - if (chan->band == IEEE80211_BAND_2GHZ && have_higher_than_11mbit) + if (chan->band == NL80211_BAND_2GHZ && have_higher_than_11mbit) sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; @@ -649,8 +649,6 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, return NULL; } - sta->rx_stats.last_rx = jiffies; - /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | @@ -670,10 +668,11 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { + unsigned long last_active = ieee80211_sta_last_active(sta); + if (sta->sdata == sdata && - time_after(sta->rx_stats.last_rx + - IEEE80211_IBSS_MERGE_INTERVAL, - jiffies)) { + time_is_after_jiffies(last_active + + IEEE80211_IBSS_MERGE_INTERVAL)) { active++; break; } @@ -990,7 +989,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel) { struct sta_info *sta; - enum ieee80211_band band = rx_status->band; + enum nl80211_band band = rx_status->band; enum nl80211_bss_scan_width scan_width; struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; @@ -1110,7 +1109,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; u64 beacon_timestamp, rx_timestamp; u32 supp_rates = 0; - enum ieee80211_band band = rx_status->band; + enum nl80211_band band = rx_status->band; channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); if (!channel) @@ -1236,8 +1235,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return; - sta->rx_stats.last_rx = jiffies; - /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | @@ -1259,11 +1256,13 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { + unsigned long last_active = ieee80211_sta_last_active(sta); + if (sdata != sta->sdata) continue; - if (time_after(jiffies, sta->rx_stats.last_rx + exp_time) || - (time_after(jiffies, sta->rx_stats.last_rx + exp_rsn) && + if (time_is_before_jiffies(last_active + exp_time) || + (time_is_before_jiffies(last_active + exp_rsn) && sta->sta_state != IEEE80211_STA_AUTHORIZED)) { sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n", sta->sta_state != IEEE80211_STA_AUTHORIZED ? 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 422003540169..9438c9406687 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -696,6 +696,11 @@ struct ieee80211_if_mesh { /* offset from skb->data while building IE */ int meshconf_offset; + + struct mesh_table *mesh_paths; + struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ + int mesh_paths_generation; + int mpp_paths_generation; }; #ifdef CONFIG_MAC80211_MESH @@ -797,6 +802,7 @@ struct mac80211_qos_map { enum txq_info_flags { IEEE80211_TXQ_STOP, IEEE80211_TXQ_AMPDU, + IEEE80211_TXQ_NO_AMSDU, }; struct txq_info { @@ -890,13 +896,13 @@ struct ieee80211_sub_if_data { struct ieee80211_if_ap *bss; /* bitmap of allowed (non-MCS) rate indexes for rate control */ - u32 rc_rateidx_mask[IEEE80211_NUM_BANDS]; + u32 rc_rateidx_mask[NUM_NL80211_BANDS]; - bool rc_has_mcs_mask[IEEE80211_NUM_BANDS]; - u8 rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN]; + bool rc_has_mcs_mask[NUM_NL80211_BANDS]; + u8 rc_rateidx_mcs_mask[NUM_NL80211_BANDS][IEEE80211_HT_MCS_MASK_LEN]; - bool rc_has_vht_mcs_mask[IEEE80211_NUM_BANDS]; - u16 rc_rateidx_vht_mcs_mask[IEEE80211_NUM_BANDS][NL80211_VHT_NSS_MAX]; + bool rc_has_vht_mcs_mask[NUM_NL80211_BANDS]; + u16 rc_rateidx_vht_mcs_mask[NUM_NL80211_BANDS][NL80211_VHT_NSS_MAX]; union { struct ieee80211_if_ap ap; @@ -951,10 +957,10 @@ sdata_assert_lock(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&sdata->wdev.mtx); } -static inline enum ieee80211_band +static inline enum nl80211_band ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata) { - enum ieee80211_band band = IEEE80211_BAND_2GHZ; + enum nl80211_band band = NL80211_BAND_2GHZ; struct ieee80211_chanctx_conf *chanctx_conf; rcu_read_lock(); @@ -1225,7 +1231,7 @@ struct ieee80211_local { struct cfg80211_scan_request __rcu *scan_req; struct ieee80211_scan_request *hw_scan_req; struct cfg80211_chan_def scan_chandef; - enum ieee80211_band hw_scan_band; + enum nl80211_band hw_scan_band; int scan_channel_idx; int scan_ies_len; int hw_scan_ies_bufsize; @@ -1489,6 +1495,11 @@ u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, u64 *cookie, gfp_t gfp); +void ieee80211_check_fast_rx(struct sta_info *sta); +void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata); +void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata); +void ieee80211_clear_fast_rx(struct sta_info *sta); + /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, @@ -1727,10 +1738,10 @@ void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band); + enum nl80211_band band); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band); + enum nl80211_band band); void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_vht_cap *vht_cap); void ieee80211_get_vht_mask_from_cap(__le16 vht_cap, @@ -1758,7 +1769,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, */ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, - enum ieee80211_band current_band, + enum nl80211_band 
current_band, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie); @@ -1783,7 +1794,7 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) /* utility functions/constants */ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */ -int ieee80211_frame_duration(enum ieee80211_band band, size_t len, +int ieee80211_frame_duration(enum nl80211_band band, size_t len, int rate, int erp, int short_preamble, int shift); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, @@ -1793,12 +1804,12 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum ieee80211_band band); + enum nl80211_band band); static inline void ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum ieee80211_band band) + enum nl80211_band band) { rcu_read_lock(); __ieee80211_tx_skb_tid_band(sdata, skb, tid, band); @@ -1953,7 +1964,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, - enum ieee80211_band band, u32 *basic_rates); + enum nl80211_band band, u32 *basic_rates); int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode); int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, @@ -1976,10 +1987,10 @@ int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, const u8 *srates, int srates_len, u32 *rates); int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic, - enum ieee80211_band band); + enum nl80211_band band); int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic, - enum ieee80211_band band); + enum nl80211_band band); u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo); /* channel management */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 453b4e741780..6a33f0b4d839 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1093,7 +1093,7 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) sdata->fragment_next = 0; if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_rmc_free(sdata); + ieee80211_mesh_teardown_sdata(sdata); } static void ieee80211_uninit(struct net_device *dev) @@ -1800,7 +1800,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, ieee80211_delayed_tailroom_dec); - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[i]; sdata->rc_rateidx_mask[i] = diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 3df7b0392d30..edd6f2945f69 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -338,6 +338,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } else { rcu_assign_pointer(sta->gtk[idx], new); } + ieee80211_check_fast_rx(sta); } else { defunikey = old && old == key_mtx_dereference(sdata->local, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8190bf27ebff..7ee91d6151d1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -558,6 +558,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, if (!ops->set_key) wiphy->flags |= WIPHY_FLAG_IBSS_RSN; + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_RRM); + wiphy->bss_priv_size = sizeof(struct 
ieee80211_bss); local = wiphy_priv(wiphy); @@ -799,7 +801,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); int result, i; - enum ieee80211_band band; + enum nl80211_band band; int channels, max_bitrates; bool supp_ht, supp_vht; netdev_features_t feature_whitelist; @@ -854,7 +856,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* Only HW csum features are currently compatible with mac80211 */ feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | - NETIF_F_GSO_SOFTWARE; + NETIF_F_GSO_SOFTWARE | NETIF_F_RXCSUM; if (WARN_ON(hw->netdev_features & ~feature_whitelist)) return -EINVAL; @@ -872,7 +874,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) max_bitrates = 0; supp_ht = false; supp_vht = false; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[band]; @@ -934,7 +936,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (!local->int_scan_req) return -ENOMEM; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!local->hw.wiphy->bands[band]) continue; local->int_scan_req->rates[band] = (u32) -1; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d32cefcb63b0..4c6404e1ad6e 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -25,7 +25,6 @@ bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) void ieee80211s_init(void) { - mesh_pathtbl_init(); mesh_allocated = 1; rm_cache = kmem_cache_create("mesh_rmc", sizeof(struct rmc_entry), 0, 0, NULL); @@ -35,7 +34,6 @@ void ieee80211s_stop(void) { if (!mesh_allocated) return; - mesh_pathtbl_unregister(); kmem_cache_destroy(rm_cache); } @@ -176,22 +174,23 @@ int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) return -ENOMEM; sdata->u.mesh.rmc->idx_mask = RMC_BUCKETS - 1; for (i = 0; i < RMC_BUCKETS; i++) - INIT_LIST_HEAD(&sdata->u.mesh.rmc->bucket[i]); + INIT_HLIST_HEAD(&sdata->u.mesh.rmc->bucket[i]); return 0; } void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) { struct mesh_rmc *rmc = sdata->u.mesh.rmc; - struct rmc_entry *p, *n; + struct rmc_entry *p; + struct hlist_node *n; int i; if (!sdata->u.mesh.rmc) return; for (i = 0; i < RMC_BUCKETS; i++) { - list_for_each_entry_safe(p, n, &rmc->bucket[i], list) { - list_del(&p->list); + hlist_for_each_entry_safe(p, n, &rmc->bucket[i], list) { + hlist_del(&p->list); kmem_cache_free(rm_cache, p); } } @@ -220,16 +219,20 @@ int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, u32 seqnum = 0; int entries = 0; u8 idx; - struct rmc_entry *p, *n; + struct rmc_entry *p; + struct hlist_node *n; + + if (!rmc) + return -1; /* Don't care about endianness since only match matters */ memcpy(&seqnum, &mesh_hdr->seqnum, sizeof(mesh_hdr->seqnum)); idx = le32_to_cpu(mesh_hdr->seqnum) & rmc->idx_mask; - list_for_each_entry_safe(p, n, &rmc->bucket[idx], list) { + hlist_for_each_entry_safe(p, n, &rmc->bucket[idx], list) { ++entries; if (time_after(jiffies, p->exp_time) || entries == RMC_QUEUE_MAX_LEN) { - list_del(&p->list); + hlist_del(&p->list); kmem_cache_free(rm_cache, p); --entries; } else if ((seqnum == p->seqnum) && ether_addr_equal(sa, p->sa)) @@ -243,7 +246,7 @@ int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, p->seqnum = seqnum; p->exp_time = jiffies + RMC_TIMEOUT; memcpy(p->sa, sa, ETH_ALEN); - list_add(&p->list, &rmc->bucket[idx]); + hlist_add_head(&p->list, 
&rmc->bucket[idx]); return 0; } @@ -412,7 +415,7 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u8 *pos; @@ -475,7 +478,7 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u8 *pos; @@ -677,7 +680,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) struct ieee80211_mgmt *mgmt; struct ieee80211_chanctx_conf *chanctx_conf; struct mesh_csa_settings *csa; - enum ieee80211_band band; + enum nl80211_band band; u8 *pos; struct ieee80211_sub_if_data *sdata; int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + @@ -927,7 +930,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings params; struct ieee80211_csa_ie csa_ie; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); int err; u32 sta_flags; @@ -1081,7 +1084,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; size_t baselen; int freq; - enum ieee80211_band band = rx_status->band; + enum nl80211_band band = rx_status->band; /* ignore ProbeResp to foreign address */ if (stype == IEEE80211_STYPE_PROBE_RESP && @@ -1348,12 +1351,6 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval))) mesh_path_start_discovery(sdata); - if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) - mesh_mpath_table_grow(); - - if (test_and_clear_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags)) - mesh_mpp_table_grow(); - if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) ieee80211_mesh_housekeeping(sdata); @@ -1388,6 +1385,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) /* Allocate all mesh structures when creating the first mesh interface. */ if (!mesh_allocated) ieee80211s_init(); + + mesh_pathtbl_init(sdata); + setup_timer(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, (unsigned long) sdata); @@ -1402,3 +1402,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.bssid = zero_addr; } + +void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata) +{ + mesh_rmc_free(sdata); + mesh_pathtbl_unregister(sdata); +} diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 87c017a3b1ce..26b9ccbe1fce 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -21,8 +21,6 @@ /** * enum mesh_path_flags - mac80211 mesh path flags * - * - * * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path * @MESH_PATH_SN_VALID: the mesh path contains a valid destination sequence @@ -32,6 +30,8 @@ * @MESH_PATH_RESOLVED: the mesh path can has been resolved * @MESH_PATH_REQ_QUEUED: there is an unsent path request for this destination * already queued up, waiting for the discovery process to start. 
+ * @MESH_PATH_DELETED: the mesh path has been deleted and should no longer + * be used * * MESH_PATH_RESOLVED is used by the mesh path timer to * decide when to stop or cancel the mesh path discovery. @@ -43,6 +43,7 @@ enum mesh_path_flags { MESH_PATH_FIXED = BIT(3), MESH_PATH_RESOLVED = BIT(4), MESH_PATH_REQ_QUEUED = BIT(5), + MESH_PATH_DELETED = BIT(6), }; /** @@ -51,10 +52,6 @@ enum mesh_path_flags { * * * @MESH_WORK_HOUSEKEEPING: run the periodic mesh housekeeping tasks - * @MESH_WORK_GROW_MPATH_TABLE: the mesh path table is full and needs - * to grow. - * @MESH_WORK_GROW_MPP_TABLE: the mesh portals table is full and needs to - * grow * @MESH_WORK_ROOT: the mesh root station needs to send a frame * @MESH_WORK_DRIFT_ADJUST: time to compensate for clock drift relative to other * mesh nodes @@ -62,8 +59,6 @@ enum mesh_path_flags { */ enum mesh_deferred_task_flags { MESH_WORK_HOUSEKEEPING, - MESH_WORK_GROW_MPATH_TABLE, - MESH_WORK_GROW_MPP_TABLE, MESH_WORK_ROOT, MESH_WORK_DRIFT_ADJUST, MESH_WORK_MBSS_CHANGED, @@ -73,12 +68,16 @@ enum mesh_deferred_task_flags { * struct mesh_path - mac80211 mesh path structure * * @dst: mesh path destination mac address + * @mpp: mesh proxy mac address + * @rhash: rhashtable list pointer + * @gate_list: list pointer for known gates list * @sdata: mesh subif * @next_hop: mesh neighbor to which frames for this destination will be * forwarded * @timer: mesh path discovery timer * @frame_queue: pending queue for frames sent to this destination while the * path is unresolved + * @rcu: rcu head for freeing mesh path * @sn: target sequence number * @metric: current metric to this destination * @hop_count: hops to destination @@ -97,14 +96,16 @@ enum mesh_deferred_task_flags { * @is_gate: the destination station of this path is a mesh gate * * - * The combination of dst and sdata is unique in the mesh path table. Since the - * next_hop STA is only protected by RCU as well, deleting the STA must also - * remove/substitute the mesh_path structure and wait until that is no longer - * reachable before destroying the STA completely. + * The dst address is unique in the mesh path table. Since the mesh_path is + * protected by RCU, deleting the next_hop STA must remove / substitute the + * mesh_path structure and wait until that is no longer reachable before + * destroying the STA completely. */ struct mesh_path { u8 dst[ETH_ALEN]; u8 mpp[ETH_ALEN]; /* used for MPP or MAP */ + struct rhash_head rhash; + struct hlist_node gate_list; struct ieee80211_sub_if_data *sdata; struct sta_info __rcu *next_hop; struct timer_list timer; @@ -128,34 +129,17 @@ struct mesh_path { /** * struct mesh_table * - * @hash_buckets: array of hash buckets of the table - * @hashwlock: array of locks to protect write operations, one per bucket - * @hash_mask: 2^size_order - 1, used to compute hash idx - * @hash_rnd: random value used for hash computations - * @entries: number of entries in the table - * @free_node: function to free nodes of the table - * @copy_node: function to copy nodes of the table - * @size_order: determines size of the table, there will be 2^size_order hash - * buckets * @known_gates: list of known mesh gates and their mpaths by the station. The * gate's mpath may or may not be resolved and active. 
- * - * rcu_head: RCU head to free the table + * @gates_lock: protects updates to known_gates + * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr + * @entries: number of entries in the table */ struct mesh_table { - /* Number of buckets will be 2^N */ - struct hlist_head *hash_buckets; - spinlock_t *hashwlock; /* One per bucket, for add/del */ - unsigned int hash_mask; /* (2^size_order) - 1 */ - __u32 hash_rnd; /* Used for hash generation */ - atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ - void (*free_node) (struct hlist_node *p, bool free_leafs); - int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl); - int size_order; - struct hlist_head *known_gates; + struct hlist_head known_gates; spinlock_t gates_lock; - - struct rcu_head rcu_head; + struct rhashtable rhead; + atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ }; /* Recent multicast cache */ @@ -170,20 +154,21 @@ struct mesh_table { * @seqnum: mesh sequence number of the frame * @exp_time: expiration time of the entry, in jiffies * @sa: source address of the frame + * @list: hashtable list pointer * * The Recent Multicast Cache keeps track of the latest multicast frames that * have been received by a mesh interface and discards received multicast frames * that are found in the cache. */ struct rmc_entry { - struct list_head list; - u32 seqnum; + struct hlist_node list; unsigned long exp_time; + u32 seqnum; u8 sa[ETH_ALEN]; }; struct mesh_rmc { - struct list_head bucket[RMC_BUCKETS]; + struct hlist_head bucket[RMC_BUCKETS]; u32 idx_mask; }; @@ -234,6 +219,7 @@ void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); +void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); @@ -299,9 +285,6 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, void mesh_sta_cleanup(struct sta_info *sta); /* Private interfaces */ -/* Mesh tables */ -void mesh_mpath_table_grow(void); -void mesh_mpp_table_grow(void); /* Mesh paths */ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, u8 ttl, const u8 *target, u32 target_sn, @@ -309,8 +292,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); -int mesh_pathtbl_init(void); -void mesh_pathtbl_unregister(void); +int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata); +void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata); int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); @@ -319,8 +302,6 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); -extern int mesh_paths_generation; -extern int mpp_paths_generation; #ifdef CONFIG_MAC80211_MESH static inline diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 002244bca948..8f9c3bde835f 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1012,6 +1012,10 @@ void 
mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) goto enddiscovery; spin_lock_bh(&mpath->state_lock); + if (mpath->flags & MESH_PATH_DELETED) { + spin_unlock_bh(&mpath->state_lock); + goto enddiscovery; + } mpath->flags &= ~MESH_PATH_REQ_QUEUED; if (preq_node->flags & PREQ_Q_F_START) { if (mpath->flags & MESH_PATH_RESOLVING) { diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 2ba7aa56b11c..6db2ddfa0695 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -18,11 +18,22 @@ #include "ieee80211_i.h" #include "mesh.h" -/* There will be initially 2^INIT_PATHS_SIZE_ORDER buckets */ -#define INIT_PATHS_SIZE_ORDER 2 +static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); -/* Keep the mean chain length below this constant */ -#define MEAN_CHAIN_LEN 2 +static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) +{ + /* Use last four bytes of hw addr as hash index */ + return jhash_1word(*(u32 *)(addr+2), seed); +} + +static const struct rhashtable_params mesh_rht_params = { + .nelem_hint = 2, + .automatic_shrinking = true, + .key_len = ETH_ALEN, + .key_offset = offsetof(struct mesh_path, dst), + .head_offset = offsetof(struct mesh_path, rhash), + .hashfn = mesh_table_hash, +}; static inline bool mpath_expired(struct mesh_path *mpath) { @@ -31,173 +42,36 @@ static inline bool mpath_expired(struct mesh_path *mpath) !(mpath->flags & MESH_PATH_FIXED); } -struct mpath_node { - struct hlist_node list; - struct rcu_head rcu; - /* This indirection allows two different tables to point to the same - * mesh_path structure, useful when resizing - */ - struct mesh_path *mpath; -}; - -static struct mesh_table __rcu *mesh_paths; -static struct mesh_table __rcu *mpp_paths; /* Store paths for MPP&MAP */ - -int mesh_paths_generation; -int mpp_paths_generation; - -/* This lock will have the grow table function as writer and add / delete nodes - * as readers. RCU provides sufficient protection only when reading the table - * (i.e. doing lookups). Adding or adding or removing nodes requires we take - * the read lock or we risk operating on an old table. The write lock is only - * needed when modifying the number of buckets a table. - */ -static DEFINE_RWLOCK(pathtbl_resize_lock); - - -static inline struct mesh_table *resize_dereference_paths( - struct mesh_table __rcu *table) +static void mesh_path_rht_free(void *ptr, void *tblptr) { - return rcu_dereference_protected(table, - lockdep_is_held(&pathtbl_resize_lock)); -} + struct mesh_path *mpath = ptr; + struct mesh_table *tbl = tblptr; -static inline struct mesh_table *resize_dereference_mesh_paths(void) -{ - return resize_dereference_paths(mesh_paths); + mesh_path_free_rcu(tbl, mpath); } -static inline struct mesh_table *resize_dereference_mpp_paths(void) +static struct mesh_table *mesh_table_alloc(void) { - return resize_dereference_paths(mpp_paths); -} - -/* - * CAREFUL -- "tbl" must not be an expression, - * in particular not an rcu_dereference(), since - * it's used twice. So it is illegal to do - * for_each_mesh_entry(rcu_dereference(...), ...) 
- */ -#define for_each_mesh_entry(tbl, node, i) \ - for (i = 0; i <= tbl->hash_mask; i++) \ - hlist_for_each_entry_rcu(node, &tbl->hash_buckets[i], list) - - -static struct mesh_table *mesh_table_alloc(int size_order) -{ - int i; struct mesh_table *newtbl; newtbl = kmalloc(sizeof(struct mesh_table), GFP_ATOMIC); if (!newtbl) return NULL; - newtbl->hash_buckets = kzalloc(sizeof(struct hlist_head) * - (1 << size_order), GFP_ATOMIC); - - if (!newtbl->hash_buckets) { - kfree(newtbl); - return NULL; - } - - newtbl->hashwlock = kmalloc(sizeof(spinlock_t) * - (1 << size_order), GFP_ATOMIC); - if (!newtbl->hashwlock) { - kfree(newtbl->hash_buckets); - kfree(newtbl); - return NULL; - } - - newtbl->size_order = size_order; - newtbl->hash_mask = (1 << size_order) - 1; + INIT_HLIST_HEAD(&newtbl->known_gates); atomic_set(&newtbl->entries, 0); - get_random_bytes(&newtbl->hash_rnd, - sizeof(newtbl->hash_rnd)); - for (i = 0; i <= newtbl->hash_mask; i++) - spin_lock_init(&newtbl->hashwlock[i]); spin_lock_init(&newtbl->gates_lock); return newtbl; } -static void __mesh_table_free(struct mesh_table *tbl) +static void mesh_table_free(struct mesh_table *tbl) { - kfree(tbl->hash_buckets); - kfree(tbl->hashwlock); + rhashtable_free_and_destroy(&tbl->rhead, + mesh_path_rht_free, tbl); kfree(tbl); } -static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) -{ - struct hlist_head *mesh_hash; - struct hlist_node *p, *q; - struct mpath_node *gate; - int i; - - mesh_hash = tbl->hash_buckets; - for (i = 0; i <= tbl->hash_mask; i++) { - spin_lock_bh(&tbl->hashwlock[i]); - hlist_for_each_safe(p, q, &mesh_hash[i]) { - tbl->free_node(p, free_leafs); - atomic_dec(&tbl->entries); - } - spin_unlock_bh(&tbl->hashwlock[i]); - } - if (free_leafs) { - spin_lock_bh(&tbl->gates_lock); - hlist_for_each_entry_safe(gate, q, - tbl->known_gates, list) { - hlist_del(&gate->list); - kfree(gate); - } - kfree(tbl->known_gates); - spin_unlock_bh(&tbl->gates_lock); - } - - __mesh_table_free(tbl); -} - -static int mesh_table_grow(struct mesh_table *oldtbl, - struct mesh_table *newtbl) -{ - struct hlist_head *oldhash; - struct hlist_node *p, *q; - int i; - - if (atomic_read(&oldtbl->entries) - < MEAN_CHAIN_LEN * (oldtbl->hash_mask + 1)) - return -EAGAIN; - - newtbl->free_node = oldtbl->free_node; - newtbl->copy_node = oldtbl->copy_node; - newtbl->known_gates = oldtbl->known_gates; - atomic_set(&newtbl->entries, atomic_read(&oldtbl->entries)); - - oldhash = oldtbl->hash_buckets; - for (i = 0; i <= oldtbl->hash_mask; i++) - hlist_for_each(p, &oldhash[i]) - if (oldtbl->copy_node(p, newtbl) < 0) - goto errcopy; - - return 0; - -errcopy: - for (i = 0; i <= newtbl->hash_mask; i++) { - hlist_for_each_safe(p, q, &newtbl->hash_buckets[i]) - oldtbl->free_node(p, 0); - } - return -ENOMEM; -} - -static u32 mesh_table_hash(const u8 *addr, struct ieee80211_sub_if_data *sdata, - struct mesh_table *tbl) -{ - /* Use last four bytes of hw addr and interface index as hash index */ - return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, - tbl->hash_rnd) & tbl->hash_mask; -} - - /** * * mesh_path_assign_nexthop - update mesh path next hop @@ -340,23 +214,15 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; - struct hlist_head *bucket; - struct mpath_node *node; - - bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; - hlist_for_each_entry_rcu(node, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - ether_addr_equal(dst, 
mpath->dst)) { - if (mpath_expired(mpath)) { - spin_lock_bh(&mpath->state_lock); - mpath->flags &= ~MESH_PATH_ACTIVE; - spin_unlock_bh(&mpath->state_lock); - } - return mpath; - } + + mpath = rhashtable_lookup_fast(&tbl->rhead, dst, mesh_rht_params); + + if (mpath && mpath_expired(mpath)) { + spin_lock_bh(&mpath->state_lock); + mpath->flags &= ~MESH_PATH_ACTIVE; + spin_unlock_bh(&mpath->state_lock); } - return NULL; + return mpath; } /** @@ -371,15 +237,52 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct mesh_path * mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - return mpath_lookup(rcu_dereference(mesh_paths), dst, sdata); + return mpath_lookup(sdata->u.mesh.mesh_paths, dst, sdata); } struct mesh_path * mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - return mpath_lookup(rcu_dereference(mpp_paths), dst, sdata); + return mpath_lookup(sdata->u.mesh.mpp_paths, dst, sdata); } +static struct mesh_path * +__mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) +{ + int i = 0, ret; + struct mesh_path *mpath = NULL; + struct rhashtable_iter iter; + + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return NULL; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto err; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) + continue; + if (IS_ERR(mpath)) + break; + if (i++ == idx) + break; + } +err: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + + if (IS_ERR(mpath) || !mpath) + return NULL; + + if (mpath_expired(mpath)) { + spin_lock_bh(&mpath->state_lock); + mpath->flags &= ~MESH_PATH_ACTIVE; + spin_unlock_bh(&mpath->state_lock); + } + return mpath; +} /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index @@ -393,25 +296,7 @@ mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) struct mesh_path * mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { - struct mesh_table *tbl = rcu_dereference(mesh_paths); - struct mpath_node *node; - int i; - int j = 0; - - for_each_mesh_entry(tbl, node, i) { - if (sdata && node->mpath->sdata != sdata) - continue; - if (j++ == idx) { - if (mpath_expired(node->mpath)) { - spin_lock_bh(&node->mpath->state_lock); - node->mpath->flags &= ~MESH_PATH_ACTIVE; - spin_unlock_bh(&node->mpath->state_lock); - } - return node->mpath; - } - } - - return NULL; + return __mesh_path_lookup_by_idx(sdata->u.mesh.mesh_paths, idx); } /** @@ -426,19 +311,7 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) struct mesh_path * mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { - struct mesh_table *tbl = rcu_dereference(mpp_paths); - struct mpath_node *node; - int i; - int j = 0; - - for_each_mesh_entry(tbl, node, i) { - if (sdata && node->mpath->sdata != sdata) - continue; - if (j++ == idx) - return node->mpath; - } - - return NULL; + return __mesh_path_lookup_by_idx(sdata->u.mesh.mpp_paths, idx); } /** @@ -448,30 +321,26 @@ mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) int mesh_path_add_gate(struct mesh_path *mpath) { struct mesh_table *tbl; - struct mpath_node *gate, *new_gate; int err; rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); - - hlist_for_each_entry_rcu(gate, tbl->known_gates, list) - if (gate->mpath == mpath) { - err = -EEXIST; - goto err_rcu; - } + tbl = mpath->sdata->u.mesh.mesh_paths; - new_gate = kzalloc(sizeof(struct mpath_node), GFP_ATOMIC); 
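The hunks above swap the open-coded bucket array for the kernel's generic resizable hash table, keyed on the 6-byte destination address. A minimal, self-contained sketch of the same pattern (struct and function names here are illustrative, not the mac80211 ones) looks like this; lookups must run under rcu_read_lock():

#include <linux/rhashtable.h>
#include <linux/jhash.h>
#include <linux/etherdevice.h>

struct path_entry {
	u8 dst[ETH_ALEN];		/* key: destination MAC */
	struct rhash_head node;		/* rhashtable linkage */
	struct rcu_head rcu;
};

static u32 path_hashfn(const void *data, u32 len, u32 seed)
{
	/* hash the last four bytes of the MAC, as mesh_table_hash() does */
	return jhash_1word(*(const u32 *)((const u8 *)data + 2), seed);
}

static const struct rhashtable_params path_params = {
	.key_len	= ETH_ALEN,
	.key_offset	= offsetof(struct path_entry, dst),
	.head_offset	= offsetof(struct path_entry, node),
	.hashfn		= path_hashfn,
	.automatic_shrinking = true,
};

/* table set up once with rhashtable_init(&ht, &path_params) */
static struct path_entry *path_lookup(struct rhashtable *ht, const u8 *dst)
{
	return rhashtable_lookup_fast(ht, dst, path_params);
}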
- if (!new_gate) { - err = -ENOMEM; + spin_lock_bh(&mpath->state_lock); + if (mpath->is_gate) { + err = -EEXIST; + spin_unlock_bh(&mpath->state_lock); goto err_rcu; } - mpath->is_gate = true; mpath->sdata->u.mesh.num_gates++; - new_gate->mpath = mpath; - spin_lock_bh(&tbl->gates_lock); - hlist_add_head_rcu(&new_gate->list, tbl->known_gates); - spin_unlock_bh(&tbl->gates_lock); + + spin_lock(&tbl->gates_lock); + hlist_add_head_rcu(&mpath->gate_list, &tbl->known_gates); + spin_unlock(&tbl->gates_lock); + + spin_unlock_bh(&mpath->state_lock); + mpath_dbg(mpath->sdata, "Mesh path: Recorded new gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); @@ -485,28 +354,22 @@ err_rcu: * mesh_gate_del - remove a mesh gate from the list of known gates * @tbl: table which holds our list of known gates * @mpath: gate mpath - * - * Locking: must be called inside rcu_read_lock() section */ static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) { - struct mpath_node *gate; - struct hlist_node *q; + lockdep_assert_held(&mpath->state_lock); + if (!mpath->is_gate) + return; - hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { - if (gate->mpath != mpath) - continue; - spin_lock_bh(&tbl->gates_lock); - hlist_del_rcu(&gate->list); - kfree_rcu(gate, rcu); - spin_unlock_bh(&tbl->gates_lock); - mpath->sdata->u.mesh.num_gates--; - mpath->is_gate = false; - mpath_dbg(mpath->sdata, - "Mesh path: Deleted gate: %pM. %d known gates\n", - mpath->dst, mpath->sdata->u.mesh.num_gates); - break; - } + mpath->is_gate = false; + spin_lock_bh(&tbl->gates_lock); + hlist_del_rcu(&mpath->gate_list); + mpath->sdata->u.mesh.num_gates--; + spin_unlock_bh(&tbl->gates_lock); + + mpath_dbg(mpath->sdata, + "Mesh path: Deleted gate: %pM. %d known gates\n", + mpath->dst, mpath->sdata->u.mesh.num_gates); } /** @@ -518,6 +381,31 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) return sdata->u.mesh.num_gates; } +static +struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata, + const u8 *dst, gfp_t gfp_flags) +{ + struct mesh_path *new_mpath; + + new_mpath = kzalloc(sizeof(struct mesh_path), gfp_flags); + if (!new_mpath) + return NULL; + + memcpy(new_mpath->dst, dst, ETH_ALEN); + eth_broadcast_addr(new_mpath->rann_snd_addr); + new_mpath->is_root = false; + new_mpath->sdata = sdata; + new_mpath->flags = 0; + skb_queue_head_init(&new_mpath->frame_queue); + new_mpath->timer.data = (unsigned long) new_mpath; + new_mpath->timer.function = mesh_path_timer; + new_mpath->exp_time = jiffies; + spin_lock_init(&new_mpath->state_lock); + init_timer(&new_mpath->timer); + + return new_mpath; +} + /** * mesh_path_add - allocate and add a new path to the mesh path table * @dst: destination address of the path (ETH_ALEN length) @@ -530,15 +418,9 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_local *local = sdata->local; struct mesh_table *tbl; struct mesh_path *mpath, *new_mpath; - struct mpath_node *node, *new_node; - struct hlist_head *bucket; - int grow = 0; - int err; - u32 hash_idx; + int ret; if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ @@ -550,129 +432,44 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) return ERR_PTR(-ENOSPC); - read_lock_bh(&pathtbl_resize_lock); - tbl = 
resize_dereference_mesh_paths(); - - hash_idx = mesh_table_hash(dst, sdata, tbl); - bucket = &tbl->hash_buckets[hash_idx]; - - spin_lock(&tbl->hashwlock[hash_idx]); - - hlist_for_each_entry(node, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - ether_addr_equal(dst, mpath->dst)) - goto found; - } - - err = -ENOMEM; - new_mpath = kzalloc(sizeof(struct mesh_path), GFP_ATOMIC); + new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC); if (!new_mpath) - goto err_path_alloc; - - new_node = kmalloc(sizeof(struct mpath_node), GFP_ATOMIC); - if (!new_node) - goto err_node_alloc; - - memcpy(new_mpath->dst, dst, ETH_ALEN); - eth_broadcast_addr(new_mpath->rann_snd_addr); - new_mpath->is_root = false; - new_mpath->sdata = sdata; - new_mpath->flags = 0; - skb_queue_head_init(&new_mpath->frame_queue); - new_node->mpath = new_mpath; - new_mpath->timer.data = (unsigned long) new_mpath; - new_mpath->timer.function = mesh_path_timer; - new_mpath->exp_time = jiffies; - spin_lock_init(&new_mpath->state_lock); - init_timer(&new_mpath->timer); - - hlist_add_head_rcu(&new_node->list, bucket); - if (atomic_inc_return(&tbl->entries) >= - MEAN_CHAIN_LEN * (tbl->hash_mask + 1)) - grow = 1; - - mesh_paths_generation++; - - if (grow) { - set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &sdata->work); - } - mpath = new_mpath; -found: - spin_unlock(&tbl->hashwlock[hash_idx]); - read_unlock_bh(&pathtbl_resize_lock); - return mpath; + return ERR_PTR(-ENOMEM); -err_node_alloc: - kfree(new_mpath); -err_path_alloc: - atomic_dec(&sdata->u.mesh.mpaths); - spin_unlock(&tbl->hashwlock[hash_idx]); - read_unlock_bh(&pathtbl_resize_lock); - return ERR_PTR(err); -} + tbl = sdata->u.mesh.mesh_paths; + do { + ret = rhashtable_lookup_insert_fast(&tbl->rhead, + &new_mpath->rhash, + mesh_rht_params); -static void mesh_table_free_rcu(struct rcu_head *rcu) -{ - struct mesh_table *tbl = container_of(rcu, struct mesh_table, rcu_head); + if (ret == -EEXIST) + mpath = rhashtable_lookup_fast(&tbl->rhead, + dst, + mesh_rht_params); - mesh_table_free(tbl, false); -} + } while (unlikely(ret == -EEXIST && !mpath)); -void mesh_mpath_table_grow(void) -{ - struct mesh_table *oldtbl, *newtbl; - - write_lock_bh(&pathtbl_resize_lock); - oldtbl = resize_dereference_mesh_paths(); - newtbl = mesh_table_alloc(oldtbl->size_order + 1); - if (!newtbl) - goto out; - if (mesh_table_grow(oldtbl, newtbl) < 0) { - __mesh_table_free(newtbl); - goto out; - } - rcu_assign_pointer(mesh_paths, newtbl); - - call_rcu(&oldtbl->rcu_head, mesh_table_free_rcu); - - out: - write_unlock_bh(&pathtbl_resize_lock); -} + if (ret && ret != -EEXIST) + return ERR_PTR(ret); -void mesh_mpp_table_grow(void) -{ - struct mesh_table *oldtbl, *newtbl; - - write_lock_bh(&pathtbl_resize_lock); - oldtbl = resize_dereference_mpp_paths(); - newtbl = mesh_table_alloc(oldtbl->size_order + 1); - if (!newtbl) - goto out; - if (mesh_table_grow(oldtbl, newtbl) < 0) { - __mesh_table_free(newtbl); - goto out; + /* At this point either new_mpath was added, or we found a + * matching entry already in the table; in the latter case + * free the unnecessary new entry. 
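mesh_path_add() resolves the add/add race without any bucket lock: rhashtable_lookup_insert_fast() either inserts atomically or reports -EEXIST, in which case the winning entry is looked up, retrying if it vanished in between. A condensed sketch of that idiom, reusing the hypothetical path_entry type from the sketch above and assuming the caller holds rcu_read_lock():

static struct path_entry *path_add(struct rhashtable *ht,
				   struct path_entry *new)
{
	struct path_entry *old;
	int ret;

	do {
		ret = rhashtable_lookup_insert_fast(ht, &new->node,
						    path_params);
		if (!ret)
			return new;		/* inserted */
		if (ret != -EEXIST)
			return ERR_PTR(ret);	/* e.g. -ENOMEM */

		/* someone else inserted the same key first */
		old = rhashtable_lookup_fast(ht, new->dst, path_params);
		/* if the competitor was already removed again, retry */
	} while (!old);

	kfree(new);				/* keep the existing entry */
	return old;
}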
+ */ + if (ret == -EEXIST) { + kfree(new_mpath); + new_mpath = mpath; } - rcu_assign_pointer(mpp_paths, newtbl); - call_rcu(&oldtbl->rcu_head, mesh_table_free_rcu); - - out: - write_unlock_bh(&pathtbl_resize_lock); + sdata->u.mesh.mesh_paths_generation++; + return new_mpath; } int mpp_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst, const u8 *mpp) { - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_local *local = sdata->local; struct mesh_table *tbl; - struct mesh_path *mpath, *new_mpath; - struct mpath_node *node, *new_node; - struct hlist_head *bucket; - int grow = 0; - int err = 0; - u32 hash_idx; + struct mesh_path *new_mpath; + int ret; if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ @@ -681,65 +478,19 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, if (is_multicast_ether_addr(dst)) return -ENOTSUPP; - err = -ENOMEM; - new_mpath = kzalloc(sizeof(struct mesh_path), GFP_ATOMIC); - if (!new_mpath) - goto err_path_alloc; + new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC); - new_node = kmalloc(sizeof(struct mpath_node), GFP_ATOMIC); - if (!new_node) - goto err_node_alloc; + if (!new_mpath) + return -ENOMEM; - read_lock_bh(&pathtbl_resize_lock); - memcpy(new_mpath->dst, dst, ETH_ALEN); memcpy(new_mpath->mpp, mpp, ETH_ALEN); - new_mpath->sdata = sdata; - new_mpath->flags = 0; - skb_queue_head_init(&new_mpath->frame_queue); - new_node->mpath = new_mpath; - init_timer(&new_mpath->timer); - new_mpath->exp_time = jiffies; - spin_lock_init(&new_mpath->state_lock); - - tbl = resize_dereference_mpp_paths(); + tbl = sdata->u.mesh.mpp_paths; + ret = rhashtable_lookup_insert_fast(&tbl->rhead, + &new_mpath->rhash, + mesh_rht_params); - hash_idx = mesh_table_hash(dst, sdata, tbl); - bucket = &tbl->hash_buckets[hash_idx]; - - spin_lock(&tbl->hashwlock[hash_idx]); - - err = -EEXIST; - hlist_for_each_entry(node, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - ether_addr_equal(dst, mpath->dst)) - goto err_exists; - } - - hlist_add_head_rcu(&new_node->list, bucket); - if (atomic_inc_return(&tbl->entries) >= - MEAN_CHAIN_LEN * (tbl->hash_mask + 1)) - grow = 1; - - spin_unlock(&tbl->hashwlock[hash_idx]); - read_unlock_bh(&pathtbl_resize_lock); - - mpp_paths_generation++; - - if (grow) { - set_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &sdata->work); - } - return 0; - -err_exists: - spin_unlock(&tbl->hashwlock[hash_idx]); - read_unlock_bh(&pathtbl_resize_lock); - kfree(new_node); -err_node_alloc: - kfree(new_mpath); -err_path_alloc: - return err; + sdata->u.mesh.mpp_paths_generation++; + return ret; } @@ -753,17 +504,26 @@ err_path_alloc: */ void mesh_plink_broken(struct sta_info *sta) { - struct mesh_table *tbl; + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct mesh_table *tbl = sdata->u.mesh.mesh_paths; static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; - struct mpath_node *node; - struct ieee80211_sub_if_data *sdata = sta->sdata; - int i; + struct rhashtable_iter iter; + int ret; - rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, node, i) { - mpath = node->mpath; + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) + continue; + if (IS_ERR(mpath)) + break; if 
(rcu_access_pointer(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && !(mpath->flags & MESH_PATH_FIXED)) { @@ -777,33 +537,30 @@ void mesh_plink_broken(struct sta_info *sta) WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast); } } - rcu_read_unlock(); +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } -static void mesh_path_node_reclaim(struct rcu_head *rp) +static void mesh_path_free_rcu(struct mesh_table *tbl, + struct mesh_path *mpath) { - struct mpath_node *node = container_of(rp, struct mpath_node, rcu); + struct ieee80211_sub_if_data *sdata = mpath->sdata; - del_timer_sync(&node->mpath->timer); - kfree(node->mpath); - kfree(node); + spin_lock_bh(&mpath->state_lock); + mpath->flags |= MESH_PATH_RESOLVING | MESH_PATH_DELETED; + mesh_gate_del(tbl, mpath); + spin_unlock_bh(&mpath->state_lock); + del_timer_sync(&mpath->timer); + atomic_dec(&sdata->u.mesh.mpaths); + atomic_dec(&tbl->entries); + kfree_rcu(mpath, rcu); } -/* needs to be called with the corresponding hashwlock taken */ -static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) +static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath) { - struct mesh_path *mpath = node->mpath; - struct ieee80211_sub_if_data *sdata = node->mpath->sdata; - - spin_lock(&mpath->state_lock); - mpath->flags |= MESH_PATH_RESOLVING; - if (mpath->is_gate) - mesh_gate_del(tbl, mpath); - hlist_del_rcu(&node->list); - call_rcu(&node->rcu, mesh_path_node_reclaim); - spin_unlock(&mpath->state_lock); - atomic_dec(&sdata->u.mesh.mpaths); - atomic_dec(&tbl->entries); + rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params); + mesh_path_free_rcu(tbl, mpath); } /** @@ -819,65 +576,88 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) */ void mesh_path_flush_by_nexthop(struct sta_info *sta) { - struct mesh_table *tbl; + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct mesh_table *tbl = sdata->u.mesh.mesh_paths; struct mesh_path *mpath; - struct mpath_node *node; - int i; + struct rhashtable_iter iter; + int ret; - rcu_read_lock(); - read_lock_bh(&pathtbl_resize_lock); - tbl = resize_dereference_mesh_paths(); - for_each_mesh_entry(tbl, node, i) { - mpath = node->mpath; - if (rcu_access_pointer(mpath->next_hop) == sta) { - spin_lock(&tbl->hashwlock[i]); - __mesh_path_del(tbl, node); - spin_unlock(&tbl->hashwlock[i]); - } + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) + continue; + if (IS_ERR(mpath)) + break; + + if (rcu_access_pointer(mpath->next_hop) == sta) + __mesh_path_del(tbl, mpath); } - read_unlock_bh(&pathtbl_resize_lock); - rcu_read_unlock(); +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, const u8 *proxy) { - struct mesh_table *tbl; - struct mesh_path *mpp; - struct mpath_node *node; - int i; + struct mesh_table *tbl = sdata->u.mesh.mpp_paths; + struct mesh_path *mpath; + struct rhashtable_iter iter; + int ret; - rcu_read_lock(); - read_lock_bh(&pathtbl_resize_lock); - tbl = resize_dereference_mpp_paths(); - for_each_mesh_entry(tbl, node, i) { - mpp = node->mpath; - if (ether_addr_equal(mpp->mpp, proxy)) { - spin_lock(&tbl->hashwlock[i]); - __mesh_path_del(tbl, node); - spin_unlock(&tbl->hashwlock[i]); - } + ret = 
rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) + continue; + if (IS_ERR(mpath)) + break; + + if (ether_addr_equal(mpath->mpp, proxy)) + __mesh_path_del(tbl, mpath); } - read_unlock_bh(&pathtbl_resize_lock); - rcu_read_unlock(); +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } -static void table_flush_by_iface(struct mesh_table *tbl, - struct ieee80211_sub_if_data *sdata) +static void table_flush_by_iface(struct mesh_table *tbl) { struct mesh_path *mpath; - struct mpath_node *node; - int i; + struct rhashtable_iter iter; + int ret; + + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; - WARN_ON(!rcu_read_lock_held()); - for_each_mesh_entry(tbl, node, i) { - mpath = node->mpath; - if (mpath->sdata != sdata) + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) continue; - spin_lock_bh(&tbl->hashwlock[i]); - __mesh_path_del(tbl, node); - spin_unlock_bh(&tbl->hashwlock[i]); + if (IS_ERR(mpath)) + break; + __mesh_path_del(tbl, mpath); } +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } /** @@ -890,16 +670,8 @@ static void table_flush_by_iface(struct mesh_table *tbl, */ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { - struct mesh_table *tbl; - - rcu_read_lock(); - read_lock_bh(&pathtbl_resize_lock); - tbl = resize_dereference_mesh_paths(); - table_flush_by_iface(tbl, sdata); - tbl = resize_dereference_mpp_paths(); - table_flush_by_iface(tbl, sdata); - read_unlock_bh(&pathtbl_resize_lock); - rcu_read_unlock(); + table_flush_by_iface(sdata->u.mesh.mesh_paths); + table_flush_by_iface(sdata->u.mesh.mpp_paths); } /** @@ -911,37 +683,25 @@ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) * * Returns: 0 if successful */ -static int table_path_del(struct mesh_table __rcu *rcu_tbl, +static int table_path_del(struct mesh_table *tbl, struct ieee80211_sub_if_data *sdata, const u8 *addr) { - struct mesh_table *tbl; struct mesh_path *mpath; - struct mpath_node *node; - struct hlist_head *bucket; - int hash_idx; - int err = 0; - - tbl = resize_dereference_paths(rcu_tbl); - hash_idx = mesh_table_hash(addr, sdata, tbl); - bucket = &tbl->hash_buckets[hash_idx]; - - spin_lock(&tbl->hashwlock[hash_idx]); - hlist_for_each_entry(node, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - ether_addr_equal(addr, mpath->dst)) { - __mesh_path_del(tbl, node); - goto enddel; - } + + rcu_read_lock(); + mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params); + if (!mpath) { + rcu_read_unlock(); + return -ENXIO; } - err = -ENXIO; -enddel: - spin_unlock(&tbl->hashwlock[hash_idx]); - return err; + __mesh_path_del(tbl, mpath); + rcu_read_unlock(); + return 0; } + /** * mesh_path_del - delete a mesh path from the table * @@ -952,36 +712,13 @@ enddel: */ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) { - int err = 0; + int err; /* flush relevant mpp entries first */ mpp_flush_by_proxy(sdata, addr); - read_lock_bh(&pathtbl_resize_lock); - err = table_path_del(mesh_paths, sdata, addr); - mesh_paths_generation++; - read_unlock_bh(&pathtbl_resize_lock); - - return err; -} - -/** - * mpp_path_del - delete a mesh proxy path from the table - * 
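Every place that previously used for_each_mesh_entry() now iterates with an rhashtable walker, and the boilerplate is identical each time: init the iterator, start the walk, skip -EAGAIN entries (a concurrent resize), and stop/exit at the end. A generic sketch of that loop (the callback-based helper is made up for illustration):

static void path_for_each(struct rhashtable *ht,
			  void (*fn)(struct path_entry *e, void *arg),
			  void *arg)
{
	struct rhashtable_iter iter;
	struct path_entry *e;
	int ret;

	ret = rhashtable_walk_init(ht, &iter, GFP_ATOMIC);
	if (ret)
		return;

	ret = rhashtable_walk_start(&iter);
	if (ret && ret != -EAGAIN)
		goto out;

	while ((e = rhashtable_walk_next(&iter))) {
		if (IS_ERR(e) && PTR_ERR(e) == -EAGAIN)
			continue;	/* table resized, keep walking */
		if (IS_ERR(e))
			break;
		fn(e, arg);
	}
out:
	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
}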
- * @addr: addr address (ETH_ALEN length) - * @sdata: local subif - * - * Returns: 0 if successful - */ -static int mpp_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) -{ - int err = 0; - - read_lock_bh(&pathtbl_resize_lock); - err = table_path_del(mpp_paths, sdata, addr); - mpp_paths_generation++; - read_unlock_bh(&pathtbl_resize_lock); - + err = table_path_del(sdata->u.mesh.mesh_paths, sdata, addr); + sdata->u.mesh.mesh_paths_generation++; return err; } @@ -1015,39 +752,30 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) struct ieee80211_sub_if_data *sdata = mpath->sdata; struct mesh_table *tbl; struct mesh_path *from_mpath = mpath; - struct mpath_node *gate = NULL; + struct mesh_path *gate; bool copy = false; - struct hlist_head *known_gates; - - rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); - known_gates = tbl->known_gates; - rcu_read_unlock(); - if (!known_gates) - return -EHOSTUNREACH; + tbl = sdata->u.mesh.mesh_paths; - hlist_for_each_entry_rcu(gate, known_gates, list) { - if (gate->mpath->sdata != sdata) - continue; - - if (gate->mpath->flags & MESH_PATH_ACTIVE) { - mpath_dbg(sdata, "Forwarding to %pM\n", gate->mpath->dst); - mesh_path_move_to_queue(gate->mpath, from_mpath, copy); - from_mpath = gate->mpath; + rcu_read_lock(); + hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { + if (gate->flags & MESH_PATH_ACTIVE) { + mpath_dbg(sdata, "Forwarding to %pM\n", gate->dst); + mesh_path_move_to_queue(gate, from_mpath, copy); + from_mpath = gate; copy = true; } else { mpath_dbg(sdata, "Not forwarding to %pM (flags %#x)\n", - gate->mpath->dst, gate->mpath->flags); + gate->dst, gate->flags); } } - hlist_for_each_entry_rcu(gate, known_gates, list) - if (gate->mpath->sdata == sdata) { - mpath_dbg(sdata, "Sending to %pM\n", gate->mpath->dst); - mesh_path_tx_pending(gate->mpath); - } + hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { + mpath_dbg(sdata, "Sending to %pM\n", gate->dst); + mesh_path_tx_pending(gate); + } + rcu_read_unlock(); return (from_mpath == mpath) ? 
-EHOSTUNREACH : 0; } @@ -1104,118 +832,73 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mesh_path_tx_pending(mpath); } -static void mesh_path_node_free(struct hlist_node *p, bool free_leafs) -{ - struct mesh_path *mpath; - struct mpath_node *node = hlist_entry(p, struct mpath_node, list); - mpath = node->mpath; - hlist_del_rcu(p); - if (free_leafs) { - del_timer_sync(&mpath->timer); - kfree(mpath); - } - kfree(node); -} - -static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) -{ - struct mesh_path *mpath; - struct mpath_node *node, *new_node; - u32 hash_idx; - - new_node = kmalloc(sizeof(struct mpath_node), GFP_ATOMIC); - if (new_node == NULL) - return -ENOMEM; - - node = hlist_entry(p, struct mpath_node, list); - mpath = node->mpath; - new_node->mpath = mpath; - hash_idx = mesh_table_hash(mpath->dst, mpath->sdata, newtbl); - hlist_add_head(&new_node->list, - &newtbl->hash_buckets[hash_idx]); - return 0; -} - -int mesh_pathtbl_init(void) +int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) { struct mesh_table *tbl_path, *tbl_mpp; int ret; - tbl_path = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + tbl_path = mesh_table_alloc(); if (!tbl_path) return -ENOMEM; - tbl_path->free_node = &mesh_path_node_free; - tbl_path->copy_node = &mesh_path_node_copy; - tbl_path->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); - if (!tbl_path->known_gates) { - ret = -ENOMEM; - goto free_path; - } - INIT_HLIST_HEAD(tbl_path->known_gates); - - tbl_mpp = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + tbl_mpp = mesh_table_alloc(); if (!tbl_mpp) { ret = -ENOMEM; goto free_path; } - tbl_mpp->free_node = &mesh_path_node_free; - tbl_mpp->copy_node = &mesh_path_node_copy; - tbl_mpp->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); - if (!tbl_mpp->known_gates) { - ret = -ENOMEM; - goto free_mpp; - } - INIT_HLIST_HEAD(tbl_mpp->known_gates); - /* Need no locking since this is during init */ - RCU_INIT_POINTER(mesh_paths, tbl_path); - RCU_INIT_POINTER(mpp_paths, tbl_mpp); + rhashtable_init(&tbl_path->rhead, &mesh_rht_params); + rhashtable_init(&tbl_mpp->rhead, &mesh_rht_params); + + sdata->u.mesh.mesh_paths = tbl_path; + sdata->u.mesh.mpp_paths = tbl_mpp; return 0; -free_mpp: - mesh_table_free(tbl_mpp, true); free_path: - mesh_table_free(tbl_path, true); + mesh_table_free(tbl_path); return ret; } -void mesh_path_expire(struct ieee80211_sub_if_data *sdata) +static +void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, + struct mesh_table *tbl) { - struct mesh_table *tbl; struct mesh_path *mpath; - struct mpath_node *node; - int i; + struct rhashtable_iter iter; + int ret; - rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, node, i) { - if (node->mpath->sdata != sdata) + ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_KERNEL); + if (ret) + return; + + ret = rhashtable_walk_start(&iter); + if (ret && ret != -EAGAIN) + goto out; + + while ((mpath = rhashtable_walk_next(&iter))) { + if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) continue; - mpath = node->mpath; + if (IS_ERR(mpath)) + break; if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) - mesh_path_del(mpath->sdata, mpath->dst); - } - - tbl = rcu_dereference(mpp_paths); - for_each_mesh_entry(tbl, node, i) { - if (node->mpath->sdata != sdata) - continue; - mpath = node->mpath; - if ((!(mpath->flags & MESH_PATH_FIXED)) && - time_after(jiffies, 
mpath->exp_time + MESH_PATH_EXPIRE)) - mpp_path_del(mpath->sdata, mpath->dst); + __mesh_path_del(tbl, mpath); } +out: + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} - rcu_read_unlock(); +void mesh_path_expire(struct ieee80211_sub_if_data *sdata) +{ + mesh_path_tbl_expire(sdata, sdata->u.mesh.mesh_paths); + mesh_path_tbl_expire(sdata, sdata->u.mesh.mpp_paths); } -void mesh_pathtbl_unregister(void) +void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata) { - /* no need for locking during exit path */ - mesh_table_free(rcu_dereference_protected(mesh_paths, 1), true); - mesh_table_free(rcu_dereference_protected(mpp_paths, 1), true); + mesh_table_free(sdata->u.mesh.mesh_paths); + mesh_table_free(sdata->u.mesh.mpp_paths); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a07e93c21c9e..79f2a0a13db8 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -61,7 +61,7 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold; return rssi_threshold == 0 || (sta && - (s8)-ewma_signal_read(&sta->rx_stats.avg_signal) > + (s8)-ewma_signal_read(&sta->rx_stats_avg.signal) > rssi_threshold); } @@ -93,18 +93,18 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta) static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; struct sta_info *sta; u32 erp_rates = 0, changed = 0; int i; bool short_slot = false; - if (band == IEEE80211_BAND_5GHZ) { + if (band == NL80211_BAND_5GHZ) { /* (IEEE 802.11-2012 19.4.5) */ short_slot = true; goto out; - } else if (band != IEEE80211_BAND_2GHZ) + } else if (band != NL80211_BAND_2GHZ) goto out; for (i = 0; i < sband->n_bitrates; i++) @@ -247,7 +247,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.self_prot.action_code = action; if (action != WLAN_SP_MESH_PEERING_CLOSE) { - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); /* capability info */ pos = skb_put(skb, 2); @@ -331,7 +331,9 @@ free: * * @sta: mesh peer link to deactivate * - * All mesh paths with this peer as next hop will be flushed + * Mesh paths with this peer as next hop should be flushed + * by the caller outside of plink_lock. + * * Returns beacon changed flag if the beacon content changed. 
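The mesh_plink changes that follow move mesh_path_flush_by_nexthop() out from under plink_lock: the state transition still happens with the spinlock held, but the path-table walk is remembered in a local flag and run only after the unlock. Schematically (a simplified, hypothetical handler, not the actual state machine):

static u32 plink_event_example(struct sta_info *sta, bool teardown)
{
	u32 changed = 0;
	bool flush = false;

	spin_lock_bh(&sta->mesh->plink_lock);
	if (teardown) {
		changed = __mesh_plink_deactivate(sta);
		flush = true;		/* defer the path-table walk */
	}
	spin_unlock_bh(&sta->mesh->plink_lock);

	if (flush)			/* safe now: plink_lock dropped */
		mesh_path_flush_by_nexthop(sta);

	return changed;
}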
* * Locking: the caller must hold sta->mesh->plink_lock @@ -346,7 +348,6 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) if (sta->mesh->plink_state == NL80211_PLINK_ESTAB) changed = mesh_plink_dec_estab_count(sdata); sta->mesh->plink_state = NL80211_PLINK_BLOCKED; - mesh_path_flush_by_nexthop(sta); ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, @@ -374,6 +375,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta) sta->sta.addr, sta->mesh->llid, sta->mesh->plid, sta->mesh->reason); spin_unlock_bh(&sta->mesh->plink_lock); + mesh_path_flush_by_nexthop(sta); return changed; } @@ -383,7 +385,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, bool insert) { struct ieee80211_local *local = sdata->local; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u32 rates, basic_rates = 0, changed = 0; enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; @@ -748,6 +750,7 @@ u32 mesh_plink_block(struct sta_info *sta) changed = __mesh_plink_deactivate(sta); sta->mesh->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->mesh->plink_lock); + mesh_path_flush_by_nexthop(sta); return changed; } @@ -797,6 +800,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; enum ieee80211_self_protected_actioncode action = 0; u32 changed = 0; + bool flush = false; mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr, mplstates[sta->mesh->plink_state], mplevents[event]); @@ -885,6 +889,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, changed |= mesh_set_short_slot_time(sdata); mesh_plink_close(sdata, sta, event); action = WLAN_SP_MESH_PEERING_CLOSE; + flush = true; break; case OPN_ACPT: action = WLAN_SP_MESH_PEERING_CONFIRM; @@ -916,6 +921,8 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, break; } spin_unlock_bh(&sta->mesh->plink_lock); + if (flush) + mesh_path_flush_by_nexthop(sta); if (action) { mesh_plink_frame_tx(sdata, sta, action, sta->sta.addr, sta->mesh->llid, sta->mesh->plid, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 281b8d6e5109..885f4ca0888d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -122,15 +122,16 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (unlikely(!sdata->u.mgd.associated)) + if (unlikely(!ifmgd->associated)) return; - ifmgd->probe_send_count = 0; + if (ifmgd->probe_send_count) + ifmgd->probe_send_count = 0; if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) return; - mod_timer(&sdata->u.mgd.conn_mon_timer, + mod_timer(&ifmgd->conn_mon_timer, round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); } @@ -660,7 +661,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) capab = WLAN_CAPABILITY_ESS; - if (sband->band == IEEE80211_BAND_2GHZ) { + if (sband->band == NL80211_BAND_2GHZ) { capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; } @@ -1099,7 +1100,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss = ifmgd->associated; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; - enum ieee80211_band current_band; + enum nl80211_band current_band; struct ieee80211_csa_ie csa_ie; struct ieee80211_channel_switch 
ch_switch; int res; @@ -1256,11 +1257,11 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, default: WARN_ON_ONCE(1); /* fall through */ - case IEEE80211_BAND_2GHZ: - case IEEE80211_BAND_60GHZ: + case NL80211_BAND_2GHZ: + case NL80211_BAND_60GHZ: chan_increment = 1; break; - case IEEE80211_BAND_5GHZ: + case NL80211_BAND_5GHZ: chan_increment = 4; break; } @@ -1860,7 +1861,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, } use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME); - if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_5GHZ) + if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_5GHZ) use_short_slot = true; if (use_protection != bss_conf->use_cts_prot) { @@ -2216,6 +2217,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) const u8 *ssid; u8 *dst = ifmgd->associated->bssid; u8 unicast_limit = max(1, max_probe_tries - 3); + struct sta_info *sta; /* * Try sending broadcast probe requests for the last three @@ -2234,6 +2236,14 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) */ ifmgd->probe_send_count++; + if (dst) { + mutex_lock(&sdata->local->sta_mtx); + sta = sta_info_get(sdata, dst); + if (!WARN_ON(!sta)) + ieee80211_check_fast_rx(sta); + mutex_unlock(&sdata->local->sta_mtx); + } + if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { ifmgd->nullfunc_failed = false; ieee80211_send_nullfunc(sdata->local, sdata, false); @@ -4365,7 +4375,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ - if (cbss->channel->band == IEEE80211_BAND_2GHZ && + if (cbss->channel->band == NL80211_BAND_2GHZ && have_higher_than_11mbit) sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; else diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 0be0aadfc559..88e6ebbbe24f 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -75,8 +75,6 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return; - sta->rx_stats.last_rx = jiffies; - /* Add only mandatory rates for now */ sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index a4e2f4e67f94..206698bc93f4 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -287,7 +287,7 @@ static void __rate_control_send_low(struct ieee80211_hw *hw, u32 rate_flags = ieee80211_chandef_rate_flags(&hw->conf.chandef); - if ((sband->band == IEEE80211_BAND_2GHZ) && + if ((sband->band == NL80211_BAND_2GHZ) && (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) rate_flags |= IEEE80211_RATE_ERP_G; diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 624fe5b81615..8d3260785b94 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -96,9 +96,9 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta) { #ifdef CONFIG_MAC80211_DEBUGFS struct rate_control_ref *ref = sta->rate_ctrl; - if (ref && sta->debugfs.dir && ref->ops->add_sta_debugfs) + if (ref && sta->debugfs_dir && ref->ops->add_sta_debugfs) ref->ops->add_sta_debugfs(ref->priv, sta->rate_ctrl_priv, - sta->debugfs.dir); + sta->debugfs_dir); #endif } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index b54f398cda5d..14c5ba3a1b1c 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -436,7 +436,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, static void -calc_rate_durations(enum 
ieee80211_band band, +calc_rate_durations(enum nl80211_band band, struct minstrel_rate *d, struct ieee80211_rate *rate, struct cfg80211_chan_def *chandef) @@ -579,7 +579,7 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) if (!mi) return NULL; - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { sband = hw->wiphy->bands[i]; if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; @@ -621,7 +621,7 @@ minstrel_init_cck_rates(struct minstrel_priv *mp) u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef); int i, j; - sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ]; + sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ]; if (!sband) return; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 370d677b547b..30fbabf4bcbc 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -883,6 +883,59 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, ratetbl->rate[offset].flags = flags; } +static inline int +minstrel_ht_get_prob_ewma(struct minstrel_ht_sta *mi, int rate) +{ + int group = rate / MCS_GROUP_RATES; + rate %= MCS_GROUP_RATES; + return mi->groups[group].rates[rate].prob_ewma; +} + +static int +minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) +{ + int group = mi->max_prob_rate / MCS_GROUP_RATES; + const struct mcs_group *g = &minstrel_mcs_groups[group]; + int rate = mi->max_prob_rate % MCS_GROUP_RATES; + + /* Disable A-MSDU if max_prob_rate is bad */ + if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100)) + return 1; + + /* If the rate is slower than single-stream MCS1, make A-MSDU limit small */ + if (g->duration[rate] > MCS_DURATION(1, 0, 52)) + return 500; + + /* + * If the rate is slower than single-stream MCS4, limit A-MSDU to usual + * data packet size + */ + if (g->duration[rate] > MCS_DURATION(1, 0, 104)) + return 1600; + + /* + * If the rate is slower than single-stream MCS7, or if the max throughput + * rate success probability is less than 75%, limit A-MSDU to twice the usual + * data packet size + */ + if (g->duration[rate] > MCS_DURATION(1, 0, 260) || + (minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) < + MINSTREL_FRAC(75, 100))) + return 3200; + + /* + * HT A-MPDU limits maximum MPDU size under BA agreement to 4095 bytes. + * Since aggregation sessions are started/stopped without txq flush, use + * the limit here to avoid the complexity of having to de-aggregate + * packets in the queue. 
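minstrel_ht_get_max_amsdu_len() above turns the airtime and success probability of the most reliable rate into an A-MSDU size cap. Read as a ladder, the mapping is roughly the following; the helper name and boolean inputs are made up, the byte limits are the ones the hunk returns:

static int amsdu_cap(bool prob_below_50, bool slower_than_mcs1,
		     bool slower_than_mcs4, bool slower_than_mcs7_or_weak_tp,
		     bool ht_ba_limited)
{
	if (prob_below_50)
		return 1;	/* effectively disable A-MSDU */
	if (slower_than_mcs1)
		return 500;
	if (slower_than_mcs4)
		return 1600;	/* roughly one data packet */
	if (slower_than_mcs7_or_weak_tp)
		return 3200;	/* roughly two data packets */
	if (ht_ba_limited)
		return 4095;	/* IEEE80211_MAX_MPDU_LEN_HT_BA */
	return 0;		/* unlimited */
}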
+ */ + if (!mi->sta->vht_cap.vht_supported) + return IEEE80211_MAX_MPDU_LEN_HT_BA; + + /* unlimited */ + return 0; +} + static void minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { @@ -907,6 +960,7 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate); } + mi->sta->max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); rates->rate[i].idx = -1; rate_control_set_rates(mp->hw, mi->sta, rates); } @@ -924,6 +978,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) struct minstrel_rate_stats *mrs; struct minstrel_mcs_group_data *mg; unsigned int sample_dur, sample_group, cur_max_tp_streams; + int tp_rate1, tp_rate2; int sample_idx = 0; if (mi->sample_wait > 0) { @@ -945,14 +1000,22 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mrs = &mg->rates[sample_idx]; sample_idx += sample_group * MCS_GROUP_RATES; + /* Set tp_rate1, tp_rate2 to the highest / second highest max_tp_rate */ + if (minstrel_get_duration(mi->max_tp_rate[0]) > + minstrel_get_duration(mi->max_tp_rate[1])) { + tp_rate1 = mi->max_tp_rate[1]; + tp_rate2 = mi->max_tp_rate[0]; + } else { + tp_rate1 = mi->max_tp_rate[0]; + tp_rate2 = mi->max_tp_rate[1]; + } + /* * Sampling might add some overhead (RTS, no aggregation) - * to the frame. Hence, don't use sampling for the currently - * used rates. + * to the frame. Hence, don't use sampling for the highest currently + * used highest throughput or probability rate. */ - if (sample_idx == mi->max_tp_rate[0] || - sample_idx == mi->max_tp_rate[1] || - sample_idx == mi->max_prob_rate) + if (sample_idx == mi->max_tp_rate[0] || sample_idx == mi->max_prob_rate) return -1; /* @@ -967,10 +1030,10 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) * if the link is working perfectly. 
*/ - cur_max_tp_streams = minstrel_mcs_groups[mi->max_tp_rate[0] / + cur_max_tp_streams = minstrel_mcs_groups[tp_rate1 / MCS_GROUP_RATES].streams; sample_dur = minstrel_get_duration(sample_idx); - if (sample_dur >= minstrel_get_duration(mi->max_tp_rate[1]) && + if (sample_dur >= minstrel_get_duration(tp_rate2) && (cur_max_tp_streams - 1 < minstrel_mcs_groups[sample_group].streams || sample_dur >= minstrel_get_duration(mi->max_prob_rate))) { @@ -1074,7 +1137,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, { int i; - if (sband->band != IEEE80211_BAND_2GHZ) + if (sband->band != NL80211_BAND_2GHZ) return; if (!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) @@ -1272,7 +1335,7 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) int max_rates = 0; int i; - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { sband = hw->wiphy->bands[i]; if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dc27becb9b71..c5678703921e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -322,7 +322,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (status->flag & RX_FLAG_5MHZ) channel_flags |= IEEE80211_CHAN_QUARTER; - if (status->band == IEEE80211_BAND_5GHZ) + if (status->band == NL80211_BAND_5GHZ) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ; else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ; @@ -722,8 +722,8 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) return -1; } -static int iwl80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, - struct sk_buff *skb) +static int ieee80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, + struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc; @@ -1421,16 +1421,9 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control) && - !is_multicast_ether_addr(hdr->addr1)) { - sta->rx_stats.last_rate_idx = - status->rate_idx; - sta->rx_stats.last_rate_flag = - status->flag; - sta->rx_stats.last_rate_vht_flag = - status->vht_flag; - sta->rx_stats.last_rate_vht_nss = - status->vht_nss; - } + !is_multicast_ether_addr(hdr->addr1)) + sta->rx_stats.last_rate = + sta_stats_encode_rate(status); } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { sta->rx_stats.last_rx = jiffies; @@ -1440,22 +1433,22 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * match the current local configuration when processed. 
*/ sta->rx_stats.last_rx = jiffies; - if (ieee80211_is_data(hdr->frame_control)) { - sta->rx_stats.last_rate_idx = status->rate_idx; - sta->rx_stats.last_rate_flag = status->flag; - sta->rx_stats.last_rate_vht_flag = status->vht_flag; - sta->rx_stats.last_rate_vht_nss = status->vht_nss; - } + if (ieee80211_is_data(hdr->frame_control)) + sta->rx_stats.last_rate = sta_stats_encode_rate(status); } if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_notify(rx->sdata, hdr); sta->rx_stats.fragments++; + + u64_stats_update_begin(&rx->sta->rx_stats.syncp); sta->rx_stats.bytes += rx->skb->len; + u64_stats_update_end(&rx->sta->rx_stats.syncp); + if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { sta->rx_stats.last_signal = status->signal; - ewma_signal_add(&sta->rx_stats.avg_signal, -status->signal); + ewma_signal_add(&sta->rx_stats_avg.signal, -status->signal); } if (status->chains) { @@ -1467,7 +1460,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) continue; sta->rx_stats.chain_signal_last[i] = signal; - ewma_signal_add(&sta->rx_stats.chain_signal_avg[i], + ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], -signal); } } @@ -1586,7 +1579,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_has_protected(fc) && rx->sta->cipher_scheme) { cs = rx->sta->cipher_scheme; - keyid = iwl80211_get_cs_keyid(cs, rx->skb); + keyid = ieee80211_get_cs_keyid(cs, rx->skb); if (unlikely(keyid < 0)) return RX_DROP_UNUSABLE; } @@ -1670,7 +1663,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(fc); if (cs) { - keyidx = iwl80211_get_cs_keyid(cs, rx->skb); + keyidx = ieee80211_get_cs_keyid(cs, rx->skb); if (unlikely(keyidx < 0)) return RX_DROP_UNUSABLE; @@ -2129,6 +2122,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) ieee80211_rx_stats(dev, skb->len); + if (rx->sta) { + /* The seqno index has the same property as needed + * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS + * for non-QoS-data frames. Here we know it's a data + * frame, so count MSDUs. + */ + u64_stats_update_begin(&rx->sta->rx_stats.syncp); + rx->sta->rx_stats.msdu[rx->seqno_idx]++; + u64_stats_update_end(&rx->sta->rx_stats.syncp); + } + if ((sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && @@ -2415,15 +2419,6 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return RX_DROP_MONITOR; - if (rx->sta) { - /* The seqno index has the same property as needed - * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS - * for non-QoS-data frames. Here we know it's a data - * frame, so count MSDUs. - */ - rx->sta->rx_stats.msdu[rx->seqno_idx]++; - } - /* * Send unexpected-4addr-frame event to hostapd. For older versions, * also drop the frame to cooked monitor interfaces. 
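The byte and MSDU counters above are now wrapped in u64_stats_update_begin()/end() so a 32-bit reader can take a consistent 64-bit snapshot. The canonical writer/reader pairing looks like this (illustrative struct, not the mac80211 one; the syncp needs u64_stats_init() at setup time):

#include <linux/u64_stats_sync.h>

struct rx_counters {
	u64 bytes;
	u64 msdu;
	struct u64_stats_sync syncp;
};

static void rx_account(struct rx_counters *c, unsigned int len)
{
	u64_stats_update_begin(&c->syncp);	/* writer side */
	c->bytes += len;
	c->msdu++;
	u64_stats_update_end(&c->syncp);
}

static void rx_read(struct rx_counters *c, u64 *bytes, u64 *msdu)
{
	unsigned int start;

	do {					/* reader retries if it raced */
		start = u64_stats_fetch_begin(&c->syncp);
		*bytes = c->bytes;
		*msdu = c->msdu;
	} while (u64_stats_fetch_retry(&c->syncp, start));
}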
@@ -2474,14 +2469,14 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) rx->skb->dev = dev; - if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && + if (!ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS) && + local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && !is_multicast_ether_addr( ((struct ethhdr *)rx->skb->data)->h_dest) && (!local->scanning && - !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) { - mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); - } + !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); ieee80211_deliver_skb(rx); @@ -2828,7 +2823,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) switch (mgmt->u.action.u.measurement.action_code) { case WLAN_ACTION_SPCT_MSR_REQ: - if (status->band != IEEE80211_BAND_5GHZ) + if (status->band != NL80211_BAND_5GHZ) break; if (len < (IEEE80211_MIN_ACTION_SIZE + @@ -3201,7 +3196,7 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, res = rxh(rx); \ if (res != RX_CONTINUE) \ goto rxh_next; \ - } while (0); + } while (0) /* Lock here to avoid hitting all of the data used in the RX * path (e.g. key data, station data, ...) concurrently when @@ -3219,30 +3214,30 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, */ rx->skb = skb; - CALL_RXH(ieee80211_rx_h_check_more_data) - CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll) - CALL_RXH(ieee80211_rx_h_sta_process) - CALL_RXH(ieee80211_rx_h_decrypt) - CALL_RXH(ieee80211_rx_h_defragment) - CALL_RXH(ieee80211_rx_h_michael_mic_verify) + CALL_RXH(ieee80211_rx_h_check_more_data); + CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll); + CALL_RXH(ieee80211_rx_h_sta_process); + CALL_RXH(ieee80211_rx_h_decrypt); + CALL_RXH(ieee80211_rx_h_defragment); + CALL_RXH(ieee80211_rx_h_michael_mic_verify); /* must be after MMIC verify so header is counted in MPDU mic */ #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&rx->sdata->vif)) CALL_RXH(ieee80211_rx_h_mesh_fwding); #endif - CALL_RXH(ieee80211_rx_h_amsdu) - CALL_RXH(ieee80211_rx_h_data) + CALL_RXH(ieee80211_rx_h_amsdu); + CALL_RXH(ieee80211_rx_h_data); /* special treatment -- needs the queue */ res = ieee80211_rx_h_ctrl(rx, frames); if (res != RX_CONTINUE) goto rxh_next; - CALL_RXH(ieee80211_rx_h_mgmt_check) - CALL_RXH(ieee80211_rx_h_action) - CALL_RXH(ieee80211_rx_h_userspace_mgmt) - CALL_RXH(ieee80211_rx_h_action_return) - CALL_RXH(ieee80211_rx_h_mgmt) + CALL_RXH(ieee80211_rx_h_mgmt_check); + CALL_RXH(ieee80211_rx_h_action); + CALL_RXH(ieee80211_rx_h_userspace_mgmt); + CALL_RXH(ieee80211_rx_h_action_return); + CALL_RXH(ieee80211_rx_h_mgmt); rxh_next: ieee80211_rx_handlers_result(rx, res); @@ -3265,10 +3260,10 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) res = rxh(rx); \ if (res != RX_CONTINUE) \ goto rxh_next; \ - } while (0); + } while (0) - CALL_RXH(ieee80211_rx_h_check_dup) - CALL_RXH(ieee80211_rx_h_check) + CALL_RXH(ieee80211_rx_h_check_dup); + CALL_RXH(ieee80211_rx_h_check); ieee80211_rx_reorder_ampdu(rx, &reorder_release); @@ -3513,6 +3508,351 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) return false; } +void ieee80211_check_fast_rx(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_key *key; + struct ieee80211_fast_rx fastrx = { + .dev = sdata->dev, + .vif_type = sdata->vif.type, + .control_port_protocol = 
sdata->control_port_protocol, + }, *old, *new = NULL; + bool assign = false; + + /* use sparse to check that we don't return without updating */ + __acquire(check_fast_rx); + + BUILD_BUG_ON(sizeof(fastrx.rfc1042_hdr) != sizeof(rfc1042_header)); + BUILD_BUG_ON(sizeof(fastrx.rfc1042_hdr) != ETH_ALEN); + ether_addr_copy(fastrx.rfc1042_hdr, rfc1042_header); + ether_addr_copy(fastrx.vif_addr, sdata->vif.addr); + + fastrx.uses_rss = ieee80211_hw_check(&local->hw, USES_RSS); + + /* fast-rx doesn't do reordering */ + if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) && + !ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) + goto clear; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + /* 4-addr is harder to deal with, later maybe */ + if (sdata->u.mgd.use_4addr) + goto clear; + /* software powersave is a huge mess, avoid all of it */ + if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) + goto clear; + if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) && + !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) + goto clear; + if (sta->sta.tdls) { + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2); + fastrx.expected_ds_bits = 0; + } else { + fastrx.sta_notify = sdata->u.mgd.probe_send_count > 0; + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr3); + fastrx.expected_ds_bits = + cpu_to_le16(IEEE80211_FCTL_FROMDS); + } + break; + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_AP: + /* parallel-rx requires this, at least with calls to + * ieee80211_sta_ps_transition() + */ + if (!ieee80211_hw_check(&local->hw, AP_LINK_PS)) + goto clear; + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2); + fastrx.expected_ds_bits = cpu_to_le16(IEEE80211_FCTL_TODS); + + fastrx.internal_forward = + !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && + (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || + !sdata->u.vlan.sta); + break; + default: + goto clear; + } + + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + goto clear; + + rcu_read_lock(); + key = rcu_dereference(sta->ptk[sta->ptk_idx]); + if (key) { + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_TKIP: + /* we don't want to deal with MMIC in fast-rx */ + goto clear_rcu; + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + break; + default: + /* we also don't want to deal with WEP or cipher scheme + * since those require looking up the key idx in the + * frame, rather than assuming the PTK is used + * (we need to revisit this once we implement the real + * PTK index, which is now valid in the spec, but we + * haven't implemented that part yet) + */ + goto clear_rcu; + } + + fastrx.key = true; + fastrx.icv_len = key->conf.icv_len; + } + + assign = true; + clear_rcu: + rcu_read_unlock(); + clear: + __release(check_fast_rx); + + if (assign) + new = kmemdup(&fastrx, sizeof(fastrx), GFP_KERNEL); + + spin_lock_bh(&sta->lock); + old = rcu_dereference_protected(sta->fast_rx, true); + rcu_assign_pointer(sta->fast_rx, new); + spin_unlock_bh(&sta->lock); + + if (old) + kfree_rcu(old, rcu_head); +} + +void ieee80211_clear_fast_rx(struct sta_info *sta) +{ + struct ieee80211_fast_rx *old; + + spin_lock_bh(&sta->lock); + old = rcu_dereference_protected(sta->fast_rx, true); + RCU_INIT_POINTER(sta->fast_rx, NULL); + spin_unlock_bh(&sta->lock); + + if (old) + kfree_rcu(old, 
rcu_head); +} + +void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + lockdep_assert_held(&local->sta_mtx); + + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata && + (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) + continue; + ieee80211_check_fast_rx(sta); + } +} + +void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->sta_mtx); + __ieee80211_check_fast_rx_iface(sdata); + mutex_unlock(&local->sta_mtx); +} + +static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, + struct ieee80211_fast_rx *fast_rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_hdr *hdr = (void *)skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct sta_info *sta = rx->sta; + int orig_len = skb->len; + int snap_offs = ieee80211_hdrlen(hdr->frame_control); + struct { + u8 snap[sizeof(rfc1042_header)]; + __be16 proto; + } *payload __aligned(2); + struct { + u8 da[ETH_ALEN]; + u8 sa[ETH_ALEN]; + } addrs __aligned(2); + struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; + + if (fast_rx->uses_rss) + stats = this_cpu_ptr(sta->pcpu_rx_stats); + + /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write + * to a common data structure; drivers can implement that per queue + * but we don't have that information in mac80211 + */ + if (!(status->flag & RX_FLAG_DUP_VALIDATED)) + return false; + +#define FAST_RX_CRYPT_FLAGS (RX_FLAG_PN_VALIDATED | RX_FLAG_DECRYPTED) + + /* If using encryption, we also need to have: + * - PN_VALIDATED: similar, but the implementation is tricky + * - DECRYPTED: necessary for PN_VALIDATED + */ + if (fast_rx->key && + (status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS) + return false; + + /* we don't deal with A-MSDU deaggregation here */ + if (status->rx_flags & IEEE80211_RX_AMSDU) + return false; + + if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) + return false; + + if (unlikely(ieee80211_is_frag(hdr))) + return false; + + /* Since our interface address cannot be multicast, this + * implicitly also rejects multicast frames without the + * explicit check. + * + * We shouldn't get any *data* frames not addressed to us + * (AP mode will accept multicast *management* frames), but + * punting here will make it go through the full checks in + * ieee80211_accept_frame(). + */ + if (!ether_addr_equal(fast_rx->vif_addr, hdr->addr1)) + return false; + + if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS)) != + fast_rx->expected_ds_bits) + goto drop; + + /* assign the key to drop unencrypted frames (later) + * and strip the IV/MIC if necessary + */ + if (fast_rx->key && !(status->flag & RX_FLAG_IV_STRIPPED)) { + /* GCMP header length is the same */ + snap_offs += IEEE80211_CCMP_HDR_LEN; + } + + if (!pskb_may_pull(skb, snap_offs + sizeof(*payload))) + goto drop; + payload = (void *)(skb->data + snap_offs); + + if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr)) + return false; + + /* Don't handle these here since they require special code. + * Accept AARP and IPX even though they should come with a + * bridge-tunnel header - but if we get them this way then + * there's little point in discarding them. 
+ */ + if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) || + payload->proto == fast_rx->control_port_protocol)) + return false; + + /* after this point, don't punt to the slowpath! */ + + if (rx->key && !(status->flag & RX_FLAG_MIC_STRIPPED) && + pskb_trim(skb, skb->len - fast_rx->icv_len)) + goto drop; + + if (unlikely(fast_rx->sta_notify)) { + ieee80211_sta_rx_notify(rx->sdata, hdr); + fast_rx->sta_notify = false; + } + + /* statistics part of ieee80211_rx_h_sta_process() */ + stats->last_rx = jiffies; + stats->last_rate = sta_stats_encode_rate(status); + + stats->fragments++; + + if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { + stats->last_signal = status->signal; + if (!fast_rx->uses_rss) + ewma_signal_add(&sta->rx_stats_avg.signal, + -status->signal); + } + + if (status->chains) { + int i; + + stats->chains = status->chains; + for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { + int signal = status->chain_signal[i]; + + if (!(status->chains & BIT(i))) + continue; + + stats->chain_signal_last[i] = signal; + if (!fast_rx->uses_rss) + ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], + -signal); + } + } + /* end of statistics */ + + if (rx->key && !ieee80211_has_protected(hdr->frame_control)) + goto drop; + + /* do the header conversion - first grab the addresses */ + ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs); + ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs); + /* remove the SNAP but leave the ethertype */ + skb_pull(skb, snap_offs + sizeof(rfc1042_header)); + /* push the addresses in front */ + memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs)); + + skb->dev = fast_rx->dev; + + ieee80211_rx_stats(fast_rx->dev, skb->len); + + /* The seqno index has the same property as needed + * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS + * for non-QoS-data frames. Here we know it's a data + * frame, so count MSDUs. + */ + u64_stats_update_begin(&stats->syncp); + stats->msdu[rx->seqno_idx]++; + stats->bytes += orig_len; + u64_stats_update_end(&stats->syncp); + + if (fast_rx->internal_forward) { + struct sta_info *dsta = sta_info_get(rx->sdata, skb->data); + + if (dsta) { + /* + * Send to wireless media and increase priority by 256 + * to keep the received priority instead of + * reclassifying the frame (see cfg80211_classify8021d). + */ + skb->priority += 256; + skb->protocol = htons(ETH_P_802_3); + skb_reset_network_header(skb); + skb_reset_mac_header(skb); + dev_queue_xmit(skb); + return true; + } + } + + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, fast_rx->dev); + memset(skb->cb, 0, sizeof(skb->cb)); + if (rx->napi) + napi_gro_receive(rx->napi, skb); + else + netif_receive_skb(skb); + + return true; + drop: + dev_kfree_skb(skb); + stats->dropped++; + return true; +} + /* * This function returns whether or not the SKB * was destined for RX processing or not, which, @@ -3527,6 +3867,21 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, rx->skb = skb; + /* See if we can do fast-rx; if we have to copy we already lost, + * so punt in that case. We should never have to deliver a data + * frame to multiple interfaces anyway. + * + * We skip the ieee80211_accept_frame() call and do the necessary + * checking inside ieee80211_invoke_fast_rx(). 
+ */ + if (consume && rx->sta) { + struct ieee80211_fast_rx *fast_rx; + + fast_rx = rcu_dereference(rx->sta->fast_rx); + if (fast_rx && ieee80211_invoke_fast_rx(rx, fast_rx)) + return true; + } + if (!ieee80211_accept_frame(rx)) return false; @@ -3552,6 +3907,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, * be called with rcu_read_lock protection. */ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, struct sk_buff *skb, struct napi_struct *napi) { @@ -3561,7 +3917,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, __le16 fc; struct ieee80211_rx_data rx; struct ieee80211_sub_if_data *prev; - struct sta_info *sta, *prev_sta; struct rhash_head *tmp; int err = 0; @@ -3597,7 +3952,14 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_is_beacon(hdr->frame_control))) ieee80211_scan_rx(local, skb); - if (ieee80211_is_data(fc)) { + if (pubsta) { + rx.sta = container_of(pubsta, struct sta_info, sta); + rx.sdata = rx.sta->sdata; + if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) + return; + goto out; + } else if (ieee80211_is_data(fc)) { + struct sta_info *sta, *prev_sta; const struct bucket_table *tbl; prev_sta = NULL; @@ -3671,8 +4033,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, * This is the receive path handler. It is called by a low level driver when an * 802.11 MPDU is received from the hardware. */ -void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, - struct napi_struct *napi) +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, + struct sk_buff *skb, struct napi_struct *napi) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate = NULL; @@ -3681,7 +4043,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, WARN_ON_ONCE(softirq_count() == 0); - if (WARN_ON(status->band >= IEEE80211_NUM_BANDS)) + if (WARN_ON(status->band >= NUM_NL80211_BANDS)) goto drop; sband = local->hw.wiphy->bands[status->band]; @@ -3771,7 +4133,8 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, ieee80211_tpt_led_trig_rx(local, ((struct ieee80211_hdr *)skb->data)->frame_control, skb->len); - __ieee80211_rx_handle_packet(hw, skb, napi); + + __ieee80211_rx_handle_packet(hw, pubsta, skb, napi); rcu_read_unlock(); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index ae980ce8daff..f9648ef9e31f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -66,7 +66,9 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct cfg80211_bss *cbss; struct ieee80211_bss *bss; int clen, srlen; - struct cfg80211_inform_bss bss_meta = {}; + struct cfg80211_inform_bss bss_meta = { + .boottime_ns = rx_status->boottime_ns, + }; bool signal_valid; if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) @@ -270,7 +272,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) n_chans = req->n_channels; } else { do { - if (local->hw_scan_band == IEEE80211_NUM_BANDS) + if (local->hw_scan_band == NUM_NL80211_BANDS) return false; n_chans = 0; @@ -303,6 +305,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) ether_addr_copy(local->hw_scan_req->req.mac_addr, req->mac_addr); ether_addr_copy(local->hw_scan_req->req.mac_addr_mask, req->mac_addr_mask); + ether_addr_copy(local->hw_scan_req->req.bssid, req->bssid); return true; } @@ -482,7 +485,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, int i; struct 
ieee80211_sub_if_data *sdata; struct cfg80211_scan_request *scan_req; - enum ieee80211_band band = local->hw.conf.chandef.chan->band; + enum nl80211_band band = local->hw.conf.chandef.chan->band; u32 tx_flags; scan_req = rcu_dereference_protected(local->scan_req, @@ -497,7 +500,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, for (i = 0; i < scan_req->n_ssids; i++) ieee80211_send_probe_req( - sdata, local->scan_addr, NULL, + sdata, local->scan_addr, scan_req->bssid, scan_req->ssids[i].ssid, scan_req->ssids[i].ssid_len, scan_req->ie, scan_req->ie_len, scan_req->rates[band], false, @@ -562,6 +565,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, req->n_channels * sizeof(req->channels[0]); local->hw_scan_req->req.ie = ies; local->hw_scan_req->req.flags = req->flags; + eth_broadcast_addr(local->hw_scan_req->req.bssid); local->hw_scan_band = 0; @@ -949,7 +953,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; int ret = -EBUSY, i, n_ch = 0; - enum ieee80211_band band; + enum nl80211_band band; mutex_lock(&local->mtx); @@ -961,7 +965,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, if (!channels) { int max_n; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!local->hw.wiphy->bands[band]) continue; @@ -1081,7 +1085,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct ieee80211_scan_ies sched_scan_ies = {}; struct cfg80211_chan_def chandef; int ret, i, iebufsz, num_bands = 0; - u32 rate_masks[IEEE80211_NUM_BANDS] = {}; + u32 rate_masks[NUM_NL80211_BANDS] = {}; u8 bands_used = 0; u8 *ie; size_t len; @@ -1093,7 +1097,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, if (!local->ops->sched_scan_start) return -ENOTSUPP; - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { if (local->hw.wiphy->bands[i]) { bands_used |= BIT(i); rate_masks[i] = (u32) -1; diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 06e6ac8cc693..2ddc661f0988 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -23,11 +23,11 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, - enum ieee80211_band current_band, + enum nl80211_band current_band, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie) { - enum ieee80211_band new_band; + enum nl80211_band new_band; int new_freq; u8 new_chan_no; struct ieee80211_channel *new_chan; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 861b93ffbe92..5ccfdbd406bd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2,7 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. 
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2015 Intel Deutschland GmbH + * Copyright (C) 2015 - 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -255,6 +255,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif + free_percpu(sta->pcpu_rx_stats); kfree(sta); } @@ -312,6 +313,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (!sta) return NULL; + if (ieee80211_hw_check(hw, USES_RSS)) { + sta->pcpu_rx_stats = + alloc_percpu(struct ieee80211_sta_rx_stats); + if (!sta->pcpu_rx_stats) + goto free; + } + spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); @@ -336,15 +344,17 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sdata = sdata; sta->rx_stats.last_rx = jiffies; + u64_stats_init(&sta->rx_stats.syncp); + sta->sta_state = IEEE80211_STA_NONE; /* Mark TID as unreserved */ sta->reserved_tid = IEEE80211_TID_UNRESERVED; sta->last_connected = ktime_get_seconds(); - ewma_signal_init(&sta->rx_stats.avg_signal); - for (i = 0; i < ARRAY_SIZE(sta->rx_stats.chain_signal_avg); i++) - ewma_signal_init(&sta->rx_stats.chain_signal_avg[i]); + ewma_signal_init(&sta->rx_stats_avg.signal); + for (i = 0; i < ARRAY_SIZE(sta->rx_stats_avg.chain_signal); i++) + ewma_signal_init(&sta->rx_stats_avg.chain_signal[i]); if (local->ops->wake_tx_queue) { void *txq_data; @@ -407,6 +417,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } + sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; @@ -879,6 +891,13 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); + /* + * Before removing the station from the driver there might be pending + * rx frames on RSS queues sent prior to the disassociation - wait for + * all such frames to be processed. 
+ */ + drv_sync_rx_queues(local, sta); + ret = sta_info_hash_del(local, sta); if (WARN_ON(ret)) return ret; @@ -1091,10 +1110,12 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { + unsigned long last_active = ieee80211_sta_last_active(sta); + if (sdata != sta->sdata) continue; - if (time_after(jiffies, sta->rx_stats.last_rx + exp_time)) { + if (time_is_before_jiffies(last_active + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); @@ -1764,6 +1785,31 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, } EXPORT_SYMBOL(ieee80211_sta_set_buffered); +static void +ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + bool allow_p2p_go_ps = sdata->vif.p2p; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata || + !test_sta_flag(sta, WLAN_STA_ASSOC)) + continue; + if (!sta->sta.support_p2p_ps) { + allow_p2p_go_ps = false; + break; + } + } + rcu_read_unlock(); + + if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { + sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS); + } +} + int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { @@ -1825,12 +1871,16 @@ int sta_info_move_state(struct sta_info *sta, } else if (sta->sta_state == IEEE80211_STA_ASSOC) { clear_bit(WLAN_STA_ASSOC, &sta->_flags); ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } break; case IEEE80211_STA_ASSOC: if (sta->sta_state == IEEE80211_STA_AUTH) { set_bit(WLAN_STA_ASSOC, &sta->_flags); ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && @@ -1838,6 +1888,7 @@ int sta_info_move_state(struct sta_info *sta, atomic_dec(&sta->sdata->bss->num_mcast_sta); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_clear_fast_xmit(sta); + ieee80211_clear_fast_rx(sta); } break; case IEEE80211_STA_AUTHORIZED: @@ -1848,6 +1899,7 @@ int sta_info_move_state(struct sta_info *sta, atomic_inc(&sta->sdata->bss->num_mcast_sta); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); + ieee80211_check_fast_rx(sta); } break; default: @@ -1894,43 +1946,117 @@ u8 sta_info_tx_streams(struct sta_info *sta) >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; } -static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) +static struct ieee80211_sta_rx_stats * +sta_get_last_rx_stats(struct sta_info *sta) { - rinfo->flags = 0; - - if (sta->rx_stats.last_rate_flag & RX_FLAG_HT) { - rinfo->flags |= RATE_INFO_FLAGS_MCS; - rinfo->mcs = sta->rx_stats.last_rate_idx; - } else if (sta->rx_stats.last_rate_flag & RX_FLAG_VHT) { - rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; - rinfo->nss = sta->rx_stats.last_rate_vht_nss; - rinfo->mcs = sta->rx_stats.last_rate_idx; - } else { + struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; + struct ieee80211_local *local = sta->local; + int cpu; + + if (!ieee80211_hw_check(&local->hw, USES_RSS)) + return stats; + + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpustats; + + cpustats = 
per_cpu_ptr(sta->pcpu_rx_stats, cpu); + + if (time_after(cpustats->last_rx, stats->last_rx)) + stats = cpustats; + } + + return stats; +} + +static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, + struct rate_info *rinfo) +{ + rinfo->bw = (rate & STA_STATS_RATE_BW_MASK) >> + STA_STATS_RATE_BW_SHIFT; + + if (rate & STA_STATS_RATE_VHT) { + rinfo->flags = RATE_INFO_FLAGS_VHT_MCS; + rinfo->mcs = rate & 0xf; + rinfo->nss = (rate & 0xf0) >> 4; + } else if (rate & STA_STATS_RATE_HT) { + rinfo->flags = RATE_INFO_FLAGS_MCS; + rinfo->mcs = rate & 0xff; + } else if (rate & STA_STATS_RATE_LEGACY) { struct ieee80211_supported_band *sband; - int shift = ieee80211_vif_get_shift(&sta->sdata->vif); u16 brate; - - sband = sta->local->hw.wiphy->bands[ - ieee80211_get_sdata_band(sta->sdata)]; - brate = sband->bitrates[sta->rx_stats.last_rate_idx].bitrate; + unsigned int shift; + + sband = local->hw.wiphy->bands[(rate >> 4) & 0xf]; + brate = sband->bitrates[rate & 0xf].bitrate; + if (rinfo->bw == RATE_INFO_BW_5) + shift = 2; + else if (rinfo->bw == RATE_INFO_BW_10) + shift = 1; + else + shift = 0; rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } - if (sta->rx_stats.last_rate_flag & RX_FLAG_SHORT_GI) + if (rate & STA_STATS_RATE_SGI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; +} - if (sta->rx_stats.last_rate_flag & RX_FLAG_5MHZ) - rinfo->bw = RATE_INFO_BW_5; - else if (sta->rx_stats.last_rate_flag & RX_FLAG_10MHZ) - rinfo->bw = RATE_INFO_BW_10; - else if (sta->rx_stats.last_rate_flag & RX_FLAG_40MHZ) - rinfo->bw = RATE_INFO_BW_40; - else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_80MHZ) - rinfo->bw = RATE_INFO_BW_80; - else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_160MHZ) - rinfo->bw = RATE_INFO_BW_160; +static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) +{ + u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate); + + if (rate == STA_STATS_RATE_INVALID) + rinfo->flags = 0; else - rinfo->bw = RATE_INFO_BW_20; + sta_stats_decode_rate(sta->local, rate, rinfo); +} + +static void sta_set_tidstats(struct sta_info *sta, + struct cfg80211_tid_stats *tidstats, + int tid) +{ + struct ieee80211_local *local = sta->local; + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { + unsigned int start; + + do { + start = u64_stats_fetch_begin(&sta->rx_stats.syncp); + tidstats->rx_msdu = sta->rx_stats.msdu[tid]; + } while (u64_stats_fetch_retry(&sta->rx_stats.syncp, start)); + + tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); + } + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { + tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); + tidstats->tx_msdu = sta->tx_stats.msdu[tid]; + } + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && + ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { + tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); + tidstats->tx_msdu_retries = sta->status_stats.msdu_retries[tid]; + } + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && + ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { + tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); + tidstats->tx_msdu_failed = sta->status_stats.msdu_failed[tid]; + } +} + +static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) +{ + unsigned int start; + u64 value; + + do { + start = u64_stats_fetch_begin(&rxstats->syncp); + value = rxstats->bytes; + } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + + return value; } void sta_set_sinfo(struct sta_info *sta, struct 
station_info *sinfo) @@ -1939,7 +2065,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; u32 thr = 0; - int i, ac; + int i, ac, cpu; + struct ieee80211_sta_rx_stats *last_rxstats; + + last_rxstats = sta_get_last_rx_stats(sta); if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) ref = local->rate_ctrl; @@ -1968,7 +2097,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->connected_time = ktime_get_seconds() - sta->last_connected; sinfo->inactive_time = - jiffies_to_msecs(jiffies - sta->rx_stats.last_rx); + jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta)); if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | BIT(NL80211_STA_INFO_TX_BYTES)))) { @@ -1987,12 +2116,30 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | BIT(NL80211_STA_INFO_RX_BYTES)))) { - sinfo->rx_bytes = sta->rx_stats.bytes; + sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats); + + if (sta->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + sinfo->rx_bytes += sta_get_stats_bytes(cpurxs); + } + } + + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { sinfo->rx_packets = sta->rx_stats.packets; + if (sta->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + sinfo->rx_packets += cpurxs->packets; + } + } sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); } @@ -2007,6 +2154,14 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } sinfo->rx_dropped_misc = sta->rx_stats.dropped; + if (sta->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); + sinfo->rx_dropped_misc += cpurxs->dropped; + } + } if (sdata->vif.type == NL80211_IFTYPE_STATION && !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { @@ -2018,29 +2173,36 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { - sinfo->signal = (s8)sta->rx_stats.last_signal; + sinfo->signal = (s8)last_rxstats->last_signal; sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } - if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { + if (!sta->pcpu_rx_stats && + !(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = - -ewma_signal_read(&sta->rx_stats.avg_signal); + -ewma_signal_read(&sta->rx_stats_avg.signal); sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); } } - if (sta->rx_stats.chains && + /* for the average - if pcpu_rx_stats isn't set - rxstats must point to + * the sta->rx_stats struct, so the check here is fine with and without + * pcpu statistics + */ + if (last_rxstats->chains && !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { - sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | - BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); + sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL); + if (!sta->pcpu_rx_stats) + sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); + + sinfo->chains = last_rxstats->chains; - sinfo->chains = sta->rx_stats.chains; for (i = 0; i < 
ARRAY_SIZE(sinfo->chain_signal); i++) { sinfo->chain_signal[i] = - sta->rx_stats.chain_signal_last[i]; + last_rxstats->chain_signal_last[i]; sinfo->chain_signal_avg[i] = - -ewma_signal_read(&sta->rx_stats.chain_signal_avg[i]); + -ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]); } } @@ -2059,33 +2221,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { struct cfg80211_tid_stats *tidstats = &sinfo->pertid[i]; - if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { - tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); - tidstats->rx_msdu = sta->rx_stats.msdu[i]; - } - - if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { - tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); - tidstats->tx_msdu = sta->tx_stats.msdu[i]; - } - - if (!(tidstats->filled & - BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && - ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { - tidstats->filled |= - BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); - tidstats->tx_msdu_retries = - sta->status_stats.msdu_retries[i]; - } - - if (!(tidstats->filled & - BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && - ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { - tidstats->filled |= - BIT(NL80211_TID_STATS_TX_MSDU_FAILED); - tidstats->tx_msdu_failed = - sta->status_stats.msdu_failed[i]; - } + sta_set_tidstats(sta, tidstats, i); } if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -2154,3 +2290,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->expected_throughput = thr; } } + +unsigned long ieee80211_sta_last_active(struct sta_info *sta) +{ + struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); + + if (time_after(stats->last_rx, sta->status_stats.last_ack)) + return stats->last_rx; + return sta->status_stats.last_ack; +} diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 62193f4bc37b..c8b8ccc370eb 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -1,7 +1,7 @@ /* * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015-2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -18,6 +18,7 @@ #include <linux/average.h> #include <linux/etherdevice.h> #include <linux/rhashtable.h> +#include <linux/u64_stats_sync.h> #include "key.h" /** @@ -69,6 +70,8 @@ * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP. 
* @WLAN_STA_PS_DELIVER: station woke up, but we're still blocking TX * until pending frames are delivered + + * @NUM_WLAN_STA_FLAGS: number of defined flags */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH, @@ -97,6 +100,8 @@ enum ieee80211_sta_info_flags { WLAN_STA_MPSP_OWNER, WLAN_STA_MPSP_RECIPIENT, WLAN_STA_PS_DELIVER, + + NUM_WLAN_STA_FLAGS, }; #define ADDBA_RESP_INTERVAL HZ @@ -281,6 +286,40 @@ struct ieee80211_fast_tx { }; /** + * struct ieee80211_fast_rx - RX fastpath information + * @dev: netdevice for reporting the SKB + * @vif_type: (P2P-less) interface type of the original sdata (sdata->vif.type) + * @vif_addr: interface address + * @rfc1042_hdr: copy of the RFC 1042 SNAP header (to have in cache) + * @control_port_protocol: control port protocol copied from sdata + * @expected_ds_bits: from/to DS bits expected + * @icv_len: length of the MIC if present + * @key: bool indicating encryption is expected (key is set) + * @sta_notify: notify the MLME code (once) + * @internal_forward: forward frames internally on AP/VLAN type interfaces + * @uses_rss: copy of USES_RSS hw flag + * @da_offs: offset of the DA in the header (for header conversion) + * @sa_offs: offset of the SA in the header (for header conversion) + * @rcu_head: RCU head for freeing this structure + */ +struct ieee80211_fast_rx { + struct net_device *dev; + enum nl80211_iftype vif_type; + u8 vif_addr[ETH_ALEN] __aligned(2); + u8 rfc1042_hdr[6] __aligned(2); + __be16 control_port_protocol; + __le16 expected_ds_bits; + u8 icv_len; + u8 key:1, + sta_notify:1, + internal_forward:1, + uses_rss:1; + u8 da_offs, sa_offs; + + struct rcu_head rcu_head; +}; + +/** * struct mesh_sta - mesh STA information * @plink_lock: serialize access to plink fields * @llid: Local link ID @@ -330,6 +369,21 @@ struct mesh_sta { DECLARE_EWMA(signal, 1024, 8) +struct ieee80211_sta_rx_stats { + unsigned long packets; + unsigned long last_rx; + unsigned long num_duplicates; + unsigned long fragments; + unsigned long dropped; + int last_signal; + u8 chains; + s8 chain_signal_last[IEEE80211_MAX_CHAINS]; + u16 last_rate; + struct u64_stats_sync syncp; + u64 bytes; + u64 msdu[IEEE80211_NUM_TIDS + 1]; +}; + /** * struct sta_info - STA information * @@ -371,7 +425,7 @@ DECLARE_EWMA(signal, 1024, 8) * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers * @mesh: mesh STA information - * @debugfs: debug filesystem info + * @debugfs_dir: debug filesystem directory dentry * @dead: set to true when sta is unlinked * @removed: set to true when sta is being removed from sta_list * @uploaded: set to true when sta is uploaded to the driver @@ -385,10 +439,13 @@ DECLARE_EWMA(signal, 1024, 8) * @cipher_scheme: optional cipher scheme for this station * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @fast_tx: TX fastpath information + * @fast_rx: RX fastpath information * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. 
* @tx_stats: TX statistics * @rx_stats: RX statistics + * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs + * this (by advertising the USES_RSS hw flag) * @status_stats: TX status statistics */ struct sta_info { @@ -408,6 +465,8 @@ struct sta_info { spinlock_t lock; struct ieee80211_fast_tx __rcu *fast_tx; + struct ieee80211_fast_rx __rcu *fast_rx; + struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats; #ifdef CONFIG_MAC80211_MESH struct mesh_sta *mesh; @@ -437,24 +496,11 @@ struct sta_info { long last_connected; /* Updated from RX path only, no locking requirements */ + struct ieee80211_sta_rx_stats rx_stats; struct { - unsigned long packets; - u64 bytes; - unsigned long last_rx; - unsigned long num_duplicates; - unsigned long fragments; - unsigned long dropped; - int last_signal; - struct ewma_signal avg_signal; - u8 chains; - s8 chain_signal_last[IEEE80211_MAX_CHAINS]; - struct ewma_signal chain_signal_avg[IEEE80211_MAX_CHAINS]; - int last_rate_idx; - u32 last_rate_flag; - u32 last_rate_vht_flag; - u8 last_rate_vht_nss; - u64 msdu[IEEE80211_NUM_TIDS + 1]; - } rx_stats; + struct ewma_signal signal; + struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS]; + } rx_stats_avg; /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; @@ -467,6 +513,7 @@ struct sta_info { unsigned long last_tdls_pkt_time; u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; + unsigned long last_ack; } status_stats; /* Updated from TX path only, no locking requirements */ @@ -485,10 +532,7 @@ struct sta_info { u8 timer_to_tid[IEEE80211_NUM_TIDS]; #ifdef CONFIG_MAC80211_DEBUGFS - struct sta_info_debugfsdentries { - struct dentry *dir; - bool add_has_run; - } debugfs; + struct dentry *debugfs_dir; #endif enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; @@ -676,4 +720,44 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); +unsigned long ieee80211_sta_last_active(struct sta_info *sta); + +#define STA_STATS_RATE_INVALID 0 +#define STA_STATS_RATE_VHT 0x8000 +#define STA_STATS_RATE_HT 0x4000 +#define STA_STATS_RATE_LEGACY 0x2000 +#define STA_STATS_RATE_SGI 0x1000 +#define STA_STATS_RATE_BW_SHIFT 9 +#define STA_STATS_RATE_BW_MASK (0x7 << STA_STATS_RATE_BW_SHIFT) + +static inline u16 sta_stats_encode_rate(struct ieee80211_rx_status *s) +{ + u16 r = s->rate_idx; + + if (s->vht_flag & RX_VHT_FLAG_80MHZ) + r |= RATE_INFO_BW_80 << STA_STATS_RATE_BW_SHIFT; + else if (s->vht_flag & RX_VHT_FLAG_160MHZ) + r |= RATE_INFO_BW_160 << STA_STATS_RATE_BW_SHIFT; + else if (s->flag & RX_FLAG_40MHZ) + r |= RATE_INFO_BW_40 << STA_STATS_RATE_BW_SHIFT; + else if (s->flag & RX_FLAG_10MHZ) + r |= RATE_INFO_BW_10 << STA_STATS_RATE_BW_SHIFT; + else if (s->flag & RX_FLAG_5MHZ) + r |= RATE_INFO_BW_5 << STA_STATS_RATE_BW_SHIFT; + else + r |= RATE_INFO_BW_20 << STA_STATS_RATE_BW_SHIFT; + + if (s->flag & RX_FLAG_SHORT_GI) + r |= STA_STATS_RATE_SGI; + + if (s->flag & RX_FLAG_VHT) + r |= STA_STATS_RATE_VHT | (s->vht_nss << 4); + else if (s->flag & RX_FLAG_HT) + r |= STA_STATS_RATE_HT; + else + r |= STA_STATS_RATE_LEGACY | (s->band << 4); + + return r; +} + #endif /* STA_INFO_H */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 8b1b2ea03eb5..c6d5c724e032 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -188,7 +188,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) struct 
ieee80211_sub_if_data *sdata = sta->sdata; if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) - sta->rx_stats.last_rx = jiffies; + sta->status_stats.last_ack = jiffies; if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; @@ -647,7 +647,7 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, sta->status_stats.retry_count += retry_count; if (acked) { - sta->rx_stats.last_rx = jiffies; + sta->status_stats.last_ack = jiffies; if (sta->status_stats.lost_packets) sta->status_stats.lost_packets = 0; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index a29ea813b7d5..1c7d45a6d93e 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -47,7 +47,7 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata, NL80211_FEATURE_TDLS_CHANNEL_SWITCH; bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && !ifmgd->tdls_wider_bw_prohibited; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; bool vht = sband && sband->vht_cap.vht_supported; u8 *pos = (void *)skb_put(skb, 10); @@ -184,7 +184,7 @@ static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata, if (status_code != 0) return 0; - if (ieee80211_get_sdata_band(sdata) == IEEE80211_BAND_2GHZ) { + if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_2GHZ) { return WLAN_CAPABILITY_SHORT_SLOT_TIME | WLAN_CAPABILITY_SHORT_PREAMBLE; } @@ -357,7 +357,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, u8 action_code, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; struct ieee80211_sta_ht_cap ht_cap; @@ -544,7 +544,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t offset = 0, noffset; struct sta_info *sta, *ap_sta; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + enum nl80211_band band = ieee80211_get_sdata_band(sdata); u8 *pos; mutex_lock(&local->sta_mtx); @@ -611,7 +611,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); /* only include VHT-operation if not on the 2.4GHz band */ - if (band != IEEE80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) { + if (band != NL80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) { /* * if both peers support WIDER_BW, we can expand the chandef to * a wider compatible one, up to 80MHz @@ -1773,7 +1773,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, u8 target_channel, oper_class; bool local_initiator; struct sta_info *sta; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_tdls_data *tf = (void *)skb->data; struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); int baselen = offsetof(typeof(*tf), u.chan_switch_req.variable); @@ -1805,10 +1805,10 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, if ((oper_class == 112 || oper_class == 2 || oper_class == 3 || oper_class == 4 || oper_class == 5 || oper_class == 6) && target_channel < 14) - band = IEEE80211_BAND_5GHZ; + band = NL80211_BAND_5GHZ; else - band = target_channel < 14 ? 
IEEE80211_BAND_2GHZ : - IEEE80211_BAND_5GHZ; + band = target_channel < 14 ? NL80211_BAND_2GHZ : + NL80211_BAND_5GHZ; freq = ieee80211_channel_to_frequency(target_channel, band); if (freq == 0) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 2b0a17ee907a..77e4c53baefb 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1,3 +1,8 @@ +/* +* Portions of this file +* Copyright(c) 2016 Intel Deutschland GmbH +*/ + #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __MAC80211_DRIVER_TRACE @@ -396,7 +401,7 @@ TRACE_EVENT(drv_bss_info_changed, __field(u32, sync_device_ts) __field(u8, sync_dtim_count) __field(u32, basic_rates) - __array(int, mcast_rate, IEEE80211_NUM_BANDS) + __array(int, mcast_rate, NUM_NL80211_BANDS) __field(u16, ht_operation_mode) __field(s32, cqm_rssi_thold); __field(s32, cqm_rssi_hyst); @@ -899,6 +904,13 @@ DEFINE_EVENT(sta_event, drv_sta_pre_rcu_remove, TP_ARGS(local, sdata, sta) ); +DEFINE_EVENT(sta_event, drv_sync_rx_queues, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); + DEFINE_EVENT(sta_event, drv_sta_rate_tbl_update, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1253,8 +1265,8 @@ TRACE_EVENT(drv_set_bitrate_mask, TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - __entry->legacy_2g = mask->control[IEEE80211_BAND_2GHZ].legacy; - __entry->legacy_5g = mask->control[IEEE80211_BAND_5GHZ].legacy; + __entry->legacy_2g = mask->control[NL80211_BAND_2GHZ].legacy; + __entry->legacy_5g = mask->control[NL80211_BAND_5GHZ].legacy; ), TP_printk( diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 21f6602395f7..203044379ce0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -150,7 +150,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, rate = DIV_ROUND_UP(r->bitrate, 1 << shift); switch (sband->band) { - case IEEE80211_BAND_2GHZ: { + case NL80211_BAND_2GHZ: { u32 flag; if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) flag = IEEE80211_RATE_MANDATORY_G; @@ -160,13 +160,13 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, mrate = r->bitrate; break; } - case IEEE80211_BAND_5GHZ: + case NL80211_BAND_5GHZ: if (r->flags & IEEE80211_RATE_MANDATORY_A) mrate = r->bitrate; break; - case IEEE80211_BAND_60GHZ: + case NL80211_BAND_60GHZ: /* TODO, for now fall through */ - case IEEE80211_NUM_BANDS: + case NUM_NL80211_BANDS: WARN_ON(1); break; } @@ -1329,6 +1329,10 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, out: spin_unlock_bh(&txqi->queue.lock); + if (skb && skb_has_frag_list(skb) && + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) + skb_linearize(skb); + return skb; } EXPORT_SYMBOL(ieee80211_tx_dequeue); @@ -1696,7 +1700,9 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, bool rate_found = false; u8 rate_retries = 0; u16 rate_flags = 0; - u8 mcs_known, mcs_flags; + u8 mcs_known, mcs_flags, mcs_bw; + u16 vht_known; + u8 vht_mcs = 0, vht_nss = 0; int i; info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | @@ -1772,11 +1778,38 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, mcs_flags & IEEE80211_RADIOTAP_MCS_SGI) rate_flags |= IEEE80211_TX_RC_SHORT_GI; + mcs_bw = mcs_flags & IEEE80211_RADIOTAP_MCS_BW_MASK; if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_BW && - mcs_flags & IEEE80211_RADIOTAP_MCS_BW_40) + mcs_bw == IEEE80211_RADIOTAP_MCS_BW_40) rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; break; + case 
IEEE80211_RADIOTAP_VHT: + vht_known = get_unaligned_le16(iterator.this_arg); + rate_found = true; + + rate_flags = IEEE80211_TX_RC_VHT_MCS; + if ((vht_known & IEEE80211_RADIOTAP_VHT_KNOWN_GI) && + (iterator.this_arg[2] & + IEEE80211_RADIOTAP_VHT_FLAG_SGI)) + rate_flags |= IEEE80211_TX_RC_SHORT_GI; + if (vht_known & + IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH) { + if (iterator.this_arg[3] == 1) + rate_flags |= + IEEE80211_TX_RC_40_MHZ_WIDTH; + else if (iterator.this_arg[3] == 4) + rate_flags |= + IEEE80211_TX_RC_80_MHZ_WIDTH; + else if (iterator.this_arg[3] == 11) + rate_flags |= + IEEE80211_TX_RC_160_MHZ_WIDTH; + } + + vht_mcs = iterator.this_arg[4] >> 4; + vht_nss = iterator.this_arg[4] & 0xF; + break; + /* * Please update the file * Documentation/networking/mac80211-injection.txt @@ -1802,6 +1835,9 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, if (rate_flags & IEEE80211_TX_RC_MCS) { info->control.rates[0].idx = rate; + } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) { + ieee80211_rate_set_vht(info->control.rates, vht_mcs, + vht_nss); } else { for (i = 0; i < sband->n_bitrates; i++) { if (rate * 5 != sband->bitrates[i].bitrate) @@ -1812,6 +1848,9 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, } } + if (info->control.rates[0].idx < 0) + info->control.flags &= ~IEEE80211_TX_CTRL_RATE_INJECT; + info->control.rates[0].flags = rate_flags; info->control.rates[0].count = min_t(u8, rate_retries + 1, local->hw.max_rate_tries); @@ -2099,7 +2138,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, u16 info_id = 0; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_sub_if_data *ap_sdata; - enum ieee80211_band band; + enum nl80211_band band; int ret; if (IS_ERR(sta)) @@ -2186,7 +2225,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } if (mppath && mpath) - mesh_path_del(mpath->sdata, mpath->dst); + mesh_path_del(sdata, mpath->dst); } /* @@ -2772,6 +2811,154 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta) kfree_rcu(fast_tx, rcu_head); } +static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local, + struct sk_buff *skb, int headroom, + int *subframe_len) +{ + int amsdu_len = *subframe_len + sizeof(struct ethhdr); + int padding = (4 - amsdu_len) & 3; + + if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) { + I802_DEBUG_INC(local->tx_expand_skb_head); + + if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) { + wiphy_debug(local->hw.wiphy, + "failed to reallocate TX buffer\n"); + return false; + } + } + + if (padding) { + *subframe_len += padding; + memset(skb_put(skb, padding), 0, padding); + } + + return true; +} + +static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr; + struct ethhdr amsdu_hdr; + int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header); + int subframe_len = skb->len - hdr_len; + void *data; + u8 *qc; + + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) + return false; + + if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) + return true; + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr), + &subframe_len)) + return false; + + amsdu_hdr.h_proto = cpu_to_be16(subframe_len); + memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN); + memcpy(amsdu_hdr.h_dest, skb->data + 
fast_tx->da_offs, ETH_ALEN); + + data = skb_push(skb, sizeof(amsdu_hdr)); + memmove(data, data + sizeof(amsdu_hdr), hdr_len); + memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr)); + + hdr = data; + qc = ieee80211_get_qos_ctl(hdr); + *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; + + info->control.flags |= IEEE80211_TX_CTRL_AMSDU; + + return true; +} + +static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi; + struct sk_buff **frag_tail, *head; + int subframe_len = skb->len - ETH_ALEN; + u8 max_subframes = sta->sta.max_amsdu_subframes; + int max_frags = local->hw.max_tx_fragments; + int max_amsdu_len = sta->sta.max_amsdu_len; + __be16 len; + void *data; + bool ret = false; + int n = 1, nfrags; + + if (!ieee80211_hw_check(&local->hw, TX_AMSDU)) + return false; + + if (!txq) + return false; + + txqi = to_txq_info(txq); + if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags)) + return false; + + if (sta->sta.max_rc_amsdu_len) + max_amsdu_len = min_t(int, max_amsdu_len, + sta->sta.max_rc_amsdu_len); + + spin_lock_bh(&txqi->queue.lock); + + head = skb_peek_tail(&txqi->queue); + if (!head) + goto out; + + if (skb->len + head->len > max_amsdu_len) + goto out; + + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) + goto out; + + nfrags = 1 + skb_shinfo(skb)->nr_frags; + nfrags += 1 + skb_shinfo(head)->nr_frags; + frag_tail = &skb_shinfo(head)->frag_list; + while (*frag_tail) { + nfrags += 1 + skb_shinfo(*frag_tail)->nr_frags; + frag_tail = &(*frag_tail)->next; + n++; + } + + if (max_subframes && n > max_subframes) + goto out; + + if (max_frags && nfrags > max_frags) + goto out; + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2, + &subframe_len)) + goto out; + + ret = true; + data = skb_push(skb, ETH_ALEN + 2); + memmove(data, data + ETH_ALEN + 2, 2 * ETH_ALEN); + + data += 2 * ETH_ALEN; + len = cpu_to_be16(subframe_len); + memcpy(data, &len, 2); + memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header)); + + head->len += skb->len; + head->data_len += skb->len; + *frag_tail = skb; + +out: + spin_unlock_bh(&txqi->queue.lock); + + return ret; +} + static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, struct net_device *dev, struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, @@ -2826,6 +3013,10 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, ieee80211_tx_stats(dev, skb->len + extra_head); + if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && + ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) + return true; + /* will not be crypto-handled beyond what we do here, so use false * as the may-encrypt argument for the resize to not account for * more room than we already have in 'extra_head' @@ -3406,7 +3597,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, struct sk_buff *skb = NULL; struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata = NULL; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_tx_rate_control txrc; struct ieee80211_chanctx_conf *chanctx_conf; int csa_off_base = 0; @@ -3974,7 +4165,7 @@ EXPORT_SYMBOL(ieee80211_unreserve_tid); void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum ieee80211_band band) + enum nl80211_band band) { int ac 
= ieee802_1d_to_ac[tid & 7]; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7390de4946a9..905003f75c4d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -59,7 +59,7 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx) } } -int ieee80211_frame_duration(enum ieee80211_band band, size_t len, +int ieee80211_frame_duration(enum nl80211_band band, size_t len, int rate, int erp, int short_preamble, int shift) { @@ -77,7 +77,7 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len, * is assumed to be 0 otherwise. */ - if (band == IEEE80211_BAND_5GHZ || erp) { + if (band == NL80211_BAND_5GHZ || erp) { /* * OFDM: * @@ -129,7 +129,7 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len, /* Exported duration function for driver use */ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum ieee80211_band band, + enum nl80211_band band, size_t frame_len, struct ieee80211_rate *rate) { @@ -1129,7 +1129,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); use_11b = (chanctx_conf && - chanctx_conf->def.chan->band == IEEE80211_BAND_2GHZ) && + chanctx_conf->def.chan->band == NL80211_BAND_2GHZ) && !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE); rcu_read_unlock(); @@ -1301,7 +1301,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, u8 *buffer, size_t buffer_len, const u8 *ie, size_t ie_len, - enum ieee80211_band band, + enum nl80211_band band, u32 rate_mask, struct cfg80211_chan_def *chandef, size_t *offset) @@ -1375,7 +1375,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, pos += ext_rates_len; } - if (chandef->chan && sband->band == IEEE80211_BAND_2GHZ) { + if (chandef->chan && sband->band == NL80211_BAND_2GHZ) { if (end - pos < 3) goto out_err; *pos++ = WLAN_EID_DS_PARAMS; @@ -1479,7 +1479,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, memset(ie_desc, 0, sizeof(*ie_desc)); - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { if (bands_used & BIT(i)) { pos += ieee80211_build_preq_ies_band(local, buffer + pos, @@ -1522,7 +1522,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; int ies_len; - u32 rate_masks[IEEE80211_NUM_BANDS] = {}; + u32 rate_masks[NUM_NL80211_BANDS] = {}; struct ieee80211_scan_ies dummy_ie_desc; /* @@ -1582,7 +1582,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, - enum ieee80211_band band, u32 *basic_rates) + enum nl80211_band band, u32 *basic_rates) { struct ieee80211_supported_band *sband; size_t num_rates; @@ -2520,7 +2520,7 @@ int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic, - enum ieee80211_band band) + enum nl80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -2565,7 +2565,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic, - enum ieee80211_band band) + enum nl80211_band band) { struct 
ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -2711,7 +2711,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, if (status->flag & RX_FLAG_MACTIME_PLCP_START) { /* TODO: handle HT/VHT preambles */ - if (status->band == IEEE80211_BAND_5GHZ) { + if (status->band == NL80211_BAND_5GHZ) { ts += 20 << shift; mpdu_offset += 2; } else if (status->flag & RX_FLAG_SHORTPRE) { @@ -2724,8 +2724,9 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, rate = cfg80211_calculate_bitrate(&ri); if (WARN_ONCE(!rate, - "Invalid bitrate: flags=0x%x, idx=%d, vht_nss=%d\n", - status->flag, status->rate_idx, status->vht_nss)) + "Invalid bitrate: flags=0x%llx, idx=%d, vht_nss=%d\n", + (unsigned long long)status->flag, status->rate_idx, + status->vht_nss)) return 0; /* rewind from end of MPDU */ diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index e590e2ef9eaf..ee715764a828 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -418,7 +418,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band) + enum nl80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(ieee80211_update_mu_groups); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, - enum ieee80211_band band) + enum nl80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 18848258adde..7e4f2652bca7 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -504,18 +504,20 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, !ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; - data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; - if (!rx->sta || data_len < 0) - return RX_DROP_UNUSABLE; - if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; + if (status->flag & RX_FLAG_MIC_STRIPPED) + mic_len = 0; } else { if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; } + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; + if (!rx->sta || data_len < 0) + return RX_DROP_UNUSABLE; + if (!(status->flag & RX_FLAG_PN_VALIDATED)) { ccmp_hdr2pn(pn, skb->data + hdrlen); @@ -720,8 +722,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 pn[IEEE80211_GCMP_PN_LEN]; - int data_len; - int queue; + int data_len, queue, mic_len = IEEE80211_GCMP_MIC_LEN; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -729,19 +730,20 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) !ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; - data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - - IEEE80211_GCMP_MIC_LEN; - if (!rx->sta || data_len < 0) - return RX_DROP_UNUSABLE; - if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN)) return RX_DROP_UNUSABLE; + if (status->flag & RX_FLAG_MIC_STRIPPED) + mic_len = 0; } else { if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; } + data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len; + if (!rx->sta || data_len < 0) + return RX_DROP_UNUSABLE; + if (!(status->flag & 
RX_FLAG_PN_VALIDATED)) { gcmp_hdr2pn(pn, skb->data + hdrlen); @@ -772,7 +774,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) } /* Remove GCMP header and MIC */ - if (pskb_trim(skb, skb->len - IEEE80211_GCMP_MIC_LEN)) + if (pskb_trim(skb, skb->len - mic_len)) return RX_DROP_UNUSABLE; memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_GCMP_HDR_LEN); diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 0183b32da942..bbcf60465e5c 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -31,6 +31,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_TCP_FIXEDID | SKB_GSO_TCP_ECN))) goto out; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index dc196a0f501d..6d19d2eeaa60 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -1013,8 +1013,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)); - if (IS_ERR(skb)) + if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af))) goto tx_error; skb->transport_header = skb->network_header; @@ -1105,8 +1104,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)); - if (IS_ERR(skb)) + if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af))) goto tx_error; skb->transport_header = skb->network_header; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index afde5f5e728a..2fd607408998 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -12,6 +12,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> @@ -966,7 +968,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!l4proto->new(ct, skb, dataoff, timeouts)) { nf_conntrack_free(ct); - pr_debug("init conntrack: can't track with proto module\n"); + pr_debug("can't track with proto module\n"); return NULL; } @@ -988,7 +990,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, spin_lock(&nf_conntrack_expect_lock); exp = nf_ct_find_expectation(net, zone, tuple); if (exp) { - pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", + pr_debug("expectation arrives ct=%p exp=%p\n", ct, exp); /* Welcome, Mr. Bond. We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &ct->status); @@ -1053,7 +1055,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, net, &tuple, l3proto, l4proto)) { - pr_debug("resolve_normal_ct: Can't get tuple\n"); + pr_debug("Can't get tuple\n"); return NULL; } @@ -1079,14 +1081,13 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } else { /* Once we've had two way comms, always ESTABLISHED. 
*/ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - pr_debug("nf_conntrack_in: normal packet for %p\n", ct); + pr_debug("normal packet for %p\n", ct); *ctinfo = IP_CT_ESTABLISHED; } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { - pr_debug("nf_conntrack_in: related packet for %p\n", - ct); + pr_debug("related packet for %p\n", ct); *ctinfo = IP_CT_RELATED; } else { - pr_debug("nf_conntrack_in: new packet for %p\n", ct); + pr_debug("new packet for %p\n", ct); *ctinfo = IP_CT_NEW; } *set_reply = 0; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 4e78c57b818f..d28011b42845 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -113,6 +113,60 @@ static void ecache_work(struct work_struct *work) schedule_delayed_work(&ctnet->ecache_dwork, delay); } +int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, + u32 portid, int report) +{ + int ret = 0; + struct net *net = nf_ct_net(ct); + struct nf_ct_event_notifier *notify; + struct nf_conntrack_ecache *e; + + rcu_read_lock(); + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); + if (!notify) + goto out_unlock; + + e = nf_ct_ecache_find(ct); + if (!e) + goto out_unlock; + + if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { + struct nf_ct_event item = { + .ct = ct, + .portid = e->portid ? e->portid : portid, + .report = report + }; + /* This is a resent of a destroy event? If so, skip missed */ + unsigned long missed = e->portid ? 0 : e->missed; + + if (!((eventmask | missed) & e->ctmask)) + goto out_unlock; + + ret = notify->fcn(eventmask | missed, &item); + if (unlikely(ret < 0 || missed)) { + spin_lock_bh(&ct->lock); + if (ret < 0) { + /* This is a destroy event that has been + * triggered by a process, we store the PORTID + * to include it in the retransmission. 
+ */ + if (eventmask & (1 << IPCT_DESTROY) && + e->portid == 0 && portid != 0) + e->portid = portid; + else + e->missed |= eventmask; + } else { + e->missed &= ~missed; + } + spin_unlock_bh(&ct->lock); + } + } +out_unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); + /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ void nf_ct_deliver_cached_events(struct nf_conn *ct) @@ -167,6 +221,36 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); +void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp, + u32 portid, int report) + +{ + struct net *net = nf_ct_exp_net(exp); + struct nf_exp_event_notifier *notify; + struct nf_conntrack_ecache *e; + + rcu_read_lock(); + notify = rcu_dereference(net->ct.nf_expect_event_cb); + if (!notify) + goto out_unlock; + + e = nf_ct_ecache_find(exp->master); + if (!e) + goto out_unlock; + + if (e->expmask & (1 << event)) { + struct nf_exp_event item = { + .exp = exp, + .portid = portid, + .report = report + }; + notify->fcn(1 << event, &item); + } +out_unlock: + rcu_read_unlock(); +} + int nf_conntrack_register_notifier(struct net *net, struct nf_ct_event_notifier *new) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index cb5b630a645b..aa93877ab6e2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -295,6 +295,59 @@ static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata) return seclen; } +static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry) +{ + struct sk_buff *entskb = entry->skb; + u32 nlalen = 0; + + if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb)) + return 0; + + if (skb_vlan_tag_present(entskb)) + nlalen += nla_total_size(nla_total_size(sizeof(__be16)) + + nla_total_size(sizeof(__be16))); + + if (entskb->network_header > entskb->mac_header) + nlalen += nla_total_size((entskb->network_header - + entskb->mac_header)); + + return nlalen; +} + +static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb) +{ + struct sk_buff *entskb = entry->skb; + + if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb)) + return 0; + + if (skb_vlan_tag_present(entskb)) { + struct nlattr *nest; + + nest = nla_nest_start(skb, NFQA_VLAN | NLA_F_NESTED); + if (!nest) + goto nla_put_failure; + + if (nla_put_be16(skb, NFQA_VLAN_TCI, htons(entskb->vlan_tci)) || + nla_put_be16(skb, NFQA_VLAN_PROTO, entskb->vlan_proto)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + } + + if (entskb->mac_header < entskb->network_header) { + int len = (int)(entskb->network_header - entskb->mac_header); + + if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb))) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -1; +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -334,6 +387,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (entskb->tstamp.tv64) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + size += nfqnl_get_bridge_size(entry); + if (entry->state.hook <= NF_INET_FORWARD || (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL)) csum_verify = !skb_csum_unnecessary(entskb); @@ -497,6 +552,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } } + if (nfqnl_put_bridge(entry, skb) < 0) + goto nla_put_failure; + 
if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(skb->tstamp); @@ -911,12 +969,18 @@ static struct notifier_block nfqnl_rtnl_notifier = { .notifier_call = nfqnl_rcv_nl_event, }; +static const struct nla_policy nfqa_vlan_policy[NFQA_VLAN_MAX + 1] = { + [NFQA_VLAN_TCI] = { .type = NLA_U16}, + [NFQA_VLAN_PROTO] = { .type = NLA_U16}, +}; + static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, [NFQA_MARK] = { .type = NLA_U32 }, [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, [NFQA_CT] = { .type = NLA_UNSPEC }, [NFQA_EXP] = { .type = NLA_UNSPEC }, + [NFQA_VLAN] = { .type = NLA_NESTED }, }; static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { @@ -1030,6 +1094,40 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, return ct; } +static int nfqa_parse_bridge(struct nf_queue_entry *entry, + const struct nlattr * const nfqa[]) +{ + if (nfqa[NFQA_VLAN]) { + struct nlattr *tb[NFQA_VLAN_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFQA_VLAN_MAX, nfqa[NFQA_VLAN], + nfqa_vlan_policy); + if (err < 0) + return err; + + if (!tb[NFQA_VLAN_TCI] || !tb[NFQA_VLAN_PROTO]) + return -EINVAL; + + entry->skb->vlan_tci = ntohs(nla_get_be16(tb[NFQA_VLAN_TCI])); + entry->skb->vlan_proto = nla_get_be16(tb[NFQA_VLAN_PROTO]); + } + + if (nfqa[NFQA_L2HDR]) { + int mac_header_len = entry->skb->network_header - + entry->skb->mac_header; + + if (mac_header_len != nla_len(nfqa[NFQA_L2HDR])) + return -EINVAL; + else if (mac_header_len > 0) + memcpy(skb_mac_header(entry->skb), + nla_data(nfqa[NFQA_L2HDR]), + mac_header_len); + } + + return 0; +} + static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1045,6 +1143,7 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; struct nfnl_queue_net *q = nfnl_queue_pernet(net); + int err; queue = instance_lookup(q, queue_num); if (!queue) @@ -1071,6 +1170,12 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo); } + if (entry->state.pf == PF_BRIDGE) { + err = nfqa_parse_bridge(entry, nfqa); + if (err < 0) + return err; + } + if (nfqa[NFQA_PAYLOAD]) { u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); int diff = payload_len - entry->skb->len; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3f9d45d3d9b7..6fa016564f90 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -192,7 +192,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; - err = rhashtable_walk_init(&priv->ht, &hti); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); iter->err = err; if (err) return; @@ -248,7 +248,7 @@ static void nft_hash_gc(struct work_struct *work) priv = container_of(work, struct nft_hash, gc_work.work); set = nft_set_container_of(priv); - err = rhashtable_walk_init(&priv->ht, &hti); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); if (err) goto schedule; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 49d14ecad444..b10ade272b50 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -120,9 +120,9 @@ xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, { switch (protocol) { case IPPROTO_TCP: - return 
__inet_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); + return inet_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); case IPPROTO_UDP: return udp4_lib_lookup(net, saddr, sport, daddr, dport, in->ifindex); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 330ebd600f25..aeefe127691a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2343,7 +2343,8 @@ static int netlink_walk_start(struct nl_seq_iter *iter) { int err; - err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti); + err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti, + GFP_KERNEL); if (err) { iter->link = MAX_LINKS; return err; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 7c8b90bf0e54..2ee48e447b72 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -165,11 +165,10 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | - IFF_PHONY_HEADROOM; + IFF_PHONY_HEADROOM | IFF_NO_QUEUE; netdev->destructor = internal_dev_destructor; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; - netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 18d0becbc46d..4040eb92d9c9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1837,6 +1837,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; + struct sockcm_cookie sockc; __be16 proto = 0; int err; int extra_len = 0; @@ -1925,12 +1926,21 @@ retry: goto out_unlock; } + sockc.tsflags = 0; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) { + err = -EINVAL; + goto out_unlock; + } + } + skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2042,6 +2052,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; + bool is_drop_n_account = false; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -2130,6 +2141,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, return 0; drop_n_acct: + is_drop_n_account = true; spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_drops++; atomic_inc(&sk->sk_drops); @@ -2141,7 +2153,10 @@ drop_n_restore: skb->len = skb_len; } drop: - consume_skb(skb); + if (!is_drop_n_account) + consume_skb(skb); + else + kfree_skb(skb); return 0; } @@ -2160,6 +2175,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sk_buff *copy_skb = NULL; struct timespec ts; __u32 ts_status; + bool is_drop_n_account = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. 
* We may add members to them until current aligned size without forcing @@ -2367,10 +2383,14 @@ drop_n_restore: skb->len = skb_len; } drop: - kfree_skb(skb); + if (!is_drop_n_account) + consume_skb(skb); + else + kfree_skb(skb); return 0; drop_n_account: + is_drop_n_account = true; po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); @@ -2486,7 +2506,8 @@ static int packet_snd_vnet_gso(struct sk_buff *skb, static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, void *data, int tp_len, - __be16 proto, unsigned char *addr, int hlen, int copylen) + __be16 proto, unsigned char *addr, int hlen, int copylen, + const struct sockcm_cookie *sockc) { union tpacket_uhdr ph; int to_write, offset, len, nr_frags, len_max; @@ -2500,7 +2521,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; - sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; skb_reserve(skb, hlen); @@ -2624,6 +2645,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) struct sk_buff *skb; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; + struct sockcm_cookie sockc; __be16 proto; int err, reserve = 0; void *ph; @@ -2655,6 +2677,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); } + sockc.tsflags = 0; + if (msg->msg_controllen) { + err = sock_cmsg_send(&po->sk, msg, &sockc); + if (unlikely(err)) + goto out; + } + err = -ENXIO; if (unlikely(dev == NULL)) goto out; @@ -2712,7 +2741,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_status; } tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, - addr, hlen, copylen); + addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && !po->has_vnet_hdr && @@ -2851,6 +2880,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + sockc.tsflags = 0; sockc.mark = sk->sk_mark; if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); @@ -2908,7 +2938,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 03f26e3a6f48..884027f62783 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1141,6 +1141,7 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, { struct rfkill *rfkill; struct rfkill_event ev; + int ret; /* we don't need the 'hard' variable but accept it */ if (count < RFKILL_EVENT_SIZE_V1 - 1) @@ -1155,29 +1156,36 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, if (copy_from_user(&ev, buf, count)) return -EFAULT; - if (ev.op != RFKILL_OP_CHANGE && ev.op != RFKILL_OP_CHANGE_ALL) - return -EINVAL; - if (ev.type >= NUM_RFKILL_TYPES) return -EINVAL; mutex_lock(&rfkill_global_mutex); - if (ev.op == RFKILL_OP_CHANGE_ALL) + switch (ev.op) { + case RFKILL_OP_CHANGE_ALL: rfkill_update_global_state(ev.type, ev.soft); - - list_for_each_entry(rfkill, 
&rfkill_list, node) { - if (rfkill->idx != ev.idx && ev.op != RFKILL_OP_CHANGE_ALL) - continue; - - if (rfkill->type != ev.type && ev.type != RFKILL_TYPE_ALL) - continue; - - rfkill_set_block(rfkill, ev.soft); + list_for_each_entry(rfkill, &rfkill_list, node) + if (rfkill->type == ev.type || + ev.type == RFKILL_TYPE_ALL) + rfkill_set_block(rfkill, ev.soft); + ret = 0; + break; + case RFKILL_OP_CHANGE: + list_for_each_entry(rfkill, &rfkill_list, node) + if (rfkill->idx == ev.idx && + (rfkill->type == ev.type || + ev.type == RFKILL_TYPE_ALL)) + rfkill_set_block(rfkill, ev.soft); + ret = 0; + break; + default: + ret = -EINVAL; + break; } + mutex_unlock(&rfkill_global_mutex); - return count; + return ret ?: count; } static int rfkill_fop_release(struct inode *inode, struct file *file) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 23dcef12b986..784c53163b7b 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -30,7 +30,7 @@ config AF_RXRPC_DEBUG config RXKAD - tristate "RxRPC Kerberos security" + bool "RxRPC Kerberos security" depends on AF_RXRPC select CRYPTO select CRYPTO_MANAGER diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index ec126f91276b..e05a06ef2254 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -18,11 +18,12 @@ af-rxrpc-y := \ ar-recvmsg.o \ ar-security.o \ ar-skbuff.o \ - ar-transport.o + ar-transport.o \ + insecure.o \ + misc.o af-rxrpc-$(CONFIG_PROC_FS) += ar-proc.o +af-rxrpc-$(CONFIG_RXKAD) += rxkad.o af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o - -obj-$(CONFIG_RXKAD) += rxkad.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9d935fa5a2a9..e45e94ca030f 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -806,6 +806,12 @@ static int __init af_rxrpc_init(void) goto error_work_queue; } + ret = rxrpc_init_security(); + if (ret < 0) { + printk(KERN_CRIT "RxRPC: Cannot initialise security\n"); + goto error_security; + } + ret = proto_register(&rxrpc_proto, 1); if (ret < 0) { printk(KERN_CRIT "RxRPC: Cannot register protocol\n"); @@ -853,6 +859,8 @@ error_sock: proto_unregister(&rxrpc_proto); error_proto: destroy_workqueue(rxrpc_workqueue); +error_security: + rxrpc_exit_security(); error_work_queue: kmem_cache_destroy(rxrpc_call_jar); error_call_jar: @@ -883,6 +891,7 @@ static void __exit af_rxrpc_exit(void) remove_proc_entry("rxrpc_conns", init_net.proc_net); remove_proc_entry("rxrpc_calls", init_net.proc_net); destroy_workqueue(rxrpc_workqueue); + rxrpc_exit_security(); kmem_cache_destroy(rxrpc_call_jar); _leave(""); } diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c index 277731a5e67a..e7a7f05f13e2 100644 --- a/net/rxrpc/ar-accept.c +++ b/net/rxrpc/ar-accept.c @@ -108,7 +108,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, goto error; } - conn = rxrpc_incoming_connection(trans, &sp->hdr, GFP_NOIO); + conn = rxrpc_incoming_connection(trans, &sp->hdr); rxrpc_put_transport(trans); if (IS_ERR(conn)) { _debug("no conn"); @@ -116,7 +116,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, goto error; } - call = rxrpc_incoming_call(rx, conn, &sp->hdr, GFP_NOIO); + call = rxrpc_incoming_call(rx, conn, &sp->hdr); rxrpc_put_connection(conn); if (IS_ERR(call)) { _debug("no call"); diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index 16d967075eaf..374478e006e7 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -20,74 +20,6 @@ #include "ar-internal.h" /* - * How long to wait before scheduling ACK generation after seeing a - * packet with 
RXRPC_REQUEST_ACK set (in jiffies). - */ -unsigned int rxrpc_requested_ack_delay = 1; - -/* - * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). - * - * We use this when we've received new data packets. If those packets aren't - * all consumed within this time we will send a DELAY ACK if an ACK was not - * requested to let the sender know it doesn't need to resend. - */ -unsigned int rxrpc_soft_ack_delay = 1 * HZ; - -/* - * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). - * - * We use this when we've consumed some previously soft-ACK'd packets when - * further packets aren't immediately received to decide when to send an IDLE - * ACK let the other end know that it can free up its Tx buffer space. - */ -unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; - -/* - * Receive window size in packets. This indicates the maximum number of - * unconsumed received packets we're willing to retain in memory. Once this - * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further - * packets. - */ -unsigned int rxrpc_rx_window_size = 32; - -/* - * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet - * made by gluing normal packets together that we're willing to handle. - */ -unsigned int rxrpc_rx_mtu = 5692; - -/* - * The maximum number of fragments in a received jumbo packet that we tell the - * sender that we're willing to handle. - */ -unsigned int rxrpc_rx_jumbo_max = 4; - -static const char *rxrpc_acks(u8 reason) -{ - static const char *const str[] = { - "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", - "IDL", "-?-" - }; - - if (reason >= ARRAY_SIZE(str)) - reason = ARRAY_SIZE(str) - 1; - return str[reason]; -} - -static const s8 rxrpc_ack_priority[] = { - [0] = 0, - [RXRPC_ACK_DELAY] = 1, - [RXRPC_ACK_REQUESTED] = 2, - [RXRPC_ACK_IDLE] = 3, - [RXRPC_ACK_PING_RESPONSE] = 4, - [RXRPC_ACK_DUPLICATE] = 5, - [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, - [RXRPC_ACK_EXCEEDS_WINDOW] = 7, - [RXRPC_ACK_NOSPACE] = 8, -}; - -/* * propose an ACK be sent */ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, @@ -426,7 +358,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) int tail = call->acks_tail, old_tail; int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); - kenter("{%u,%u},%u", call->acks_hard, win, hard); + _enter("{%u,%u},%u", call->acks_hard, win, hard); ASSERTCMP(hard - call->acks_hard, <=, win); @@ -656,7 +588,8 @@ process_further: _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); /* secured packets must be verified and possibly decrypted */ - if (rxrpc_verify_packet(call, skb, _abort_code) < 0) + if (call->conn->security->verify_packet(call, skb, + _abort_code) < 0) goto protocol_error; rxrpc_insert_oos_packet(call, skb); @@ -901,8 +834,8 @@ void rxrpc_process_call(struct work_struct *work) /* there's a good chance we're going to have to send a message, so set * one up in advance */ - msg.msg_name = &call->conn->trans->peer->srx.transport.sin; - msg.msg_namelen = sizeof(call->conn->trans->peer->srx.transport.sin); + msg.msg_name = &call->conn->trans->peer->srx.transport; + msg.msg_namelen = call->conn->trans->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -973,7 +906,7 @@ void rxrpc_process_call(struct work_struct *work) ECONNABORTED, true) < 0) goto no_mem; whdr.type = RXRPC_PACKET_TYPE_ABORT; - data = htonl(call->abort_code); + data = htonl(call->local_abort); iov[1].iov_base = &data; iov[1].iov_len = 
sizeof(data); genbit = RXRPC_CALL_EV_ABORT; @@ -1036,7 +969,7 @@ void rxrpc_process_call(struct work_struct *work) write_lock_bh(&call->state_lock); if (call->state <= RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_CALL_TIMEOUT; + call->local_abort = RX_CALL_TIMEOUT; set_bit(RXRPC_CALL_EV_ABORT, &call->events); } write_unlock_bh(&call->state_lock); diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c index 7c8d300ade9b..571a41fd5a32 100644 --- a/net/rxrpc/ar-call.c +++ b/net/rxrpc/ar-call.c @@ -411,18 +411,17 @@ found_extant_second: */ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_connection *conn, - struct rxrpc_host_header *hdr, - gfp_t gfp) + struct rxrpc_host_header *hdr) { struct rxrpc_call *call, *candidate; struct rb_node **p, *parent; u32 call_id; - _enter(",%d,,%x", conn->debug_id, gfp); + _enter(",%d", conn->debug_id); ASSERT(rx != NULL); - candidate = rxrpc_alloc_call(gfp); + candidate = rxrpc_alloc_call(GFP_NOIO); if (!candidate) return ERR_PTR(-EBUSY); @@ -682,7 +681,7 @@ void rxrpc_release_call(struct rxrpc_call *call) call->state != RXRPC_CALL_CLIENT_FINAL_ACK) { _debug("+++ ABORTING STATE %d +++\n", call->state); call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_CALL_DEAD; + call->local_abort = RX_CALL_DEAD; set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -758,7 +757,7 @@ static void rxrpc_mark_call_released(struct rxrpc_call *call) if (call->state < RXRPC_CALL_COMPLETE) { _debug("abort call %p", call); call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_CALL_DEAD; + call->local_abort = RX_CALL_DEAD; if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) sched = true; } diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 9942da1edbf6..97f4fae74bca 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -207,6 +207,7 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) INIT_LIST_HEAD(&conn->bundle_link); conn->calls = RB_ROOT; skb_queue_head_init(&conn->rx_queue); + conn->security = &rxrpc_no_security; rwlock_init(&conn->lock); spin_lock_init(&conn->state_lock); atomic_set(&conn->usage, 1); @@ -564,8 +565,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, candidate->debug_id, candidate->trans->debug_id); rxrpc_assign_connection_id(candidate); - if (candidate->security) - candidate->security->prime_packet_security(candidate); + candidate->security->prime_packet_security(candidate); /* leave the candidate lurking in zombie mode attached to the * bundle until we're ready for it */ @@ -619,8 +619,7 @@ interrupted: */ struct rxrpc_connection * rxrpc_incoming_connection(struct rxrpc_transport *trans, - struct rxrpc_host_header *hdr, - gfp_t gfp) + struct rxrpc_host_header *hdr) { struct rxrpc_connection *conn, *candidate = NULL; struct rb_node *p, **pp; @@ -659,7 +658,7 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans, /* not yet present - create a candidate for a new record and then * redo the search */ - candidate = rxrpc_alloc_connection(gfp); + candidate = rxrpc_alloc_connection(GFP_NOIO); if (!candidate) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); @@ -831,7 +830,10 @@ static void rxrpc_destroy_connection(struct rxrpc_connection *conn) ASSERT(RB_EMPTY_ROOT(&conn->calls)); rxrpc_purge_queue(&conn->rx_queue); - rxrpc_clear_conn_security(conn); + conn->security->clear(conn); + key_put(conn->key); + key_put(conn->server_key); + rxrpc_put_transport(conn->trans); kfree(conn); 
_leave(""); diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c index 1bdaaed8cdc4..5f9563968a5b 100644 --- a/net/rxrpc/ar-connevent.c +++ b/net/rxrpc/ar-connevent.c @@ -40,11 +40,13 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, write_lock(&call->state_lock); if (call->state <= RXRPC_CALL_COMPLETE) { call->state = state; - call->abort_code = abort_code; - if (state == RXRPC_CALL_LOCALLY_ABORTED) + if (state == RXRPC_CALL_LOCALLY_ABORTED) { + call->local_abort = conn->local_abort; set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - else + } else { + call->remote_abort = conn->remote_abort; set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); + } rxrpc_queue_call(call); } write_unlock(&call->state_lock); @@ -84,8 +86,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code); - msg.msg_name = &conn->trans->peer->srx.transport.sin; - msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin); + msg.msg_name = &conn->trans->peer->srx.transport; + msg.msg_namelen = conn->trans->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -101,7 +103,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, whdr._rsvd = 0; whdr.serviceId = htons(conn->service_id); - word = htonl(abort_code); + word = htonl(conn->local_abort); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); @@ -112,7 +114,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx CONN ABORT %%%u { %d }", serial, abort_code); + _proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort); ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len); if (ret < 0) { @@ -172,15 +174,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: - if (conn->security) - return conn->security->respond_to_challenge( - conn, skb, _abort_code); - return -EPROTO; + return conn->security->respond_to_challenge(conn, skb, + _abort_code); case RXRPC_PACKET_TYPE_RESPONSE: - if (!conn->security) - return -EPROTO; - ret = conn->security->verify_response(conn, skb, _abort_code); if (ret < 0) return ret; @@ -236,8 +233,6 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn) } } - ASSERT(conn->security != NULL); - if (conn->security->issue_challenge(conn) < 0) { abort_code = RX_CALL_DEAD; ret = -ENOMEM; diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 63ed75c40e29..01e038146b7c 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -25,12 +25,6 @@ #include <net/net_namespace.h> #include "ar-internal.h" -const char *rxrpc_pkts[] = { - "?00", - "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", - "?09", "?10", "?11", "?12", "VERSION", "?14", "?15" -}; - /* * queue a packet for recvmsg to pass to userspace * - the caller must hold a lock on call->lock @@ -199,7 +193,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, /* if the packet need security things doing to it, then it goes down * the slow path */ - if (call->conn->security) + if (call->conn->security_ix) goto enqueue_packet; sp->call = call; @@ -355,7 +349,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) write_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_REMOTELY_ABORTED; - call->abort_code = abort_code; + 
call->remote_abort = abort_code; set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); rxrpc_queue_call(call); } @@ -428,7 +422,7 @@ protocol_error: protocol_error_locked: if (call->state <= RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_PROTOCOL_ERROR; + call->local_abort = RX_PROTOCOL_ERROR; set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -500,7 +494,7 @@ protocol_error: write_lock_bh(&call->state_lock); if (call->state <= RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = RX_PROTOCOL_ERROR; + call->local_abort = RX_PROTOCOL_ERROR; set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -612,9 +606,9 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) struct rxrpc_wire_header whdr; /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, sizeof(struct udphdr), &whdr, sizeof(whdr)) < 0) + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) return -EBADMSG; - if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(whdr))) + if (!pskb_pull(skb, sizeof(whdr))) BUG(); memset(sp, 0, sizeof(*sp)); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index cd6cdbe87125..f0b807a163fa 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <net/sock.h> #include <rxrpc/packet.h> #if 0 @@ -124,11 +125,15 @@ enum rxrpc_command { * RxRPC security module interface */ struct rxrpc_security { - struct module *owner; /* providing module */ - struct list_head link; /* link in master list */ const char *name; /* name of this service */ u8 security_index; /* security type provided */ + /* Initialise a security service */ + int (*init)(void); + + /* Clean up a security service */ + void (*exit)(void); + /* initialise a connection's security */ int (*init_connection_security)(struct rxrpc_connection *); @@ -268,7 +273,7 @@ struct rxrpc_connection { struct rb_root calls; /* calls on this connection */ struct sk_buff_head rx_queue; /* received conn-level packets */ struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* channels (active calls) */ - struct rxrpc_security *security; /* applied security module */ + const struct rxrpc_security *security; /* applied security module */ struct key *key; /* security for this connection (client) */ struct key *server_key; /* security for this service */ struct crypto_skcipher *cipher; /* encryption handle */ @@ -289,7 +294,9 @@ struct rxrpc_connection { RXRPC_CONN_LOCALLY_ABORTED, /* - conn aborted locally */ RXRPC_CONN_NETWORK_ERROR, /* - conn terminated by network error */ } state; - int error; /* error code for local abort */ + u32 local_abort; /* local abort code */ + u32 remote_abort; /* remote abort code */ + int error; /* local error incurred */ int debug_id; /* debug ID for printks */ unsigned int call_counter; /* call ID counter */ atomic_t serial; /* packet serial number counter */ @@ -399,7 +406,9 @@ struct rxrpc_call { rwlock_t state_lock; /* lock for state transition */ atomic_t usage; atomic_t sequence; /* Tx data packet sequence counter */ - u32 abort_code; /* local/remote abort code */ + u32 local_abort; /* local abort code */ + u32 remote_abort; /* remote abort code */ + int error; /* local error incurred */ enum rxrpc_call_state state : 8; /* current state of call */ int debug_id; /* debug ID for printks */ u8 channel; /* connection channel occupied by this call */ @@ -453,7 +462,7 @@ static inline void 
rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code) { write_lock_bh(&call->state_lock); if (call->state < RXRPC_CALL_COMPLETE) { - call->abort_code = abort_code; + call->local_abort = abort_code; call->state = RXRPC_CALL_LOCALLY_ABORTED; set_bit(RXRPC_CALL_EV_ABORT, &call->events); } @@ -478,13 +487,6 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * ar-ack.c */ -extern unsigned int rxrpc_requested_ack_delay; -extern unsigned int rxrpc_soft_ack_delay; -extern unsigned int rxrpc_idle_ack_delay; -extern unsigned int rxrpc_rx_window_size; -extern unsigned int rxrpc_rx_mtu; -extern unsigned int rxrpc_rx_jumbo_max; - void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); void rxrpc_process_call(struct work_struct *); @@ -506,7 +508,7 @@ struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, unsigned long, int, gfp_t); struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_connection *, - struct rxrpc_host_header *, gfp_t); + struct rxrpc_host_header *); struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long); void rxrpc_release_call(struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); @@ -531,8 +533,7 @@ void __exit rxrpc_destroy_all_connections(void); struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *, struct rxrpc_host_header *); extern struct rxrpc_connection * -rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_host_header *, - gfp_t); +rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_host_header *); /* * ar-connevent.c @@ -550,8 +551,6 @@ void rxrpc_UDP_error_handler(struct work_struct *); /* * ar-input.c */ -extern const char *rxrpc_pkts[]; - void rxrpc_data_ready(struct sock *); int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool); void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); @@ -610,14 +609,10 @@ int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * ar-security.c */ -int rxrpc_register_security(struct rxrpc_security *); -void rxrpc_unregister_security(struct rxrpc_security *); +int __init rxrpc_init_security(void); +void rxrpc_exit_security(void); int rxrpc_init_client_conn_security(struct rxrpc_connection *); int rxrpc_init_server_conn_security(struct rxrpc_connection *); -int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *, size_t, - void *); -int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *, u32 *); -void rxrpc_clear_conn_security(struct rxrpc_connection *); /* * ar-skbuff.c @@ -637,6 +632,33 @@ struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *, struct rxrpc_peer *); /* + * insecure.c + */ +extern const struct rxrpc_security rxrpc_no_security; + +/* + * misc.c + */ +extern unsigned int rxrpc_requested_ack_delay; +extern unsigned int rxrpc_soft_ack_delay; +extern unsigned int rxrpc_idle_ack_delay; +extern unsigned int rxrpc_rx_window_size; +extern unsigned int rxrpc_rx_mtu; +extern unsigned int rxrpc_rx_jumbo_max; + +extern const char *const rxrpc_pkts[]; +extern const s8 rxrpc_ack_priority[]; + +extern const char *rxrpc_acks(u8 reason); + +/* + * rxkad.c + */ +#ifdef CONFIG_RXKAD +extern const struct rxrpc_security rxkad; +#endif + +/* * sysctl.c */ #ifdef CONFIG_SYSCTL diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index d36fb6e1a29c..51cb10062a8d 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -110,7 +110,7 @@ static void 
rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) if (call->state <= RXRPC_CALL_COMPLETE) { call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->abort_code = abort_code; + call->local_abort = abort_code; set_bit(RXRPC_CALL_EV_ABORT, &call->events); del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); @@ -663,7 +663,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, size_t pad; /* pad out if we're using security */ - if (conn->security) { + if (conn->security_ix) { pad = conn->security_size + skb->mark; pad = conn->size_align - pad; pad &= conn->size_align - 1; @@ -695,7 +695,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (more && seq & 1) sp->hdr.flags |= RXRPC_REQUEST_ACK; - ret = rxrpc_secure_packet( + ret = conn->security->secure_packet( call, skb, skb->mark, skb->head + sizeof(struct rxrpc_wire_header)); if (ret < 0) diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c index 525b2ba5a8f4..225163bc658d 100644 --- a/net/rxrpc/ar-proc.c +++ b/net/rxrpc/ar-proc.c @@ -80,7 +80,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->conn->in_clientflag ? "Svc" : "Clt", atomic_read(&call->usage), rxrpc_call_states[call->state], - call->abort_code, + call->remote_abort ?: call->local_abort, call->user_call_ID); return 0; diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 64facba24a45..160f0927aa3e 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -288,7 +288,11 @@ receive_non_data_message: ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code); break; case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = call->abort_code; + abort_code = call->remote_abort; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); + break; + case RXRPC_SKB_MARK_LOCAL_ABORT: + abort_code = call->local_abort; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); break; case RXRPC_SKB_MARK_NET_ERROR: @@ -303,6 +307,7 @@ receive_non_data_message: &abort_code); break; default: + pr_err("RxRPC: Unknown packet mark %u\n", skb->mark); BUG(); break; } @@ -401,9 +406,14 @@ u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_REMOTE_ABORT); - - return sp->call->abort_code; + switch (skb->mark) { + case RXRPC_SKB_MARK_REMOTE_ABORT: + return sp->call->remote_abort; + case RXRPC_SKB_MARK_LOCAL_ABORT: + return sp->call->local_abort; + default: + BUG(); + } } EXPORT_SYMBOL(rxrpc_kernel_get_abort_code); diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c index ceff6394a65f..d223253b22fa 100644 --- a/net/rxrpc/ar-security.c +++ b/net/rxrpc/ar-security.c @@ -22,109 +22,60 @@ static LIST_HEAD(rxrpc_security_methods); static DECLARE_RWSEM(rxrpc_security_sem); -/* - * get an RxRPC security module - */ -static struct rxrpc_security *rxrpc_security_get(struct rxrpc_security *sec) -{ - return try_module_get(sec->owner) ? 
sec : NULL; -} - -/* - * release an RxRPC security module - */ -static void rxrpc_security_put(struct rxrpc_security *sec) +static const struct rxrpc_security *rxrpc_security_types[] = { + [RXRPC_SECURITY_NONE] = &rxrpc_no_security, +#ifdef CONFIG_RXKAD + [RXRPC_SECURITY_RXKAD] = &rxkad, +#endif +}; + +int __init rxrpc_init_security(void) { - module_put(sec->owner); -} - -/* - * look up an rxrpc security module - */ -static struct rxrpc_security *rxrpc_security_lookup(u8 security_index) -{ - struct rxrpc_security *sec = NULL; - - _enter(""); + int i, ret; - down_read(&rxrpc_security_sem); - - list_for_each_entry(sec, &rxrpc_security_methods, link) { - if (sec->security_index == security_index) { - if (unlikely(!rxrpc_security_get(sec))) - break; - goto out; + for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) { + if (rxrpc_security_types[i]) { + ret = rxrpc_security_types[i]->init(); + if (ret < 0) + goto failed; } } - sec = NULL; -out: - up_read(&rxrpc_security_sem); - _leave(" = %p [%s]", sec, sec ? sec->name : ""); - return sec; + return 0; + +failed: + for (i--; i >= 0; i--) + if (rxrpc_security_types[i]) + rxrpc_security_types[i]->exit(); + return ret; } -/** - * rxrpc_register_security - register an RxRPC security handler - * @sec: security module - * - * register an RxRPC security handler for use by RxRPC - */ -int rxrpc_register_security(struct rxrpc_security *sec) +void rxrpc_exit_security(void) { - struct rxrpc_security *psec; - int ret; + int i; - _enter(""); - down_write(&rxrpc_security_sem); - - ret = -EEXIST; - list_for_each_entry(psec, &rxrpc_security_methods, link) { - if (psec->security_index == sec->security_index) - goto out; - } - - list_add(&sec->link, &rxrpc_security_methods); - - printk(KERN_NOTICE "RxRPC: Registered security type %d '%s'\n", - sec->security_index, sec->name); - ret = 0; - -out: - up_write(&rxrpc_security_sem); - _leave(" = %d", ret); - return ret; + for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) + if (rxrpc_security_types[i]) + rxrpc_security_types[i]->exit(); } -EXPORT_SYMBOL_GPL(rxrpc_register_security); - -/** - * rxrpc_unregister_security - unregister an RxRPC security handler - * @sec: security module - * - * unregister an RxRPC security handler +/* + * look up an rxrpc security module */ -void rxrpc_unregister_security(struct rxrpc_security *sec) +static const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) { - - _enter(""); - down_write(&rxrpc_security_sem); - list_del_init(&sec->link); - up_write(&rxrpc_security_sem); - - printk(KERN_NOTICE "RxRPC: Unregistered security type %d '%s'\n", - sec->security_index, sec->name); + if (security_index >= ARRAY_SIZE(rxrpc_security_types)) + return NULL; + return rxrpc_security_types[security_index]; } -EXPORT_SYMBOL_GPL(rxrpc_unregister_security); - /* * initialise the security on a client connection */ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) { + const struct rxrpc_security *sec; struct rxrpc_key_token *token; - struct rxrpc_security *sec; struct key *key = conn->key; int ret; @@ -148,8 +99,7 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) ret = conn->security->init_connection_security(conn); if (ret < 0) { - rxrpc_security_put(conn->security); - conn->security = NULL; + conn->security = &rxrpc_no_security; return ret; } @@ -162,7 +112,7 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) */ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) { - struct rxrpc_security *sec; + const struct 
rxrpc_security *sec; struct rxrpc_local *local = conn->trans->local; struct rxrpc_sock *rx; struct key *key; @@ -188,14 +138,12 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) /* the service appears to have died */ read_unlock_bh(&local->services_lock); - rxrpc_security_put(sec); _leave(" = -ENOENT"); return -ENOENT; found_service: if (!rx->securities) { read_unlock_bh(&local->services_lock); - rxrpc_security_put(sec); _leave(" = -ENOKEY"); return -ENOKEY; } @@ -205,7 +153,6 @@ found_service: &key_type_rxrpc_s, kdesc); if (IS_ERR(kref)) { read_unlock_bh(&local->services_lock); - rxrpc_security_put(sec); _leave(" = %ld [search]", PTR_ERR(kref)); return PTR_ERR(kref); } @@ -219,46 +166,3 @@ found_service: _leave(" = 0"); return 0; } - -/* - * secure a packet prior to transmission - */ -int rxrpc_secure_packet(const struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size, - void *sechdr) -{ - if (call->conn->security) - return call->conn->security->secure_packet( - call, skb, data_size, sechdr); - return 0; -} - -/* - * secure a packet prior to transmission - */ -int rxrpc_verify_packet(const struct rxrpc_call *call, struct sk_buff *skb, - u32 *_abort_code) -{ - if (call->conn->security) - return call->conn->security->verify_packet( - call, skb, _abort_code); - return 0; -} - -/* - * clear connection security - */ -void rxrpc_clear_conn_security(struct rxrpc_connection *conn) -{ - _enter("{%d}", conn->debug_id); - - if (conn->security) { - conn->security->clear(conn); - rxrpc_security_put(conn->security); - conn->security = NULL; - } - - key_put(conn->key); - key_put(conn->server_key); -} diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c new file mode 100644 index 000000000000..e571403613c1 --- /dev/null +++ b/net/rxrpc/insecure.c @@ -0,0 +1,83 @@ +/* Null security operations. + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +static int none_init_connection_security(struct rxrpc_connection *conn) +{ + return 0; +} + +static void none_prime_packet_security(struct rxrpc_connection *conn) +{ +} + +static int none_secure_packet(const struct rxrpc_call *call, + struct sk_buff *skb, + size_t data_size, + void *sechdr) +{ + return 0; +} + +static int none_verify_packet(const struct rxrpc_call *call, + struct sk_buff *skb, + u32 *_abort_code) +{ + return 0; +} + +static int none_respond_to_challenge(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 *_abort_code) +{ + *_abort_code = RX_PROTOCOL_ERROR; + return -EPROTO; +} + +static int none_verify_response(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 *_abort_code) +{ + *_abort_code = RX_PROTOCOL_ERROR; + return -EPROTO; +} + +static void none_clear(struct rxrpc_connection *conn) +{ +} + +static int none_init(void) +{ + return 0; +} + +static void none_exit(void) +{ +} + +/* + * Null RxRPC security operations + */ +const struct rxrpc_security rxrpc_no_security = { + .name = "none", + .security_index = RXRPC_SECURITY_NONE, + .init = none_init, + .exit = none_exit, + .init_connection_security = none_init_connection_security, + .prime_packet_security = none_prime_packet_security, + .secure_packet = none_secure_packet, + .verify_packet = none_verify_packet, + .respond_to_challenge = none_respond_to_challenge, + .verify_response = none_verify_response, + .clear = none_clear, +}; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c new file mode 100644 index 000000000000..1afe9876e79f --- /dev/null +++ b/net/rxrpc/misc.c @@ -0,0 +1,89 @@ +/* Miscellaneous bits + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +/* + * How long to wait before scheduling ACK generation after seeing a + * packet with RXRPC_REQUEST_ACK set (in jiffies). + */ +unsigned int rxrpc_requested_ack_delay = 1; + +/* + * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * + * We use this when we've received new data packets. If those packets aren't + * all consumed within this time we will send a DELAY ACK if an ACK was not + * requested to let the sender know it doesn't need to resend. + */ +unsigned int rxrpc_soft_ack_delay = 1 * HZ; + +/* + * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * + * We use this when we've consumed some previously soft-ACK'd packets when + * further packets aren't immediately received to decide when to send an IDLE + * ACK to let the other end know that it can free up its Tx buffer space. + */ +unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; + +/* + * Receive window size in packets. This indicates the maximum number of + * unconsumed received packets we're willing to retain in memory. Once this + * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further + * packets. + */ +unsigned int rxrpc_rx_window_size = 32; + +/* + * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet + * made by gluing normal packets together that we're willing to handle. 
+ */ +unsigned int rxrpc_rx_mtu = 5692; + +/* + * The maximum number of fragments in a received jumbo packet that we tell the + * sender that we're willing to handle. + */ +unsigned int rxrpc_rx_jumbo_max = 4; + +const char *const rxrpc_pkts[] = { + "?00", + "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", + "?09", "?10", "?11", "?12", "VERSION", "?14", "?15" +}; + +const s8 rxrpc_ack_priority[] = { + [0] = 0, + [RXRPC_ACK_DELAY] = 1, + [RXRPC_ACK_REQUESTED] = 2, + [RXRPC_ACK_IDLE] = 3, + [RXRPC_ACK_PING_RESPONSE] = 4, + [RXRPC_ACK_DUPLICATE] = 5, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, + [RXRPC_ACK_EXCEEDS_WINDOW] = 7, + [RXRPC_ACK_NOSPACE] = 8, +}; + +const char *rxrpc_acks(u8 reason) +{ + static const char *const str[] = { + "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", + "IDL", "-?-" + }; + + if (reason >= ARRAY_SIZE(str)) + reason = ARRAY_SIZE(str) - 1; + return str[reason]; +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index f0aeb8163688..6b726a046a7d 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -20,7 +20,6 @@ #include <net/sock.h> #include <net/af_rxrpc.h> #include <keys/rxrpc-type.h> -#define rxrpc_debug rxkad_debug #include "ar-internal.h" #define RXKAD_VERSION 2 @@ -31,10 +30,6 @@ #define REALM_SZ 40 /* size of principal's auth domain */ #define SNAME_SZ 40 /* size of service name */ -unsigned int rxrpc_debug; -module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(debug, "rxkad debugging mask"); - struct rxkad_level1_hdr { __be32 data_size; /* true data size (excluding padding) */ }; @@ -44,10 +39,6 @@ struct rxkad_level2_hdr { __be32 checksum; /* decrypted data checksum */ }; -MODULE_DESCRIPTION("RxRPC network protocol type-2 security (Kerberos 4)"); -MODULE_AUTHOR("Red Hat, Inc."); -MODULE_LICENSE("GPL"); - /* * this holds a pinned cipher so that keventd doesn't get called by the cipher * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE @@ -1164,12 +1155,35 @@ static void rxkad_clear(struct rxrpc_connection *conn) } /* + * Initialise the rxkad security service. + */ +static int rxkad_init(void) +{ + /* pin the cipher we need so that the crypto layer doesn't invoke + * keventd to go get it */ + rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(rxkad_ci)) + return PTR_ERR(rxkad_ci); + return 0; +} + +/* + * Clean up the rxkad security service. 
+ */ +static void rxkad_exit(void) +{ + if (rxkad_ci) + crypto_free_skcipher(rxkad_ci); +} + +/* * RxRPC Kerberos-based security */ -static struct rxrpc_security rxkad = { - .owner = THIS_MODULE, +const struct rxrpc_security rxkad = { .name = "rxkad", .security_index = RXRPC_SECURITY_RXKAD, + .init = rxkad_init, + .exit = rxkad_exit, .init_connection_security = rxkad_init_connection_security, .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, @@ -1179,28 +1193,3 @@ static struct rxrpc_security rxkad = { .verify_response = rxkad_verify_response, .clear = rxkad_clear, }; - -static __init int rxkad_init(void) -{ - _enter(""); - - /* pin the cipher we need so that the crypto layer doesn't invoke - * keventd to go get it */ - rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(rxkad_ci)) - return PTR_ERR(rxkad_ci); - - return rxrpc_register_security(&rxkad); -} - -module_init(rxkad_init); - -static __exit void rxkad_exit(void) -{ - _enter(""); - - rxrpc_unregister_security(&rxkad); - crypto_free_skcipher(rxkad_ci); -} - -module_exit(rxkad_exit); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index f2aabc0089da..a309a07ccb35 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -796,7 +796,7 @@ struct meta_type_ops { int (*dump)(struct sk_buff *, struct meta_value *, int); }; -static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { +static const struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { [TCF_META_TYPE_VAR] = { .destroy = meta_var_destroy, .compare = meta_var_compare, @@ -812,7 +812,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { } }; -static inline struct meta_type_ops *meta_type_ops(struct meta_value *v) +static inline const struct meta_type_ops *meta_type_ops(struct meta_value *v) { return &__meta_type_ops[meta_type(v)]; } @@ -870,7 +870,7 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m, static void meta_delete(struct meta_match *meta) { if (meta) { - struct meta_type_ops *ops = meta_type_ops(&meta->lvalue); + const struct meta_type_ops *ops = meta_type_ops(&meta->lvalue); if (ops && ops->destroy) { ops->destroy(&meta->lvalue); @@ -964,7 +964,7 @@ static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em) { struct meta_match *meta = (struct meta_match *) em->data; struct tcf_meta_hdr hdr; - struct meta_type_ops *ops; + const struct meta_type_ops *ops; memset(&hdr, 0, sizeof(hdr)); memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left)); diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 71c1a598d9bc..d9c04dc1b3f3 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -99,5 +99,9 @@ config SCTP_COOKIE_HMAC_SHA1 select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1 select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1 +config INET_SCTP_DIAG + depends on INET_DIAG + def_tristate INET_DIAG + endif # IP_SCTP diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 3b4ffb021cf1..0fca5824ad0e 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_IP_SCTP) += sctp.o obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o +obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 7e8a16c77039..b335ffcef0b9 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -163,6 +163,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk->singleton = 1; ch = (sctp_chunkhdr_t 
*) chunk->skb->data; chunk->data_accepted = 0; + + if (chunk->asoc) + sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb); } chunk->chunk_hdr = ch; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 5cfac8d5d3b3..4cb5aedfe3ee 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -280,82 +280,38 @@ void sctp_eps_proc_exit(struct net *net) struct sctp_ht_iter { struct seq_net_private p; struct rhashtable_iter hti; + int start_fail; }; -static struct sctp_transport *sctp_transport_get_next(struct seq_file *seq) +static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; - struct sctp_transport *t; - - t = rhashtable_walk_next(&iter->hti); - for (; t; t = rhashtable_walk_next(&iter->hti)) { - if (IS_ERR(t)) { - if (PTR_ERR(t) == -EAGAIN) - continue; - break; - } + int err = sctp_transport_walk_start(&iter->hti); - if (net_eq(sock_net(t->asoc->base.sk), seq_file_net(seq)) && - t->asoc->peer.primary_path == t) - break; + if (err) { + iter->start_fail = 1; + return ERR_PTR(err); } - return t; + return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); } -static struct sctp_transport *sctp_transport_get_idx(struct seq_file *seq, - loff_t pos) -{ - void *obj = SEQ_START_TOKEN; - - while (pos && (obj = sctp_transport_get_next(seq)) && !IS_ERR(obj)) - pos--; - - return obj; -} - -static int sctp_transport_walk_start(struct seq_file *seq) +static void sctp_transport_seq_stop(struct seq_file *seq, void *v) { struct sctp_ht_iter *iter = seq->private; - int err; - - err = rhashtable_walk_init(&sctp_transport_hashtable, &iter->hti); - if (err) - return err; - - err = rhashtable_walk_start(&iter->hti); - return err == -EAGAIN ? 0 : err; + if (iter->start_fail) + return; + sctp_transport_walk_stop(&iter->hti); } -static void sctp_transport_walk_stop(struct seq_file *seq) +static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; - rhashtable_walk_stop(&iter->hti); - rhashtable_walk_exit(&iter->hti); -} - -static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) -{ - int err = sctp_transport_walk_start(seq); - - if (err) - return ERR_PTR(err); - - return sctp_transport_get_idx(seq, *pos); -} - -static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) -{ - sctp_transport_walk_stop(seq); -} - -static void *sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ ++*pos; - return sctp_transport_get_next(seq); + return sctp_transport_get_next(seq_file_net(seq), &iter->hti); } /* Display sctp associations (/proc/net/sctp/assocs). 
*/ @@ -416,9 +372,9 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) } static const struct seq_operations sctp_assoc_ops = { - .start = sctp_assocs_seq_start, - .next = sctp_assocs_seq_next, - .stop = sctp_assocs_seq_stop, + .start = sctp_transport_seq_start, + .next = sctp_transport_seq_next, + .stop = sctp_transport_seq_stop, .show = sctp_assocs_seq_show, }; @@ -455,28 +411,6 @@ void sctp_assocs_proc_exit(struct net *net) remove_proc_entry("assocs", net->sctp.proc_net_sctp); } -static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) -{ - int err = sctp_transport_walk_start(seq); - - if (err) - return ERR_PTR(err); - - return sctp_transport_get_idx(seq, *pos); -} - -static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - - return sctp_transport_get_next(seq); -} - -static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) -{ - sctp_transport_walk_stop(seq); -} - static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { struct sctp_association *assoc; @@ -550,9 +484,9 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) } static const struct seq_operations sctp_remaddr_ops = { - .start = sctp_remaddr_seq_start, - .next = sctp_remaddr_seq_next, - .stop = sctp_remaddr_seq_stop, + .start = sctp_transport_seq_start, + .next = sctp_transport_seq_next, + .stop = sctp_transport_seq_stop, .show = sctp_remaddr_seq_show, }; diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c new file mode 100644 index 000000000000..bb2d8d9608e9 --- /dev/null +++ b/net/sctp/sctp_diag.c @@ -0,0 +1,495 @@ +#include <linux/module.h> +#include <linux/inet_diag.h> +#include <linux/sock_diag.h> +#include <net/sctp/sctp.h> + +extern void inet_diag_msg_common_fill(struct inet_diag_msg *r, + struct sock *sk); +extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns); + +static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info); + +/* define some functions to make asoc/ep fill look clean */ +static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, + struct sock *sk, + struct sctp_association *asoc) +{ + union sctp_addr laddr, paddr; + struct dst_entry *dst; + + laddr = list_entry(asoc->base.bind_addr.address_list.next, + struct sctp_sockaddr_entry, list)->a; + paddr = asoc->peer.primary_path->ipaddr; + dst = asoc->peer.primary_path->dst; + + r->idiag_family = sk->sk_family; + r->id.idiag_sport = htons(asoc->base.bind_addr.port); + r->id.idiag_dport = htons(asoc->peer.port); + r->id.idiag_if = dst ? 
dst->dev->ifindex : 0; + sock_diag_save_cookie(sk, r->id.idiag_cookie); + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + *(struct in6_addr *)r->id.idiag_src = laddr.v6.sin6_addr; + *(struct in6_addr *)r->id.idiag_dst = paddr.v6.sin6_addr; + } else +#endif + { + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + + r->id.idiag_src[0] = laddr.v4.sin_addr.s_addr; + r->id.idiag_dst[0] = paddr.v4.sin_addr.s_addr; + } + + r->idiag_state = asoc->state; + r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; + r->idiag_retrans = asoc->rtx_data_chunks; + r->idiag_expires = jiffies_to_msecs( + asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies); +} + +static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, + struct list_head *address_list) +{ + struct sctp_sockaddr_entry *laddr; + int addrlen = sizeof(struct sockaddr_storage); + int addrcnt = 0; + struct nlattr *attr; + void *info = NULL; + + list_for_each_entry_rcu(laddr, address_list, list) + addrcnt++; + + attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt); + if (!attr) + return -EMSGSIZE; + + info = nla_data(attr); + list_for_each_entry_rcu(laddr, address_list, list) { + memcpy(info, &laddr->a, addrlen); + info += addrlen; + } + + return 0; +} + +static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb, + struct sctp_association *asoc) +{ + int addrlen = sizeof(struct sockaddr_storage); + struct sctp_transport *from; + struct nlattr *attr; + void *info = NULL; + + attr = nla_reserve(skb, INET_DIAG_PEERS, + addrlen * asoc->peer.transport_count); + if (!attr) + return -EMSGSIZE; + + info = nla_data(attr); + list_for_each_entry(from, &asoc->peer.transport_addr_list, + transports) { + memcpy(info, &from->ipaddr, addrlen); + info += addrlen; + } + + return 0; +} + +/* sctp asoc/ep fill*/ +static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, + struct sk_buff *skb, + const struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, + int portid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + struct sctp_endpoint *ep = sctp_sk(sk)->ep; + struct list_head *addr_list; + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + int ext = req->idiag_ext; + struct sctp_infox infox; + void *info = NULL; + + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), + nlmsg_flags); + if (!nlh) + return -EMSGSIZE; + + r = nlmsg_data(nlh); + BUG_ON(!sk_fullsock(sk)); + + if (asoc) { + inet_diag_msg_sctpasoc_fill(r, sk, asoc); + } else { + inet_diag_msg_common_fill(r, sk); + r->idiag_state = sk->sk_state; + r->idiag_timer = 0; + r->idiag_retrans = 0; + } + + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) + goto errout; + + if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) { + u32 mem[SK_MEMINFO_VARS]; + int amt; + + if (asoc && asoc->ep->sndbuf_policy) + amt = asoc->sndbuf_used; + else + amt = sk_wmem_alloc_get(sk); + mem[SK_MEMINFO_WMEM_ALLOC] = amt; + mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); + mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); + mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); + + if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0) + goto errout; + } + + if (ext & (1 << (INET_DIAG_INFO - 1))) { + struct nlattr *attr; + + attr = nla_reserve(skb, 
INET_DIAG_INFO, + sizeof(struct sctp_info)); + if (!attr) + goto errout; + + info = nla_data(attr); + } + infox.sctpinfo = (struct sctp_info *)info; + infox.asoc = asoc; + sctp_diag_get_info(sk, r, &infox); + + addr_list = asoc ? &asoc->base.bind_addr.address_list + : &ep->base.bind_addr.address_list; + if (inet_diag_msg_sctpladdrs_fill(skb, addr_list)) + goto errout; + + if (asoc && (ext & (1 << (INET_DIAG_CONG - 1)))) + if (nla_put_string(skb, INET_DIAG_CONG, "reno") < 0) + goto errout; + + if (asoc && inet_diag_msg_sctpaddrs_fill(skb, asoc)) + goto errout; + + nlmsg_end(skb, nlh); + return 0; + +errout: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* callback and param */ +struct sctp_comm_param { + struct sk_buff *skb; + struct netlink_callback *cb; + const struct inet_diag_req_v2 *r; + const struct nlmsghdr *nlh; +}; + +static size_t inet_assoc_attr_size(struct sctp_association *asoc) +{ + int addrlen = sizeof(struct sockaddr_storage); + int addrcnt = 0; + struct sctp_sockaddr_entry *laddr; + + list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list, + list) + addrcnt++; + + return nla_total_size(sizeof(struct sctp_info)) + + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(addrlen * asoc->peer.transport_count) + + nla_total_size(addrlen * addrcnt) + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + 64; +} + +static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) +{ + struct sctp_association *assoc = tsp->asoc; + struct sock *sk = tsp->asoc->base.sk; + struct sctp_comm_param *commp = p; + struct sk_buff *in_skb = commp->skb; + const struct inet_diag_req_v2 *req = commp->r; + const struct nlmsghdr *nlh = commp->nlh; + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + int err; + + err = sock_diag_check_cookie(sk, req->id.idiag_cookie); + if (err) + goto out; + + err = -ENOMEM; + rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); + if (!rep) + goto out; + + lock_sock(sk); + if (sk != assoc->base.sk) { + release_sock(sk); + sk = assoc->base.sk; + lock_sock(sk); + } + err = inet_sctp_diag_fill(sk, assoc, rep, req, + sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0, nlh); + release_sock(sk); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(rep); + goto out; + } + + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, + MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + return err; +} + +static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) +{ + struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct sk_buff *skb = commp->skb; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct sctp_association *assoc = + list_entry(ep->asocs.next, struct sctp_association, asocs); + int err = 0; + + /* find the ep only once through the transports by this condition */ + if (tsp->asoc != assoc) + goto out; + + if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) + goto out; + + lock_sock(sk); + if (sk != assoc->base.sk) + goto release; + list_for_each_entry(assoc, &ep->asocs, asocs) { + if (cb->args[4] < cb->args[1]) + goto next; + + if (r->id.idiag_sport != htons(assoc->base.bind_addr.port) && + r->id.idiag_sport) + goto next; + if (r->id.idiag_dport != htons(assoc->peer.port) && + r->id.idiag_dport) + goto next; + + 
if (!cb->args[3] && + inet_sctp_diag_fill(sk, NULL, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb->nlh) < 0) { + cb->args[3] = 1; + err = 2; + goto release; + } + cb->args[3] = 1; + + if (inet_sctp_diag_fill(sk, assoc, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) { + err = 2; + goto release; + } +next: + cb->args[4]++; + } + cb->args[1] = 0; + cb->args[2]++; + cb->args[3] = 0; + cb->args[4] = 0; +release: + release_sock(sk); + return err; +out: + cb->args[2]++; + return err; +} + +static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) +{ + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct sk_buff *skb = commp->skb; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct net *net = sock_net(skb->sk); + struct inet_sock *inet = inet_sk(sk); + int err = 0; + + if (!net_eq(sock_net(sk), net)) + goto out; + + if (cb->args[4] < cb->args[1]) + goto next; + + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next; + + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next; + + if (r->id.idiag_dport != inet->inet_dport && + r->id.idiag_dport) + goto next; + + if (inet_sctp_diag_fill(sk, NULL, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->nlh) < 0) { + err = 2; + goto out; + } +next: + cb->args[4]++; +out: + return err; +} + +/* define the functions for sctp_diag_handler*/ +static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info) +{ + struct sctp_infox *infox = (struct sctp_infox *)info; + + if (infox->asoc) { + r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc); + r->idiag_wqueue = infox->asoc->sndbuf_used; + } else { + r->idiag_rqueue = sk->sk_ack_backlog; + r->idiag_wqueue = sk->sk_max_ack_backlog; + } + if (infox->sctpinfo) + sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo); +} + +static int sctp_diag_dump_one(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + union sctp_addr laddr, paddr; + struct sctp_comm_param commp = { + .skb = in_skb, + .r = req, + .nlh = nlh, + }; + + if (req->sdiag_family == AF_INET) { + laddr.v4.sin_port = req->id.idiag_sport; + laddr.v4.sin_addr.s_addr = req->id.idiag_src[0]; + laddr.v4.sin_family = AF_INET; + + paddr.v4.sin_port = req->id.idiag_dport; + paddr.v4.sin_addr.s_addr = req->id.idiag_dst[0]; + paddr.v4.sin_family = AF_INET; + } else { + laddr.v6.sin6_port = req->id.idiag_sport; + memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64); + laddr.v6.sin6_family = AF_INET6; + + paddr.v6.sin6_port = req->id.idiag_dport; + memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64); + paddr.v6.sin6_family = AF_INET6; + } + + return sctp_transport_lookup_process(sctp_tsp_dump_one, + net, &laddr, &paddr, &commp); +} + +static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + u32 idiag_states = r->idiag_states; + struct net *net = sock_net(skb->sk); + struct sctp_comm_param commp = { + .skb = skb, + .cb = cb, + .r = r, + }; + + /* eps hashtable dumps + * args: + * 0 : if it will traversal listen sock + * 1 : to record the sock pos of this time's traversal + * 4 : to work as a temporary variable to traversal list + */ + if (cb->args[0] == 0) { + if 
(!(idiag_states & TCPF_LISTEN)) + goto skip; + if (sctp_for_each_endpoint(sctp_ep_dump, &commp)) + goto done; +skip: + cb->args[0] = 1; + cb->args[1] = 0; + cb->args[4] = 0; + } + + /* asocs by transport hashtable dump + * args: + * 1 : to record the assoc pos of this time's traversal + * 2 : to record the transport pos of this time's traversal + * 3 : to mark if we have dumped the ep info of the current asoc + * 4 : to work as a temporary variable to traversal list + */ + if (!(idiag_states & ~TCPF_LISTEN)) + goto done; + sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp); +done: + cb->args[1] = cb->args[4]; + cb->args[4] = 0; +} + +static const struct inet_diag_handler sctp_diag_handler = { + .dump = sctp_diag_dump, + .dump_one = sctp_diag_dump_one, + .idiag_get_info = sctp_diag_get_info, + .idiag_type = IPPROTO_SCTP, + .idiag_info_size = sizeof(struct sctp_info), +}; + +static int __init sctp_diag_init(void) +{ + return inet_diag_register(&sctp_diag_handler); +} + +static void __exit sctp_diag_exit(void) +{ + inet_diag_unregister(&sctp_diag_handler); +} + +module_init(sctp_diag_init); +module_exit(sctp_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 41b081a64752..e8f0112f9b28 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1218,6 +1218,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_seq_t *commands, gfp_t gfp) { + struct sock *sk = ep->base.sk; + struct sctp_sock *sp = sctp_sk(sk); int error = 0; int force; sctp_cmd_t *cmd; @@ -1738,6 +1740,11 @@ out: error = sctp_outq_uncork(&asoc->outqueue, gfp); } else if (local_cork) error = sctp_outq_uncork(&asoc->outqueue, gfp); + + if (sp->pending_data_ready) { + sk->sk_data_ready(sk); + sp->pending_data_ready = 0; + } return error; nomem: error = -ENOMEM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 878d28eda1a6..777d0324594a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4202,6 +4202,222 @@ static void sctp_shutdown(struct sock *sk, int how) } } +int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, + struct sctp_info *info) +{ + struct sctp_transport *prim; + struct list_head *pos; + int mask; + + memset(info, 0, sizeof(*info)); + if (!asoc) { + struct sctp_sock *sp = sctp_sk(sk); + + info->sctpi_s_autoclose = sp->autoclose; + info->sctpi_s_adaptation_ind = sp->adaptation_ind; + info->sctpi_s_pd_point = sp->pd_point; + info->sctpi_s_nodelay = sp->nodelay; + info->sctpi_s_disable_fragments = sp->disable_fragments; + info->sctpi_s_v4mapped = sp->v4mapped; + info->sctpi_s_frag_interleave = sp->frag_interleave; + + return 0; + } + + info->sctpi_tag = asoc->c.my_vtag; + info->sctpi_state = asoc->state; + info->sctpi_rwnd = asoc->a_rwnd; + info->sctpi_unackdata = asoc->unack_data; + info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); + info->sctpi_instrms = asoc->c.sinit_max_instreams; + info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + list_for_each(pos, &asoc->base.inqueue.in_chunk_list) + info->sctpi_inqueue++; + list_for_each(pos, &asoc->outqueue.out_chunk_list) + info->sctpi_outqueue++; + info->sctpi_overall_error = asoc->overall_error_count; + info->sctpi_max_burst = asoc->max_burst; + info->sctpi_maxseg = asoc->frag_point; + info->sctpi_peer_rwnd = asoc->peer.rwnd; + info->sctpi_peer_tag = asoc->c.peer_vtag; + + mask = asoc->peer.ecn_capable << 1; + mask = (mask | asoc->peer.ipv4_address) << 1; + 
mask = (mask | asoc->peer.ipv6_address) << 1; + mask = (mask | asoc->peer.hostname_address) << 1; + mask = (mask | asoc->peer.asconf_capable) << 1; + mask = (mask | asoc->peer.prsctp_capable) << 1; + mask = (mask | asoc->peer.auth_capable); + info->sctpi_peer_capable = mask; + mask = asoc->peer.sack_needed << 1; + mask = (mask | asoc->peer.sack_generation) << 1; + mask = (mask | asoc->peer.zero_window_announced); + info->sctpi_peer_sack = mask; + + info->sctpi_isacks = asoc->stats.isacks; + info->sctpi_osacks = asoc->stats.osacks; + info->sctpi_opackets = asoc->stats.opackets; + info->sctpi_ipackets = asoc->stats.ipackets; + info->sctpi_rtxchunks = asoc->stats.rtxchunks; + info->sctpi_outofseqtsns = asoc->stats.outofseqtsns; + info->sctpi_idupchunks = asoc->stats.idupchunks; + info->sctpi_gapcnt = asoc->stats.gapcnt; + info->sctpi_ouodchunks = asoc->stats.ouodchunks; + info->sctpi_iuodchunks = asoc->stats.iuodchunks; + info->sctpi_oodchunks = asoc->stats.oodchunks; + info->sctpi_iodchunks = asoc->stats.iodchunks; + info->sctpi_octrlchunks = asoc->stats.octrlchunks; + info->sctpi_ictrlchunks = asoc->stats.ictrlchunks; + + prim = asoc->peer.primary_path; + memcpy(&info->sctpi_p_address, &prim->ipaddr, + sizeof(struct sockaddr_storage)); + info->sctpi_p_state = prim->state; + info->sctpi_p_cwnd = prim->cwnd; + info->sctpi_p_srtt = prim->srtt; + info->sctpi_p_rto = jiffies_to_msecs(prim->rto); + info->sctpi_p_hbinterval = prim->hbinterval; + info->sctpi_p_pathmaxrxt = prim->pathmaxrxt; + info->sctpi_p_sackdelay = jiffies_to_msecs(prim->sackdelay); + info->sctpi_p_ssthresh = prim->ssthresh; + info->sctpi_p_partial_bytes_acked = prim->partial_bytes_acked; + info->sctpi_p_flight_size = prim->flight_size; + info->sctpi_p_error = prim->error_count; + + return 0; +} +EXPORT_SYMBOL_GPL(sctp_get_sctp_info); + +/* use callback to avoid exporting the core structure */ +int sctp_transport_walk_start(struct rhashtable_iter *iter) +{ + int err; + + err = rhashtable_walk_init(&sctp_transport_hashtable, iter, + GFP_KERNEL); + if (err) + return err; + + err = rhashtable_walk_start(iter); + if (err && err != -EAGAIN) { + rhashtable_walk_exit(iter); + return err; + } + + return 0; +} + +void sctp_transport_walk_stop(struct rhashtable_iter *iter) +{ + rhashtable_walk_stop(iter); + rhashtable_walk_exit(iter); +} + +struct sctp_transport *sctp_transport_get_next(struct net *net, + struct rhashtable_iter *iter) +{ + struct sctp_transport *t; + + t = rhashtable_walk_next(iter); + for (; t; t = rhashtable_walk_next(iter)) { + if (IS_ERR(t)) { + if (PTR_ERR(t) == -EAGAIN) + continue; + break; + } + + if (net_eq(sock_net(t->asoc->base.sk), net) && + t->asoc->peer.primary_path == t) + break; + } + + return t; +} + +struct sctp_transport *sctp_transport_get_idx(struct net *net, + struct rhashtable_iter *iter, + int pos) +{ + void *obj = SEQ_START_TOKEN; + + while (pos && (obj = sctp_transport_get_next(net, iter)) && + !IS_ERR(obj)) + pos--; + + return obj; +} + +int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), + void *p) { + int err = 0; + int hash = 0; + struct sctp_ep_common *epb; + struct sctp_hashbucket *head; + + for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize; + hash++, head++) { + read_lock(&head->lock); + sctp_for_each_hentry(epb, &head->chain) { + err = cb(sctp_ep(epb), p); + if (err) + break; + } + read_unlock(&head->lock); + } + + return err; +} +EXPORT_SYMBOL_GPL(sctp_for_each_endpoint); + +int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), + struct net 
*net, + const union sctp_addr *laddr, + const union sctp_addr *paddr, void *p) +{ + struct sctp_transport *transport; + int err = 0; + + rcu_read_lock(); + transport = sctp_addrs_lookup_transport(net, laddr, paddr); + if (!transport || !sctp_transport_hold(transport)) + goto out; + err = cb(transport, p); + sctp_transport_put(transport); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); + +int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), + struct net *net, int pos, void *p) { + struct rhashtable_iter hti; + void *obj; + int err; + + err = sctp_transport_walk_start(&hti); + if (err) + return err; + + sctp_transport_get_idx(net, &hti, pos); + obj = sctp_transport_get_next(net, &hti); + for (; obj && !IS_ERR(obj); obj = sctp_transport_get_next(net, &hti)) { + struct sctp_transport *transport = obj; + + if (!sctp_transport_hold(transport)) + continue; + err = cb(transport, p); + sctp_transport_put(transport); + if (err) + break; + } + sctp_transport_walk_stop(&hti); + + return err; +} +EXPORT_SYMBOL_GPL(sctp_for_each_transport); + /* 7.2.1 Association Status (SCTP_STATUS) * Applications can retrieve current status information about an @@ -6430,6 +6646,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) poll_wait(file, sk_sleep(sk), wait); + sock_rps_record_flow(sk); + /* A TCP-style listening socket becomes readable when the accept queue * is not empty. */ @@ -6764,13 +6982,11 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, * However, this function was correct in any case. 8) */ if (flags & MSG_PEEK) { - spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) atomic_inc(&skb->users); - spin_unlock_bh(&sk->sk_receive_queue.lock); } else { - skb = skb_dequeue(&sk->sk_receive_queue); + skb = __skb_dequeue(&sk->sk_receive_queue); } if (skb) @@ -7186,6 +7402,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_lingertime = sk->sk_lingertime; newsk->sk_rcvtimeo = sk->sk_rcvtimeo; newsk->sk_sndtimeo = sk->sk_sndtimeo; + newsk->sk_rxhash = sk->sk_rxhash; newinet = inet_sk(newsk); diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ce469d648ffb..ec12a8920e5f 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -141,7 +141,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) */ if (!skb_queue_empty(&sp->pd_lobby)) { struct list_head *list; - sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue); + skb_queue_splice_tail_init(&sp->pd_lobby, + &sk->sk_receive_queue); list = (struct list_head *)&sctp_sk(sk)->pd_lobby; INIT_LIST_HEAD(list); return 1; @@ -252,7 +253,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * collected on a list. */ if (skb_list) - sctp_skb_list_tail(skb_list, queue); + skb_queue_splice_tail_init(skb_list, queue); else __skb_queue_tail(queue, skb); @@ -264,7 +265,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) sctp_ulpq_clear_pd(ulpq); if (queue == &sk->sk_receive_queue) - sk->sk_data_ready(sk); + sctp_sk(sk)->pending_data_ready = 1; return 1; out_free: @@ -1140,5 +1141,5 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) /* If there is data waiting, send it up the socket now. 
*/ if (sctp_ulpq_clear_pd(ulpq) || ev) - sk->sk_data_ready(sk); + sctp_sk(sk)->pending_data_ready = 1; } diff --git a/net/socket.c b/net/socket.c index 5f77a8e93830..5dbb0bbe12a7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -587,20 +587,20 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) +void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) { u8 flags = *tx_flags; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE) + if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) flags |= SKBTX_HW_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) + if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) flags |= SKBTX_SW_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) + if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |= SKBTX_SCHED_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK) + if (tsflags & SOF_TIMESTAMPING_TX_ACK) flags |= SKBTX_ACK_TSTAMP; *tx_flags = flags; @@ -709,17 +709,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size, int flags) + int flags) { - return sock->ops->recvmsg(sock, msg, size, flags); + return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags); } -int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags) +int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) { - int err = security_socket_recvmsg(sock, msg, size, flags); + int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags); - return err ?: sock_recvmsg_nosec(sock, msg, size, flags); + return err ?: sock_recvmsg_nosec(sock, msg, flags); } EXPORT_SYMBOL(sock_recvmsg); @@ -746,7 +745,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size); set_fs(KERNEL_DS); - result = sock_recvmsg(sock, msg, size, flags); + result = sock_recvmsg(sock, msg, flags); set_fs(oldfs); return result; } @@ -796,7 +795,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!iov_iter_count(to)) /* Match SYS5 behaviour */ return 0; - res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags); + res = sock_recvmsg(sock, &msg, msg.msg_flags); *to = msg.msg_iter; return res; } @@ -1046,7 +1045,7 @@ static int sock_fasync(int fd, struct file *filp, int on) return -EINVAL; lock_sock(sk); - wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); + wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk)); fasync_helper(fd, filp, on, &wq->fasync_list); if (!wq->fasync_list) @@ -1696,7 +1695,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iocb = NULL; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags); + err = sock_recvmsg(sock, &msg, flags); if (err >= 0 && addr != NULL) { err2 = move_addr_to_user(&address, @@ -2073,7 +2072,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; unsigned long cmsg_ptr; - int total_len, len; + int len; ssize_t err; /* kernel mode address */ @@ -2091,7 +2090,6 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov); if (err < 0) return err; - total_len = iov_iter_count(&msg_sys->msg_iter); cmsg_ptr = 
(unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); @@ -2101,8 +2099,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, - total_len, flags); + err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags); if (err < 0) goto out_freeiov; len = err; diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index de70c78025d7..f217c348b341 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -155,7 +155,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) struct xdr_skb_reader desc; desc.skb = skb; - desc.offset = sizeof(struct udphdr); + desc.offset = 0; desc.count = skb->len - desc.offset; if (skb_csum_unnecessary(skb)) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1413cdcc131c..dadfec66dbd8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -85,8 +85,7 @@ static void svc_reclassify_socket(struct socket *sock) { struct sock *sk = sock->sk; - WARN_ON_ONCE(sock_owned_by_user(sk)); - if (sock_owned_by_user(sk)) + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) return; switch (sk->sk_family) { @@ -617,7 +616,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) svsk->sk_sk->sk_stamp = skb->tstamp; set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ - len = skb->len - sizeof(struct udphdr); + len = skb->len; rqstp->rq_arg.len = len; rqstp->rq_prot = IPPROTO_UDP; @@ -641,8 +640,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) skb_free_datagram_locked(svsk->sk_sk, skb); } else { /* we can use it in-place */ - rqstp->rq_arg.head[0].iov_base = skb->data + - sizeof(struct udphdr); + rqstp->rq_arg.head[0].iov_base = skb->data; rqstp->rq_arg.head[0].iov_len = len; if (skb_checksum_complete(skb)) goto out_free; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 65e759569e48..d0756ac5c0f2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -995,15 +995,14 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, u32 _xid; __be32 *xp; - repsize = skb->len - sizeof(struct udphdr); + repsize = skb->len; if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d!\n", repsize); return; } /* Copy the XID from the skb... 
*/ - xp = skb_header_pointer(skb, sizeof(struct udphdr), - sizeof(_xid), &_xid); + xp = skb_header_pointer(skb, 0, sizeof(_xid), &_xid); if (xp == NULL) return; @@ -1881,8 +1880,7 @@ static inline void xs_reclassify_socket6(struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock) { - WARN_ON_ONCE(sock_owned_by_user(sock->sk)); - if (sock_owned_by_user(sock->sk)) + if (WARN_ON_ONCE(!sock_allow_reclassification(sock->sk))) return; switch (family) { diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 27a5406213c6..6f11c62bc8f9 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -205,6 +205,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, struct tipc_bearer *b; struct tipc_media *m; struct tipc_bearer_names b_names; + struct sk_buff *skb; char addr_string[16]; u32 bearer_id; u32 with_this_prio; @@ -301,7 +302,7 @@ restart: b->net_plane = bearer_id + 'A'; b->priority = priority; - res = tipc_disc_create(net, b, &b->bcast_addr); + res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", @@ -310,7 +311,8 @@ restart: } rcu_assign_pointer(tn->bearer_list[bearer_id], b); - + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, tipc_addr_string_fill(addr_string, disc_domain), priority); @@ -335,23 +337,16 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) */ static void bearer_disable(struct net *net, struct tipc_bearer *b) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 i; + struct tipc_net *tn = tipc_net(net); + int bearer_id = b->identity; pr_info("Disabling bearer <%s>\n", b->name); b->media->disable_media(b); - - tipc_node_delete_links(net, b->identity); + tipc_node_delete_links(net, bearer_id); RCU_INIT_POINTER(b->media_ptr, NULL); if (b->link_req) tipc_disc_delete(b->link_req); - - for (i = 0; i < MAX_BEARERS; i++) { - if (b == rtnl_dereference(tn->bearer_list[i])) { - RCU_INIT_POINTER(tn->bearer_list[i], NULL); - break; - } - } + RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); kfree_rcu(b, rcu); } @@ -394,7 +389,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface - * @buf: the packet to be sent + * @skb: the packet to be sent * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ @@ -403,17 +398,21 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, { struct net_device *dev; int delta; + void *tipc_ptr; dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); if (!dev) return 0; + /* Send RESET message even if bearer is detached from device */ + tipc_ptr = rtnl_dereference(dev->tipc_ptr); + if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) + goto drop; + delta = dev->hard_header_len - skb_headroom(skb); if ((delta > 0) && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - return 0; - } + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) + goto drop; skb_reset_network_header(skb); skb->dev = dev; @@ -422,6 +421,9 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, dev->dev_addr, skb->len); dev_queue_xmit(skb); return 0; +drop: + kfree_skb(skb); + return 0; } int tipc_bearer_mtu(struct net *net, u32 bearer_id) @@ -450,6 +452,8 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, b = 
rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (likely(b)) b->media->send_msg(net, skb, b, dest); + else + kfree_skb(skb); rcu_read_unlock(); } @@ -468,11 +472,11 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, rcu_read_lock(); b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b)) { - skb_queue_walk_safe(xmitq, skb, tmp) { - __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, dst); - } + if (unlikely(!b)) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, dst); } rcu_read_unlock(); } @@ -490,14 +494,14 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, rcu_read_lock(); b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b)) { - skb_queue_walk_safe(xmitq, skb, tmp) { - hdr = buf_msg(skb); - msg_set_non_seq(hdr, 1); - msg_set_mc_netid(hdr, net_id); - __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, &b->bcast_addr); - } + if (unlikely(!b)) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + hdr = buf_msg(skb); + msg_set_non_seq(hdr, 1); + msg_set_mc_netid(hdr, net_id); + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, &b->bcast_addr); } rcu_read_unlock(); } @@ -513,24 +517,21 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, * ignores packets sent using interface multicast, and traffic sent to other * nodes (which can happen if interface is running in promiscuous mode). */ -static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct tipc_bearer *b; rcu_read_lock(); b = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b)) { - if (likely(buf->pkt_type <= PACKET_BROADCAST)) { - buf->next = NULL; - tipc_rcv(dev_net(dev), buf, b); - rcu_read_unlock(); - return NET_RX_SUCCESS; - } + if (likely(b && (skb->pkt_type <= PACKET_BROADCAST))) { + skb->next = NULL; + tipc_rcv(dev_net(dev), skb, b); + rcu_read_unlock(); + return NET_RX_SUCCESS; } rcu_read_unlock(); - - kfree_skb(buf); + kfree_skb(skb); return NET_RX_DROP; } @@ -548,9 +549,18 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); + struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; + int i; b = rtnl_dereference(dev->tipc_ptr); + if (!b) { + for (i = 0; i < MAX_BEARERS; b = NULL, i++) { + b = rtnl_dereference(tn->bearer_list[i]); + if (b && (b->media_ptr == dev)) + break; + } + } if (!b) return NOTIFY_DONE; @@ -560,13 +570,20 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, case NETDEV_CHANGE: if (netif_carrier_ok(dev)) break; + case NETDEV_UP: + rcu_assign_pointer(dev->tipc_ptr, b); + break; case NETDEV_GOING_DOWN: + RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); + tipc_reset_bearer(net, b); + break; case NETDEV_CHANGEMTU: tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: b->media->raw2addr(b, &b->addr, - (char *)dev->dev_addr); + (char *)dev->dev_addr); tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index e31820516774..f686e41b5abb 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -42,8 +42,6 @@ #include <net/genetlink.h> #define MAX_MEDIA 3 -#define MAX_NODES 4096 -#define WSIZE 32 /* Identifiers associated with TIPC message header media address info * - address info 
field is 32 bytes long @@ -62,16 +60,6 @@ #define TIPC_MEDIA_TYPE_UDP 3 /** - * struct tipc_node_map - set of node identifiers - * @count: # of nodes in set - * @map: bitmap of node identifiers that are in the set - */ -struct tipc_node_map { - u32 count; - u32 map[MAX_NODES / WSIZE]; -}; - -/** * struct tipc_media_addr - destination address used by TIPC bearers * @value: address info (format defined by media) * @media_id: TIPC media type identifier @@ -142,7 +130,6 @@ struct tipc_media { * @identity: array index of this bearer within TIPC bearer array * @link_req: ptr to (optional) structure making periodic link setup requests * @net_plane: network plane ('A' through 'H') currently associated with bearer - * @nodes: indicates which nodes in cluster can be reached through bearer * * Note: media-specific code is responsible for initialization of the fields * indicated below when a bearer is enabled; TIPC's generic bearer code takes @@ -163,8 +150,6 @@ struct tipc_bearer { u32 identity; struct tipc_link_req *link_req; char net_plane; - int node_cnt; - struct tipc_node_map nodes; }; struct tipc_bearer_names { diff --git a/net/tipc/discover.c b/net/tipc/discover.c index f1e738e80535..ad9d477cc242 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -268,10 +268,9 @@ exit: * Returns 0 if successful, otherwise -errno. */ int tipc_disc_create(struct net *net, struct tipc_bearer *b, - struct tipc_media_addr *dest) + struct tipc_media_addr *dest, struct sk_buff **skb) { struct tipc_link_req *req; - struct sk_buff *skb; req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) @@ -293,9 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); b->link_req = req; - skb = skb_clone(req->buf, GFP_ATOMIC); - if (skb) - tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); + *skb = skb_clone(req->buf, GFP_ATOMIC); return 0; } diff --git a/net/tipc/discover.h b/net/tipc/discover.h index c9b12770c5ed..b80a335389c0 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -40,7 +40,7 @@ struct tipc_link_req; int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, - struct tipc_media_addr *dest); + struct tipc_media_addr *dest, struct sk_buff **skb); void tipc_disc_delete(struct tipc_link_req *req); void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); diff --git a/net/tipc/link.c b/net/tipc/link.c index 7d2bb3e70baa..2e28a7d7e802 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -140,6 +140,7 @@ struct tipc_link { char if_name[TIPC_MAX_IF_NAME]; u32 priority; char net_plane; + u16 rst_cnt; /* Failover/synch */ u16 drop_point; @@ -701,40 +702,35 @@ static void link_profile_stats(struct tipc_link *l) /* tipc_link_timeout - perform periodic task as instructed from node timeout */ -/* tipc_link_timeout - perform periodic task as instructed from node timeout - */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { - int rc = 0; - int mtyp = STATE_MSG; - bool xmit = false; - bool prb = false; + int mtyp, rc = 0; + bool state = false; + bool probe = false; + bool setup = false; u16 bc_snt = l->bc_sndlink->snd_nxt - 1; u16 bc_acked = l->bc_rcvlink->acked; - bool bc_up = link_is_up(l->bc_rcvlink); link_profile_stats(l); switch (l->state) { case LINK_ESTABLISHED: case LINK_SYNCHING: - if (!l->silent_intv_cnt) { - if (bc_up && (bc_acked != bc_snt)) - xmit = true; - } else if 
(l->silent_intv_cnt <= l->abort_limit) { - xmit = true; - prb = true; - } else { - rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - } - l->silent_intv_cnt++; + if (l->silent_intv_cnt > l->abort_limit) + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + mtyp = STATE_MSG; + state = bc_acked != bc_snt; + probe = l->silent_intv_cnt; + if (probe) + l->silent_intv_cnt++; break; case LINK_RESET: - xmit = true; + setup = l->rst_cnt++ <= 4; + setup |= !(l->rst_cnt % 16); mtyp = RESET_MSG; break; case LINK_ESTABLISHING: - xmit = true; + setup = true; mtyp = ACTIVATE_MSG; break; case LINK_PEER_RESET: @@ -745,8 +741,8 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) break; } - if (xmit) - tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq); + if (state || probe || setup) + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); return rc; } @@ -833,6 +829,7 @@ void tipc_link_reset(struct tipc_link *l) l->rcv_nxt = 1; l->acked = 0; l->silent_intv_cnt = 0; + l->rst_cnt = 0; l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; @@ -1110,12 +1107,12 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) return released; } -/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission +/* tipc_link_build_state_msg: prepare link state message for transmission * * Note that sending of broadcast ack is coordinated among nodes, to reduce * risk of ack storms towards the sender */ -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { if (!l) return 0; @@ -1140,11 +1137,17 @@ int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { int mtyp = RESET_MSG; + struct sk_buff *skb; if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); + + /* Inform peer that this endpoint is going down if applicable */ + skb = skb_peek_tail(xmitq); + if (skb && (l->state == LINK_RESET)) + msg_set_peer_stopping(buf_msg(skb), 1); } /* tipc_link_build_nack_msg: prepare link nack message for transmission @@ -1219,7 +1222,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, if (!tipc_data_input(l, skb, l->inputq)) rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) - rc |= tipc_link_build_ack_msg(l, xmitq); + rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) break; } while ((skb = __skb_dequeue(defq))); @@ -1411,7 +1414,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->priority = peers_prio; /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ - if ((mtyp == RESET_MSG) || !link_is_up(l)) + if (msg_peer_stopping(hdr)) + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + else if ((mtyp == RESET_MSG) || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ diff --git a/net/tipc/link.h b/net/tipc/link.h index 6a94175ee20a..d7e9d42fcb2d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -123,7 +123,7 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head 
*xmitq); +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 55778a0aebf3..58bf51541813 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -715,6 +715,16 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) msg_set_bits(m, 5, 12, 0x1, r); } +static inline u32 msg_peer_stopping(struct tipc_msg *m) +{ + return msg_bits(m, 5, 13, 0x1); +} + +static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) +{ + msg_set_bits(m, 5, 13, 0x1, s); +} + static inline char *msg_media_addr(struct tipc_msg *m) { return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; @@ -779,6 +789,11 @@ static inline bool msg_peer_node_is_up(struct tipc_msg *m) return msg_redundant_link(m); } +static inline bool msg_is_reset(struct tipc_msg *hdr) +{ + return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); +} + struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); diff --git a/net/tipc/node.c b/net/tipc/node.c index ace178fd3850..68d9f7b8485c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -545,6 +545,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, pr_debug("Established link <%s> on network plane %c\n", tipc_link_name(nl), tipc_link_plane(nl)); + /* Ensure that a STATE message goes first */ + tipc_link_build_state_msg(nl, xmitq); + /* First link? => give it both slots */ if (!ol) { *slot0 = bearer_id; @@ -581,8 +584,12 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { + struct tipc_media_addr *maddr; + tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); + maddr = &n->links[bearer_id].maddr; + tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr); tipc_node_write_unlock(n); } @@ -1279,7 +1286,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_BC_ACK) { tipc_node_read_lock(n); - tipc_link_build_ack_msg(le->link, &xmitq); + tipc_link_build_state_msg(le->link, &xmitq); tipc_node_read_unlock(n); } diff --git a/net/tipc/server.c b/net/tipc/server.c index 2446bfbaa309..7a0af2dc0406 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,6 +86,7 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); +static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -102,6 +103,7 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); + tipc_sock_release(con); sock_release(sock); con->sock = NULL; } @@ -184,26 +186,31 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } +static void tipc_sock_release(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); + + tipc_unregister_callbacks(con); +} + static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - if 
(con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; spin_unlock_bh(&s->idr_lock); - tipc_unregister_callbacks(con); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this - * connection from server connection list and set - * sk->sk_user_data to 0 before releasing connection object. + * connection from server connection list. */ kernel_sock_shutdown(con->sock, SHUT_RDWR); diff --git a/net/tipc/server.h b/net/tipc/server.h index 9015faedb1b0..34f8055afa3b 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -53,7 +53,7 @@ * @send_wq: send workqueue * @max_rcvbuf_size: maximum permitted receive message length * @tipc_conn_new: callback will be called when new connection is incoming - * @tipc_conn_shutdown: callback will be called when connection is shut down + * @tipc_conn_release: callback will be called before releasing the connection * @tipc_conn_recvmsg: callback will be called when message arrives * @saddr: TIPC server address * @name: server name @@ -70,7 +70,7 @@ struct tipc_server { struct workqueue_struct *send_wq; int max_rcvbuf_size; void *(*tipc_conn_new)(int conid); - void (*tipc_conn_shutdown)(int conid, void *usr_data); + void (*tipc_conn_release)(int conid, void *usr_data); void (*tipc_conn_recvmsg)(struct net *net, int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index e6cb386fbf34..79de588c7bd6 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -302,7 +302,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, } /* Handle one termination request for the subscriber */ -static void tipc_subscrb_shutdown_cb(int conid, void *usr_data) +static void tipc_subscrb_release_cb(int conid, void *usr_data) { tipc_subscrb_delete((struct tipc_subscriber *)usr_data); } @@ -365,7 +365,7 @@ int tipc_topsrv_start(struct net *net) topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb; topsrv->tipc_conn_new = tipc_subscrb_connect_cb; - topsrv->tipc_conn_shutdown = tipc_subscrb_shutdown_cb; + topsrv->tipc_conn_release = tipc_subscrb_release_cb; strncpy(topsrv->name, name, strlen(name) + 1); tn->topsrv = topsrv; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 59cabc9bce69..a6631fb319c1 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -768,7 +768,7 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, if (chan == other_chan) return true; - if (chan->band != IEEE80211_BAND_5GHZ) + if (chan->band != NL80211_BAND_5GHZ) continue; r1 = cfg80211_get_unii(chan->center_freq); diff --git a/net/wireless/core.c b/net/wireless/core.c index 9f1c4aa851ef..7f7b9409bf4c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -557,7 +557,7 @@ int wiphy_register(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); int res; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_supported_band *sband; bool have_band = false; int i; @@ -626,6 +626,13 @@ int wiphy_register(struct wiphy *wiphy) !rdev->ops->set_mac_acl))) return -EINVAL; + /* assure only valid behaviours are flagged by driver + * hence subtract 2 as bit 0 is invalid. 
+ */ + if (WARN_ON(wiphy->bss_select_support && + (wiphy->bss_select_support & ~(BIT(__NL80211_BSS_SELECT_ATTR_AFTER_LAST) - 2)))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); @@ -640,7 +647,7 @@ int wiphy_register(struct wiphy *wiphy) return res; /* sanity check supported bands/channels */ - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wiphy->bands[band]; if (!sband) continue; @@ -652,7 +659,7 @@ int wiphy_register(struct wiphy *wiphy) * on 60GHz band, there are no legacy rates, so * n_bitrates is 0 */ - if (WARN_ON(band != IEEE80211_BAND_60GHZ && + if (WARN_ON(band != NL80211_BAND_60GHZ && !sband->n_bitrates)) return -EINVAL; @@ -662,7 +669,7 @@ int wiphy_register(struct wiphy *wiphy) * global structure for that. */ if (cfg80211_disable_40mhz_24ghz && - band == IEEE80211_BAND_2GHZ && + band == NL80211_BAND_2GHZ && sband->ht_cap.ht_supported) { sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40; diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 454157717efa..5d453916a417 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -69,7 +69,7 @@ static ssize_t ht40allow_map_read(struct file *file, struct wiphy *wiphy = file->private_data; char *buf; unsigned int offset = 0, buf_size = PAGE_SIZE, i, r; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_supported_band *sband; buf = kzalloc(buf_size, GFP_KERNEL); @@ -78,7 +78,7 @@ static ssize_t ht40allow_map_read(struct file *file, rtnl_lock(); - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wiphy->bands[band]; if (!sband) continue; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 4c55fab9b4e4..4a4dda53bdf1 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -104,7 +104,7 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct ieee80211_supported_band *sband = rdev->wiphy.bands[params->chandef.chan->band]; int j; - u32 flag = params->chandef.chan->band == IEEE80211_BAND_5GHZ ? + u32 flag = params->chandef.chan->band == NL80211_BAND_5GHZ ? 
IEEE80211_RATE_MANDATORY_A : IEEE80211_RATE_MANDATORY_B; @@ -236,7 +236,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct cfg80211_cached_keys *ck = NULL; - enum ieee80211_band band; + enum nl80211_band band; int i, err; ASSERT_WDEV_LOCK(wdev); @@ -248,7 +248,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (!wdev->wext.ibss.chandef.chan) { struct ieee80211_channel *new_chan = NULL; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 092300b30c37..fa2066b56f36 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -128,9 +128,9 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!setup->chandef.chan) { /* if we don't have that either, use the first usable channel */ - enum ieee80211_band band; + enum nl80211_band band; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; int i; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index ff328250bc44..c284d883c349 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -726,7 +726,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) wiphy = &rdev->wiphy; rtnl_lock(); - for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { + for (bandid = 0; bandid < NUM_NL80211_BANDS; bandid++) { sband = wiphy->bands[bandid]; if (!sband) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 056a7307862b..fd7f34a2b10c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -402,6 +402,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG }, [NL80211_ATTR_PBSS] = { .type = NLA_FLAG }, + [NL80211_ATTR_BSS_SELECT] = { .type = NLA_NESTED }, + [NL80211_ATTR_STA_SUPPORT_P2P_PS] = { .type = NLA_U8 }, }; /* policy for the key attributes */ @@ -486,6 +488,15 @@ nl80211_plan_policy[NL80211_SCHED_SCAN_PLAN_MAX + 1] = { [NL80211_SCHED_SCAN_PLAN_ITERATIONS] = { .type = NLA_U32 }, }; +static const struct nla_policy +nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = { + [NL80211_BSS_SELECT_ATTR_RSSI] = { .type = NLA_FLAG }, + [NL80211_BSS_SELECT_ATTR_BAND_PREF] = { .type = NLA_U32 }, + [NL80211_BSS_SELECT_ATTR_RSSI_ADJUST] = { + .len = sizeof(struct nl80211_bss_select_rssi_adjust) + }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -1266,7 +1277,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, struct nlattr *nl_bands, *nl_band; struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_cmds; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_channel *chan; int i; const struct ieee80211_txrx_stypes *mgmt_stypes = @@ -1399,7 +1410,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, goto nla_put_failure; for (band = state->band_start; - band < IEEE80211_NUM_BANDS; band++) { + band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; sband = rdev->wiphy.bands[band]; @@ -1461,7 +1472,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, } nla_nest_end(msg, nl_bands); - 
if (band < IEEE80211_NUM_BANDS) + if (band < NUM_NL80211_BANDS) state->band_start = band + 1; else state->band_start = 0; @@ -1731,6 +1742,25 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.ext_features)) goto nla_put_failure; + if (rdev->wiphy.bss_select_support) { + struct nlattr *nested; + u32 bss_select_support = rdev->wiphy.bss_select_support; + + nested = nla_nest_start(msg, NL80211_ATTR_BSS_SELECT); + if (!nested) + goto nla_put_failure; + + i = 0; + while (bss_select_support) { + if ((bss_select_support & 1) && + nla_put_flag(msg, i)) + goto nla_put_failure; + i++; + bss_select_support >>= 1; + } + nla_nest_end(msg, nested); + } + /* done */ state->split_start = 0; break; @@ -3463,7 +3493,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); - if (params.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ]) + if (params.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) return -EOPNOTSUPP; wdev_lock(wdev); @@ -3977,6 +4007,10 @@ int cfg80211_check_station_change(struct wiphy *wiphy, statype != CFG80211_STA_AP_CLIENT_UNASSOC) return -EINVAL; + if (params->support_p2p_ps != -1 && + statype != CFG80211_STA_AP_CLIENT_UNASSOC) + return -EINVAL; + if (params->aid && !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) && statype != CFG80211_STA_AP_CLIENT_UNASSOC) @@ -4270,6 +4304,18 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) else params.listen_interval = -1; + if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) { + u8 tmp; + + tmp = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]); + if (tmp >= NUM_NL80211_P2P_PS_STATUS) + return -EINVAL; + + params.support_p2p_ps = tmp; + } else { + params.support_p2p_ps = -1; + } + if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -4393,6 +4439,23 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) { + u8 tmp; + + tmp = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]); + if (tmp >= NUM_NL80211_P2P_PS_STATUS) + return -EINVAL; + + params.support_p2p_ps = tmp; + } else { + /* + * if not specified, assume it's supported for P2P GO interface, + * and is NOT supported for AP interface + */ + params.support_p2p_ps = + dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO; + } + if (info->attrs[NL80211_ATTR_PEER_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); else @@ -5758,6 +5821,73 @@ static int validate_scan_freqs(struct nlattr *freqs) return n_channels; } +static bool is_band_valid(struct wiphy *wiphy, enum nl80211_band b) +{ + return b < NUM_NL80211_BANDS && wiphy->bands[b]; +} + +static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy, + struct cfg80211_bss_selection *bss_select) +{ + struct nlattr *attr[NL80211_BSS_SELECT_ATTR_MAX + 1]; + struct nlattr *nest; + int err; + bool found = false; + int i; + + /* only process one nested attribute */ + nest = nla_data(nla); + if (!nla_ok(nest, nla_len(nest))) + return -EINVAL; + + err = nla_parse(attr, NL80211_BSS_SELECT_ATTR_MAX, nla_data(nest), + nla_len(nest), nl80211_bss_select_policy); + if (err) + return err; + + /* only one attribute may be given */ + for (i = 0; i <= NL80211_BSS_SELECT_ATTR_MAX; i++) { + if (attr[i]) { + if (found) + return -EINVAL; + found = true; + } + } + + bss_select->behaviour = __NL80211_BSS_SELECT_ATTR_INVALID; + 
+ if (attr[NL80211_BSS_SELECT_ATTR_RSSI]) + bss_select->behaviour = NL80211_BSS_SELECT_ATTR_RSSI; + + if (attr[NL80211_BSS_SELECT_ATTR_BAND_PREF]) { + bss_select->behaviour = NL80211_BSS_SELECT_ATTR_BAND_PREF; + bss_select->param.band_pref = + nla_get_u32(attr[NL80211_BSS_SELECT_ATTR_BAND_PREF]); + if (!is_band_valid(wiphy, bss_select->param.band_pref)) + return -EINVAL; + } + + if (attr[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST]) { + struct nl80211_bss_select_rssi_adjust *adj_param; + + adj_param = nla_data(attr[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST]); + bss_select->behaviour = NL80211_BSS_SELECT_ATTR_RSSI_ADJUST; + bss_select->param.adjust.band = adj_param->band; + bss_select->param.adjust.delta = adj_param->delta; + if (!is_band_valid(wiphy, bss_select->param.adjust.band)) + return -EINVAL; + } + + /* user-space did not provide behaviour attribute */ + if (bss_select->behaviour == __NL80211_BSS_SELECT_ATTR_INVALID) + return -EINVAL; + + if (!(wiphy->bss_select_support & BIT(bss_select->behaviour))) + return -EINVAL; + + return 0; +} + static int nl80211_parse_random_mac(struct nlattr **attrs, u8 *mac_addr, u8 *mac_addr_mask) { @@ -5888,10 +6018,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) i++; } } else { - enum ieee80211_band band; + enum nl80211_band band; /* all channels */ - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { int j; if (!wiphy->bands[band]) continue; @@ -5936,7 +6066,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->ie_len); } - for (i = 0; i < IEEE80211_NUM_BANDS; i++) + for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) request->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; @@ -5945,9 +6075,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SUPP_RATES], tmp) { - enum ieee80211_band band = nla_type(attr); + enum nl80211_band band = nla_type(attr); - if (band < 0 || band >= IEEE80211_NUM_BANDS) { + if (band < 0 || band >= NUM_NL80211_BANDS) { err = -EINVAL; goto out_free; } @@ -5996,6 +6126,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); + if (info->attrs[NL80211_ATTR_MAC]) + memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]), + ETH_ALEN); + else + eth_broadcast_addr(request->bssid); + request->wdev = wdev; request->wiphy = &rdev->wiphy; request->scan_start = jiffies; @@ -6129,7 +6265,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_sched_scan_request *request; struct nlattr *attr; int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0; - enum ieee80211_band band; + enum nl80211_band band; size_t ie_len; struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1]; s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF; @@ -6294,7 +6430,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, } } else { /* all channels */ - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { int j; if (!wiphy->bands[band]) continue; @@ -7402,14 +7538,14 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) static bool nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev, - int mcast_rate[IEEE80211_NUM_BANDS], + int mcast_rate[NUM_NL80211_BANDS], int rateval) { struct wiphy 
*wiphy = &rdev->wiphy; bool found = false; int band, i; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; sband = wiphy->bands[band]; @@ -7589,7 +7725,7 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - int mcast_rate[IEEE80211_NUM_BANDS]; + int mcast_rate[NUM_NL80211_BANDS]; u32 nla_rate; int err; @@ -7922,6 +8058,10 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.mfp = NL80211_MFP_NO; } + if (info->attrs[NL80211_ATTR_PREV_BSSID]) + connect.prev_bssid = + nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { connect.channel = nl80211_get_valid_chan( wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ]); @@ -7990,13 +8130,29 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) } connect.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); - if (connect.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ]) { + if (connect.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { kzfree(connkeys); return -EOPNOTSUPP; } + if (info->attrs[NL80211_ATTR_BSS_SELECT]) { + /* bss selection makes no sense if bssid is set */ + if (connect.bssid) { + kzfree(connkeys); + return -EINVAL; + } + + err = parse_bss_select(info->attrs[NL80211_ATTR_BSS_SELECT], + wiphy, &connect.bss_select); + if (err) { + kzfree(connkeys); + return err; + } + } + wdev_lock(dev->ieee80211_ptr); - err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL); + err = cfg80211_connect(rdev, dev, &connect, connkeys, + connect.prev_bssid); wdev_unlock(dev->ieee80211_ptr); if (err) kzfree(connkeys); @@ -8394,7 +8550,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, memset(&mask, 0, sizeof(mask)); /* Default to all rates enabled */ - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { sband = rdev->wiphy.bands[i]; if (!sband) @@ -8418,14 +8574,14 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, /* * The nested attribute uses enum nl80211_band as the index. This maps - * directly to the enum ieee80211_band values used in cfg80211. + * directly to the enum nl80211_band values used in cfg80211. */ BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { - enum ieee80211_band band = nla_type(tx_rates); + enum nl80211_band band = nla_type(tx_rates); int err; - if (band < 0 || band >= IEEE80211_NUM_BANDS) + if (band < 0 || band >= NUM_NL80211_BANDS) return -EINVAL; sband = rdev->wiphy.bands[band]; if (sband == NULL) @@ -10590,7 +10746,7 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb, * section 10.22.6.2.1. Disallow 5/10Mhz channels as well for now, the * specification is not defined for them. 
*/ - if (chandef.chan->band == IEEE80211_BAND_2GHZ && + if (chandef.chan->band == NL80211_BAND_2GHZ && chandef.width != NL80211_CHAN_WIDTH_20_NOHT && chandef.width != NL80211_CHAN_WIDTH_20) return -EINVAL; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 8ae0c04f9fc7..85ff30bee2b9 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1048,7 +1048,7 @@ rdev_start_radar_detection(struct cfg80211_registered_device *rdev, static inline int rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, struct net_device *dev, - int mcast_rate[IEEE80211_NUM_BANDS]) + int mcast_rate[NUM_NL80211_BANDS]) { int ret = -ENOTSUPP; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c5fb317eee68..e271dea6bc02 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1546,12 +1546,12 @@ static void reg_process_ht_flags_band(struct wiphy *wiphy, static void reg_process_ht_flags(struct wiphy *wiphy) { - enum ieee80211_band band; + enum nl80211_band band; if (!wiphy) return; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } @@ -1673,7 +1673,7 @@ static void reg_check_channels(void) static void wiphy_update_regulatory(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { - enum ieee80211_band band; + enum nl80211_band band; struct regulatory_request *lr = get_last_request(); if (ignore_reg_update(wiphy, initiator)) { @@ -1690,7 +1690,7 @@ static void wiphy_update_regulatory(struct wiphy *wiphy, lr->dfs_region = get_cfg80211_regdom()->dfs_region; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) handle_band(wiphy, initiator, wiphy->bands[band]); reg_process_beacons(wiphy); @@ -1786,14 +1786,14 @@ static void handle_band_custom(struct wiphy *wiphy, void wiphy_apply_custom_regulatory(struct wiphy *wiphy, const struct ieee80211_regdomain *regd) { - enum ieee80211_band band; + enum nl80211_band band; unsigned int bands_set = 0; WARN(!(wiphy->regulatory_flags & REGULATORY_CUSTOM_REG), "wiphy should have REGULATORY_CUSTOM_REG\n"); wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!wiphy->bands[band]) continue; handle_band_custom(wiphy, wiphy->bands[band], regd); @@ -2228,7 +2228,7 @@ static void reg_process_self_managed_hints(void) struct wiphy *wiphy; const struct ieee80211_regdomain *tmp; const struct ieee80211_regdomain *regd; - enum ieee80211_band band; + enum nl80211_band band; struct regulatory_request request = {}; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { @@ -2246,7 +2246,7 @@ static void reg_process_self_managed_hints(void) rcu_assign_pointer(wiphy->regd, regd); rcu_free_regdom(tmp); - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) handle_band_custom(wiphy, wiphy->bands[band], regd); reg_process_ht_flags(wiphy); @@ -2404,7 +2404,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) } EXPORT_SYMBOL(regulatory_hint); -void regulatory_hint_country_ie(struct wiphy *wiphy, enum ieee80211_band band, +void regulatory_hint_country_ie(struct wiphy *wiphy, enum nl80211_band band, const u8 *country_ie, u8 country_ie_len) { char alpha2[2]; @@ -2504,11 +2504,11 @@ static void restore_alpha2(char *alpha2, bool reset_user) static void restore_custom_reg_settings(struct wiphy *wiphy) { struct ieee80211_supported_band 
*sband; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_channel *chan; int i; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wiphy->bands[band]; if (!sband) continue; @@ -2623,9 +2623,9 @@ void regulatory_hint_disconnect(void) static bool freq_is_chan_12_13_14(u16 freq) { - if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) || - freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) || - freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ)) + if (freq == ieee80211_channel_to_frequency(12, NL80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(13, NL80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(14, NL80211_BAND_2GHZ)) return true; return false; } @@ -2650,7 +2650,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, if (beacon_chan->beacon_found || beacon_chan->flags & IEEE80211_CHAN_RADAR || - (beacon_chan->band == IEEE80211_BAND_2GHZ && + (beacon_chan->band == NL80211_BAND_2GHZ && !freq_is_chan_12_13_14(beacon_chan->center_freq))) return 0; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 9f495d76eca0..f6ced316b5a4 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -104,7 +104,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, * information for a band the BSS is not present in it will be ignored. */ void regulatory_hint_country_ie(struct wiphy *wiphy, - enum ieee80211_band band, + enum nl80211_band band, const u8 *country_ie, u8 country_ie_len); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 14d5369eb778..abdf651a70d9 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -531,7 +531,7 @@ static int cmp_bss(struct cfg80211_bss *a, } static bool cfg80211_bss_type_match(u16 capability, - enum ieee80211_band band, + enum nl80211_band band, enum ieee80211_bss_type bss_type) { bool ret = true; @@ -540,7 +540,7 @@ static bool cfg80211_bss_type_match(u16 capability, if (bss_type == IEEE80211_BSS_TYPE_ANY) return ret; - if (band == IEEE80211_BAND_60GHZ) { + if (band == NL80211_BAND_60GHZ) { mask = WLAN_CAPABILITY_DMG_TYPE_MASK; switch (bss_type) { case IEEE80211_BSS_TYPE_ESS: @@ -1006,7 +1006,7 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, if (!res) return NULL; - if (channel->band == IEEE80211_BAND_60GHZ) { + if (channel->band == NL80211_BAND_60GHZ) { bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK; if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) @@ -1089,7 +1089,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, if (!res) return NULL; - if (channel->band == IEEE80211_BAND_60GHZ) { + if (channel->band == NL80211_BAND_60GHZ) { bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK; if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) @@ -1185,7 +1185,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, struct iw_scan_req *wreq = NULL; struct cfg80211_scan_request *creq = NULL; int i, err, n_channels = 0; - enum ieee80211_band band; + enum nl80211_band band; if (!netif_running(dev)) return -ENETDOWN; @@ -1229,7 +1229,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, /* translate "Scan on frequencies" request */ i = 0; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { int j; if (!wiphy->bands[band]) @@ -1289,10 +1289,12 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq->n_ssids = 0; } - for (i = 0; i < 
IEEE80211_NUM_BANDS; i++) + for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) creq->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; + eth_broadcast_addr(creq->bssid); + rdev->scan_req = creq; err = rdev_scan(rdev, creq); if (err) { diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 544558171787..e22e5b83cfa9 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -81,7 +81,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) return -ENOMEM; if (wdev->conn->params.channel) { - enum ieee80211_band band = wdev->conn->params.channel->band; + enum nl80211_band band = wdev->conn->params.channel->band; struct ieee80211_supported_band *sband = wdev->wiphy->bands[band]; @@ -93,11 +93,11 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) request->rates[band] = (1 << sband->n_bitrates) - 1; } else { int i = 0, j; - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_supported_band *bands; struct ieee80211_channel *channel; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { bands = wdev->wiphy->bands[band]; if (!bands) continue; @@ -119,6 +119,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) wdev->conn->params.ssid_len); request->ssids[0].ssid_len = wdev->conn->params.ssid_len; + eth_broadcast_addr(request->bssid); + request->wdev = wdev; request->wiphy = &rdev->wiphy; request->scan_start = jiffies; @@ -490,8 +492,18 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, if (!rdev->ops->auth || !rdev->ops->assoc) return -EOPNOTSUPP; - if (wdev->current_bss) - return -EALREADY; + if (wdev->current_bss) { + if (!prev_bssid) + return -EALREADY; + if (prev_bssid && + !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) + return -ENOTCONN; + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + wdev->current_bss = NULL; + + cfg80211_sme_free(wdev); + } if (WARN_ON(wdev->conn)) return -EINPROGRESS; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 09b242b09bed..3c1091ae6c36 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -110,7 +110,7 @@ conf->dot11MeshHWMPconfirmationInterval; \ } while (0) -#define CHAN_ENTRY __field(enum ieee80211_band, band) \ +#define CHAN_ENTRY __field(enum nl80211_band, band) \ __field(u16, center_freq) #define CHAN_ASSIGN(chan) \ do { \ @@ -125,7 +125,7 @@ #define CHAN_PR_FMT "band: %d, freq: %u" #define CHAN_PR_ARG __entry->band, __entry->center_freq -#define CHAN_DEF_ENTRY __field(enum ieee80211_band, band) \ +#define CHAN_DEF_ENTRY __field(enum nl80211_band, band) \ __field(u32, control_freq) \ __field(u32, width) \ __field(u32, center_freq1) \ @@ -1259,6 +1259,7 @@ TRACE_EVENT(rdev_connect, __field(bool, privacy) __field(u32, wpa_versions) __field(u32, flags) + MAC_ENTRY(prev_bssid) ), TP_fast_assign( WIPHY_ASSIGN; @@ -1270,13 +1271,14 @@ TRACE_EVENT(rdev_connect, __entry->privacy = sme->privacy; __entry->wpa_versions = sme->crypto.wpa_versions; __entry->flags = sme->flags; + MAC_ASSIGN(prev_bssid, sme->prev_bssid); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT ", ssid: %s, auth type: %d, privacy: %s, wpa versions: %u, " - "flags: %u", + "flags: %u, previous bssid: " MAC_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(bssid), __entry->ssid, __entry->auth_type, BOOL_TO_STR(__entry->privacy), - __entry->wpa_versions, __entry->flags) + __entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid)) ); TRACE_EVENT(rdev_set_cqm_rssi_config, @@ 
-2645,7 +2647,7 @@ TRACE_EVENT(cfg80211_scan_done, TP_STRUCT__entry( __field(u32, n_channels) __dynamic_array(u8, ie, request ? request->ie_len : 0) - __array(u32, rates, IEEE80211_NUM_BANDS) + __array(u32, rates, NUM_NL80211_BANDS) __field(u32, wdev_id) MAC_ENTRY(wiphy_mac) __field(bool, no_cck) @@ -2656,7 +2658,7 @@ TRACE_EVENT(cfg80211_scan_done, memcpy(__get_dynamic_array(ie), request->ie, request->ie_len); memcpy(__entry->rates, request->rates, - IEEE80211_NUM_BANDS); + NUM_NL80211_BANDS); __entry->wdev_id = request->wdev ? request->wdev->identifier : 0; if (request->wiphy) @@ -2881,25 +2883,25 @@ TRACE_EVENT(rdev_start_radar_detection, TRACE_EVENT(rdev_set_mcast_rate, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - int mcast_rate[IEEE80211_NUM_BANDS]), + int mcast_rate[NUM_NL80211_BANDS]), TP_ARGS(wiphy, netdev, mcast_rate), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY - __array(int, mcast_rate, IEEE80211_NUM_BANDS) + __array(int, mcast_rate, NUM_NL80211_BANDS) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memcpy(__entry->mcast_rate, mcast_rate, - sizeof(int) * IEEE80211_NUM_BANDS); + sizeof(int) * NUM_NL80211_BANDS); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]", WIPHY_PR_ARG, NETDEV_PR_ARG, - __entry->mcast_rate[IEEE80211_BAND_2GHZ], - __entry->mcast_rate[IEEE80211_BAND_5GHZ], - __entry->mcast_rate[IEEE80211_BAND_60GHZ]) + __entry->mcast_rate[NL80211_BAND_2GHZ], + __entry->mcast_rate[NL80211_BAND_5GHZ], + __entry->mcast_rate[NL80211_BAND_60GHZ]) ); TRACE_EVENT(rdev_set_coalesce, diff --git a/net/wireless/util.c b/net/wireless/util.c index 9f440a9de63b..f36039888eb5 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -47,7 +47,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, if (WARN_ON(!sband)) return 1; - if (sband->band == IEEE80211_BAND_2GHZ) { + if (sband->band == NL80211_BAND_2GHZ) { if (scan_width == NL80211_BSS_CHAN_WIDTH_5 || scan_width == NL80211_BSS_CHAN_WIDTH_10) mandatory_flag = IEEE80211_RATE_MANDATORY_G; @@ -65,26 +65,26 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_mandatory_rates); -int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) +int ieee80211_channel_to_frequency(int chan, enum nl80211_band band) { /* see 802.11 17.3.8.3.2 and Annex J * there are overlapping channel numbers in 5GHz and 2GHz bands */ if (chan <= 0) return 0; /* not supported */ switch (band) { - case IEEE80211_BAND_2GHZ: + case NL80211_BAND_2GHZ: if (chan == 14) return 2484; else if (chan < 14) return 2407 + chan * 5; break; - case IEEE80211_BAND_5GHZ: + case NL80211_BAND_5GHZ: if (chan >= 182 && chan <= 196) return 4000 + chan * 5; else return 5000 + chan * 5; break; - case IEEE80211_BAND_60GHZ: + case NL80211_BAND_60GHZ: if (chan < 5) return 56160 + chan * 2160; break; @@ -116,11 +116,11 @@ EXPORT_SYMBOL(ieee80211_frequency_to_channel); struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, int freq) { - enum ieee80211_band band; + enum nl80211_band band; struct ieee80211_supported_band *sband; int i; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wiphy->bands[band]; if (!sband) @@ -137,12 +137,12 @@ struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, EXPORT_SYMBOL(__ieee80211_get_channel); static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, - enum ieee80211_band band) + enum nl80211_band 
band) { int i, want; switch (band) { - case IEEE80211_BAND_5GHZ: + case NL80211_BAND_5GHZ: want = 3; for (i = 0; i < sband->n_bitrates; i++) { if (sband->bitrates[i].bitrate == 60 || @@ -155,7 +155,7 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, } WARN_ON(want); break; - case IEEE80211_BAND_2GHZ: + case NL80211_BAND_2GHZ: want = 7; for (i = 0; i < sband->n_bitrates; i++) { if (sband->bitrates[i].bitrate == 10) { @@ -185,12 +185,12 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, } WARN_ON(want != 0 && want != 3 && want != 6); break; - case IEEE80211_BAND_60GHZ: + case NL80211_BAND_60GHZ: /* check for mandatory HT MCS 1..4 */ WARN_ON(!sband->ht_cap.ht_supported); WARN_ON((sband->ht_cap.mcs.rx_mask[0] & 0x1e) != 0x1e); break; - case IEEE80211_NUM_BANDS: + case NUM_NL80211_BANDS: WARN_ON(1); break; } @@ -198,9 +198,9 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, void ieee80211_set_bitrate_flags(struct wiphy *wiphy) { - enum ieee80211_band band; + enum nl80211_band band; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) if (wiphy->bands[band]) set_mandatory_flags_band(wiphy->bands[band], band); } @@ -1399,22 +1399,22 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, EXPORT_SYMBOL(ieee80211_ie_split_ric); bool ieee80211_operating_class_to_band(u8 operating_class, - enum ieee80211_band *band) + enum nl80211_band *band) { switch (operating_class) { case 112: case 115 ... 127: case 128 ... 130: - *band = IEEE80211_BAND_5GHZ; + *band = NL80211_BAND_5GHZ; return true; case 81: case 82: case 83: case 84: - *band = IEEE80211_BAND_2GHZ; + *band = NL80211_BAND_2GHZ; return true; case 180: - *band = IEEE80211_BAND_60GHZ; + *band = NL80211_BAND_60GHZ; return true; } @@ -1726,10 +1726,10 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy) { - enum ieee80211_band band; + enum nl80211_band band; unsigned int n_channels = 0; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + for (band = 0; band < NUM_NL80211_BANDS; band++) if (wiphy->bands[band]) n_channels += wiphy->bands[band]->n_channels; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index fd682832a0e3..4c89f0ca61ba 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -32,13 +32,13 @@ int cfg80211_wext_giwname(struct net_device *dev, if (!wdev) return -EOPNOTSUPP; - sband = wdev->wiphy->bands[IEEE80211_BAND_5GHZ]; + sband = wdev->wiphy->bands[NL80211_BAND_5GHZ]; if (sband) { is_a = true; is_ht |= sband->ht_cap.ht_supported; } - sband = wdev->wiphy->bands[IEEE80211_BAND_2GHZ]; + sband = wdev->wiphy->bands[NL80211_BAND_2GHZ]; if (sband) { int i; /* Check for mandatory rates */ @@ -143,7 +143,7 @@ int cfg80211_wext_giwrange(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct iw_range *range = (struct iw_range *) extra; - enum ieee80211_band band; + enum nl80211_band band; int i, c = 0; if (!wdev) @@ -215,7 +215,7 @@ int cfg80211_wext_giwrange(struct net_device *dev, } } - for (band = 0; band < IEEE80211_NUM_BANDS; band ++) { + for (band = 0; band < NUM_NL80211_BANDS; band ++) { struct ieee80211_supported_band *sband; sband = wdev->wiphy->bands[band]; @@ -265,11 +265,11 @@ int cfg80211_wext_freq(struct iw_freq *freq) * -EINVAL for impossible things. 
*/ if (freq->e == 0) { - enum ieee80211_band band = IEEE80211_BAND_2GHZ; + enum nl80211_band band = NL80211_BAND_2GHZ; if (freq->m < 0) return 0; if (freq->m > 14) - band = IEEE80211_BAND_5GHZ; + band = NL80211_BAND_5GHZ; return ieee80211_channel_to_frequency(freq->m, band); } else { int i, div = 1000000; @@ -1245,7 +1245,7 @@ static int cfg80211_wext_siwrate(struct net_device *dev, maxrate = rate->value / 100000; } - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wdev->wiphy->bands[band]; if (sband == NULL) continue; diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index b50ee5d622e1..6250b1cfcde5 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -399,7 +399,10 @@ static int __init wireless_nlevent_init(void) if (err) return err; - return register_netdevice_notifier(&wext_netdev_notifier); + err = register_netdevice_notifier(&wext_netdev_notifier); + if (err) + unregister_pernet_subsys(&wext_pernet_ops); + return err; } subsys_initcall(wireless_nlevent_init);
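The nl80211.c hunks above advertise the wiphy's BSS-selection support as a bitmask, emitting one flag attribute per set bit in nl80211_send_wiphy(), and parse_bss_select() later rejects any requested behaviour whose bit was not advertised. A minimal, self-contained sketch of that walk-and-validate pattern follows; the enum names and the support mask are illustrative stand-ins, not the kernel's definitions.

/* Standalone sketch (not kernel code) of the bitmask pattern used above:
 * supported behaviours are advertised one bit per enum value, and a request
 * is rejected unless its bit is set. All names here are illustrative. */
#include <stdio.h>
#include <errno.h>

enum bss_select_attr {          /* mirrors the shape of nl80211_bss_select_attr */
	BSS_SELECT_INVALID,
	BSS_SELECT_RSSI,
	BSS_SELECT_BAND_PREF,
	BSS_SELECT_RSSI_ADJUST,
	NUM_BSS_SELECT_ATTR,
};

#define BIT(n) (1U << (n))

/* Emit one "flag" per set bit, like the nla_put_flag() loop in
 * nl80211_send_wiphy(); here we simply print the attribute index. */
static void advertise_support(unsigned int support)
{
	unsigned int i = 0;

	while (support) {
		if (support & 1)
			printf("advertising behaviour %u\n", i);
		i++;
		support >>= 1;
	}
}

/* Reject a behaviour whose bit was never advertised, like the
 * BIT(bss_select->behaviour) check at the end of parse_bss_select(). */
static int validate_request(unsigned int support, enum bss_select_attr behaviour)
{
	if (behaviour == BSS_SELECT_INVALID || behaviour >= NUM_BSS_SELECT_ATTR)
		return -EINVAL;
	if (!(support & BIT(behaviour)))
		return -EINVAL;
	return 0;
}

int main(void)
{
	unsigned int support = BIT(BSS_SELECT_RSSI) | BIT(BSS_SELECT_BAND_PREF);

	advertise_support(support);
	printf("rssi: %d\n", validate_request(support, BSS_SELECT_RSSI));          /* 0 */
	printf("adjust: %d\n", validate_request(support, BSS_SELECT_RSSI_ADJUST)); /* -EINVAL */
	return 0;
}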
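The util.c hunk only renames the band enum from ieee80211_band to nl80211_band; the channel-to-frequency mapping in ieee80211_channel_to_frequency() is unchanged. For reference, a standalone sketch of that mapping, using a local stand-in for the band enum rather than the kernel's type:

/* Standalone sketch of the channel-to-frequency mapping implemented by
 * ieee80211_channel_to_frequency() (see the util.c hunk above). */
#include <stdio.h>

enum band { BAND_2GHZ, BAND_5GHZ, BAND_60GHZ };

static int chan_to_freq(int chan, enum band band)
{
	if (chan <= 0)
		return 0;               /* not supported */

	switch (band) {
	case BAND_2GHZ:
		if (chan == 14)
			return 2484;    /* channel 14 is a special case */
		if (chan < 14)
			return 2407 + chan * 5;
		break;
	case BAND_5GHZ:
		if (chan >= 182 && chan <= 196)
			return 4000 + chan * 5;
		return 5000 + chan * 5;
	case BAND_60GHZ:
		if (chan < 5)
			return 56160 + chan * 2160;
		break;
	}
	return 0;
}

int main(void)
{
	printf("%d\n", chan_to_freq(6, BAND_2GHZ));   /* 2437 */
	printf("%d\n", chan_to_freq(36, BAND_5GHZ));  /* 5180 */
	printf("%d\n", chan_to_freq(2, BAND_60GHZ));  /* 60480 */
	return 0;
}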
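The final wext-core.c hunk makes wireless_nlevent_init() unregister the pernet subsys when the netdevice notifier fails to register, so a failed init no longer leaves a half-registered state behind. A minimal sketch of that register-then-unwind pattern, with hypothetical register_a()/register_b() helpers standing in for the two registration calls:

/* Minimal sketch of the unwind pattern in the wext-core.c hunk: if the
 * second registration step fails, the first is rolled back. The helper
 * functions are hypothetical stand-ins, not kernel APIs. */
#include <stdio.h>

static int register_a(void)    { return 0; }
static void unregister_a(void) { puts("step one rolled back"); }
static int register_b(void)    { return -1; /* simulate failure */ }

static int subsystem_init(void)
{
	int err;

	err = register_a();
	if (err)
		return err;

	err = register_b();
	if (err)
		unregister_a();   /* undo step one on failure, as the patch does */
	return err;
}

int main(void)
{
	printf("init: %d\n", subsystem_init());
	return 0;
}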