author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-12 14:27:40 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-12 14:27:40 -0700 |
commit | f9da455b93f6ba076935b4ef4589f61e529ae046 (patch) | |
tree | 3c4e69ce1ba1d6bf65915b97a76ca2172105b278 /net | |
parent | 0e04c641b199435f3779454055f6a7de258ecdfc (diff) | |
parent | e5eca6d41f53db48edd8cf88a3f59d2c30227f8e (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Seccomp BPF filters can now be JIT'd, from Alexei Starovoitov (see the
first sketch after this list).
2) Multiqueue support in xen-netback and xen-netfront, from Andrew J.
Bennieston.
3) Allow tweaking of aggregation settings in cdc_ncm driver, from Bjørn
Mork.
4) BPF now has a "random" opcode, from Chema Gonzalez (second sketch
after this list).
5) Add more BPF documentation and improve test framework, from Daniel
Borkmann.
6) Support TCP Fast Open over IPv6, from Daniel Lee (third sketch after
this list).
7) Add software TSO helper functions and use them to support software
TSO in mvneta and mv643xx_eth drivers. From Ezequiel Garcia.
8) Support software TSO in fec driver too, from Nimrod Andy.
9) Add Broadcom SYSTEMPORT driver, from Florian Fainelli.
10) Handle broadcasts more gracefully over macvlan when there are large
numbers of interfaces configured, from Herbert Xu.
11) Allow more control over fwmark used for non-socket based responses,
from Lorenzo Colitti.
12) Do TCP congestion window limiting based upon measurements, from Neal
Cardwell.
13) Support busy polling in SCTP, from Neil Horman.
14) Allow RSS key to be configured via ethtool, from Venkata Duvvuru.
15) Bridge promisc mode handling improvements from Vlad Yasevich.
16) Don't use inetpeer entries to implement ID generation any more; it
performs poorly. From Eric Dumazet.
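
For item 1: the classic-BPF JIT now also covers seccomp programs, so existing filters simply get faster with no userspace change. As a reminder of what gets JIT-compiled, here is a minimal, illustrative sketch of installing a seccomp filter with prctl(2); blocking ptrace(2) is an arbitrary example, not anything this merge prescribes, and a real filter must also validate seccomp_data->arch.

```c
/* Minimal sketch: install a classic-BPF seccomp filter with prctl(2).
 * Blocking ptrace(2) is purely illustrative; a real filter must also
 * validate seccomp_data->arch before trusting the syscall number.
 */
#include <stddef.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/seccomp.h>
#include <linux/filter.h>

static int install_filter(void)
{
	struct sock_filter insns[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		/* ptrace? fall through to ERRNO, else skip to ALLOW */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_ptrace, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 1 /* EPERM */),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	/* required so an unprivileged task may install a filter */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;
	return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}

int main(void)
{
	return install_filter() ? 1 : 0;
}
```

Because seccomp programs are ordinary classic BPF, the JIT applies to them exactly as it does to socket filters.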
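For item 4: the new opcode is exposed to classic BPF as the ancillary load SKF_AD_RANDOM; its motivating use case was random packet sampling. A sketch, assuming userspace headers from a kernel that includes this merge (the AF_PACKET socket needs CAP_NET_RAW):

```c
/* Sketch: classic BPF filter using the new SKF_AD_RANDOM ancillary
 * load to keep a ~25% random sample of received frames.
 * Error handling is trimmed for brevity.
 */
#include <stdio.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/filter.h>
#include <linux/if_ether.h>

int main(void)
{
	struct sock_filter insns[] = {
		/* A = prandom_u32() via the ancillary-load offset */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 SKF_AD_OFF + SKF_AD_RANDOM),
		BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 3),
		/* A == 0 -> accept whole packet, else drop */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
		BPF_STMT(BPF_RET | BPF_K, 0),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
				 &prog, sizeof(prog)) < 0) {
		perror("attach");
		return 1;
	}
	/* reads on fd now see roughly one frame in four */
	return 0;
}
```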
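For item 6: on the client side the Fast Open API is the same as for IPv4; the first payload is handed to sendto(2) with MSG_FASTOPEN instead of calling connect(2). A minimal sketch, assuming the kernel's net.ipv4.tcp_fastopen sysctl (which also governs IPv6) has the client bit enabled; 2001:db8::1 is a placeholder address:

```c
/* Sketch: TCP Fast Open client over IPv6. The first data chunk rides
 * on the SYN when a TFO cookie is cached; otherwise the kernel falls
 * back to a normal three-way handshake.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000		/* older libc headers lack it */
#endif

int main(void)
{
	static const char req[] = "GET / HTTP/1.0\r\n\r\n";
	struct sockaddr_in6 dst = {
		.sin6_family = AF_INET6,
		.sin6_port = htons(80),
	};
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;
	inet_pton(AF_INET6, "2001:db8::1", &dst.sin6_addr);

	/* sendto() with MSG_FASTOPEN replaces connect() + send() */
	if (sendto(fd, req, sizeof(req) - 1, MSG_FASTOPEN,
		   (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		perror("sendto(MSG_FASTOPEN)");
		return 1;
	}
	return 0;
}
```

If no cookie is cached yet, the first call performs a regular handshake and fetches a cookie for subsequent connections.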
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1522 commits)
rtnetlink: fix userspace API breakage for iproute2 < v3.9.0
tcp: fixing TLP's FIN recovery
net: fec: Add software TSO support
net: fec: Add Scatter/gather support
net: fec: Increase buffer descriptor entry number
net: fec: Factorize feature setting
net: fec: Enable IP header hardware checksum
net: fec: Factorize the .xmit transmit function
bridge: fix compile error when compiling without IPv6 support
bridge: fix smatch warning / potential null pointer dereference
via-rhine: fix full-duplex with autoneg disable
bnx2x: Enlarge the dorq threshold for VFs
bnx2x: Check for UNDI in uncommon branch
bnx2x: Fix 1G-baseT link
bnx2x: Fix link for KR with swapped polarity lane
sctp: Fix sk_ack_backlog wrap-around problem
net/core: Add VF link state control policy
net/fsl: xgmac_mdio is dependent on OF_MDIO
net/fsl: Make xgmac_mdio read error message useful
net_sched: drr: warn when qdisc is not work conserving
...
Diffstat (limited to 'net')
346 files changed, 13776 insertions, 6799 deletions
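One pattern worth calling out before the per-file changes: the vlan_dev_get_stats64() rework below (net/8021q/vlan_dev.c) aggregates per-CPU counters with the kernel's u64_stats_sync sequence-counter loop, in which lockless readers retry whenever a writer was mid-update. The following is only an illustrative C11 re-rendition of that retry idea, not the kernel API itself (u64_stats_fetch_begin_irq/_retry_irq), and it glosses over the formal data-race rules a production seqlock must respect:

```c
/* Illustrative C11 re-rendition of the seqcount retry idea behind
 * u64_stats_fetch_begin_irq()/_retry_irq(). NOT the kernel API, and
 * deliberately loose about C11 data-race formalities (the counters
 * would strictly need atomic accesses or fences); it only shows the
 * shape of the lockless read-retry loop.
 */
#include <stdatomic.h>
#include <stdint.h>

struct pcpu_stats {
	atomic_uint seq;	/* even = stable, odd = writer active */
	uint64_t rx_packets;
	uint64_t rx_bytes;
};

static void writer_update(struct pcpu_stats *s, uint64_t pkts, uint64_t bytes)
{
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* odd */
	s->rx_packets += pkts;
	s->rx_bytes += bytes;
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* even */
}

static void reader_snapshot(struct pcpu_stats *s,
			    uint64_t *pkts, uint64_t *bytes)
{
	unsigned int start;

	do {
		start = atomic_load_explicit(&s->seq, memory_order_acquire);
		*pkts = s->rx_packets;
		*bytes = s->rx_bytes;
		/* retry if a writer was active or raced with us */
	} while ((start & 1) ||
		 start != atomic_load_explicit(&s->seq, memory_order_acquire));
}

int main(void)
{
	struct pcpu_stats s = { .seq = 0 };
	uint64_t p, b;

	writer_update(&s, 1, 64);
	reader_snapshot(&s, &p, &b);
	return (p == 1 && b == 64) ? 0 : 1;
}
```

On 64-bit kernels u64_stats_sync compiles away entirely; the sequence counter only matters where a 64-bit counter cannot be read in a single access (32-bit SMP).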
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 3c32bd257b73..9012b1c922b6 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -63,7 +63,7 @@ bool vlan_do_receive(struct sk_buff **skbp) } /* Must be invoked with rcu_read_lock. */ -struct net_device *__vlan_find_dev_deep(struct net_device *dev, +struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev, __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); @@ -81,13 +81,13 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev, upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) - return __vlan_find_dev_deep(upper_dev, + return __vlan_find_dev_deep_rcu(upper_dev, vlan_proto, vlan_id); } return NULL; } -EXPORT_SYMBOL(__vlan_find_dev_deep); +EXPORT_SYMBOL(__vlan_find_dev_deep_rcu); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 019efb79708f..ad2ac3c00398 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -643,9 +643,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; netdev_features_t old_features = features; - features &= real_dev->vlan_features; + features = netdev_intersect_features(features, real_dev->vlan_features); features |= NETIF_F_RXCSUM; - features &= real_dev->features; + features = netdev_intersect_features(features, real_dev->features); features |= old_features & NETIF_F_SOFT_FEATURES; features |= NETIF_F_LLTX; @@ -671,38 +671,36 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { + struct vlan_pcpu_stats *p; + u32 rx_errors = 0, tx_dropped = 0; + int i; - if (vlan_dev_priv(dev)->vlan_pcpu_stats) { - struct vlan_pcpu_stats *p; - u32 rx_errors = 0, tx_dropped = 0; - int i; - - for_each_possible_cpu(i) { - u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; - unsigned int start; - - p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); - do { - start = u64_stats_fetch_begin_irq(&p->syncp); - rxpackets = p->rx_packets; - rxbytes = p->rx_bytes; - rxmulticast = p->rx_multicast; - txpackets = p->tx_packets; - txbytes = p->tx_bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); - - stats->rx_packets += rxpackets; - stats->rx_bytes += rxbytes; - stats->multicast += rxmulticast; - stats->tx_packets += txpackets; - stats->tx_bytes += txbytes; - /* rx_errors & tx_dropped are u32 */ - rx_errors += p->rx_errors; - tx_dropped += p->tx_dropped; - } - stats->rx_errors = rx_errors; - stats->tx_dropped = tx_dropped; + for_each_possible_cpu(i) { + u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; + unsigned int start; + + p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); + do { + start = u64_stats_fetch_begin_irq(&p->syncp); + rxpackets = p->rx_packets; + rxbytes = p->rx_bytes; + rxmulticast = p->rx_multicast; + txpackets = p->tx_packets; + txbytes = p->tx_bytes; + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; + stats->multicast += rxmulticast; + stats->tx_packets += txpackets; + stats->tx_bytes += txbytes; + /* rx_errors & tx_dropped are u32 */ + rx_errors += p->rx_errors; + tx_dropped += p->tx_dropped; } + stats->rx_errors = rx_errors; + stats->tx_dropped = tx_dropped; + return stats; } diff --git a/net/appletalk/ddp.c 
b/net/appletalk/ddp.c index 786ee2f83d5f..01a1082e02b3 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1669,7 +1669,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr goto out; } - if (sk->sk_no_check == 1) + if (sk->sk_no_check_tx) ddp->deh_sum = 0; else ddp->deh_sum = atalk_checksum(skb, len + sizeof(*ddp)); diff --git a/net/atm/svc.c b/net/atm/svc.c index 1281049c135f..d8e5d0c2ebbc 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -263,17 +263,11 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, goto out; } } -/* - * Not supported yet - * - * #ifndef CONFIG_SINGLE_SIGITF - */ + vcc->qos.txtp.max_pcr = SELECT_TOP_PCR(vcc->qos.txtp); vcc->qos.txtp.pcr = 0; vcc->qos.txtp.min_pcr = 0; -/* - * #endif - */ + error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci); if (!error) sock->state = SS_CONNECTED; diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index b758881be108..a12e25efaf6f 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -245,6 +245,7 @@ static int batadv_algorithms_open(struct inode *inode, struct file *file) static int batadv_originators_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_orig_seq_print_text, net_dev); } @@ -258,18 +259,21 @@ static int batadv_originators_hardif_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_orig_hardif_seq_print_text, net_dev); } static int batadv_gateways_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_gw_client_seq_print_text, net_dev); } static int batadv_transtable_global_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_tt_global_seq_print_text, net_dev); } @@ -277,6 +281,7 @@ static int batadv_transtable_global_open(struct inode *inode, struct file *file) static int batadv_bla_claim_table_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_bla_claim_table_seq_print_text, net_dev); } @@ -285,6 +290,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_bla_backbone_table_seq_print_text, net_dev); } @@ -300,6 +306,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode, static int batadv_dat_cache_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_dat_cache_seq_print_text, net_dev); } #endif @@ -307,6 +314,7 @@ static int batadv_dat_cache_open(struct inode *inode, struct file *file) static int batadv_transtable_local_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_tt_local_seq_print_text, net_dev); } @@ -319,6 +327,7 @@ struct batadv_debuginfo { static int batadv_nc_nodes_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_nc_nodes_seq_print_text, net_dev); } #endif @@ -333,7 +342,7 @@ struct batadv_debuginfo 
batadv_debuginfo_##_name = { \ .llseek = seq_lseek, \ .release = single_release, \ } \ -}; +} /* the following attributes are general and therefore they will be directly * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs @@ -395,7 +404,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ .llseek = seq_lseek, \ .release = single_release, \ }, \ -}; +} static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, batadv_originators_hardif_open); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index aa5d4946d0d7..f2c066b21716 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -594,7 +594,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, if (!neigh_node) goto free_orig; - tmp_skb = pskb_copy(skb, GFP_ATOMIC); + tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb, cand[i].orig_node, packet_subtype)) { @@ -662,6 +662,7 @@ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv) void batadv_dat_status_update(struct net_device *net_dev) { struct batadv_priv *bat_priv = netdev_priv(net_dev); + batadv_dat_tvlv_container_update(bat_priv); } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 770dc890ceef..118b990bae25 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2014.2.0" +#define BATADV_SOURCE_VERSION "2014.3.0" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index a9546fe541eb..8d04d174669e 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -86,6 +86,7 @@ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv) void batadv_nc_status_update(struct net_device *net_dev) { struct batadv_priv *bat_priv = netdev_priv(net_dev); + batadv_nc_tvlv_container_update(bat_priv); } @@ -1343,7 +1344,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, struct ethhdr *ethhdr; /* Copy skb header to change the mac header */ - skb = pskb_copy(skb, GFP_ATOMIC); + skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!skb) return; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 744a59b85e15..e7ee65dc20bf 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -884,7 +884,7 @@ static void batadv_softif_init_early(struct net_device *dev) /* generate random address */ eth_hw_addr_random(dev); - SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); + dev->ethtool_ops = &batadv_ethtool_ops; memset(priv, 0, sizeof(*priv)); } diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 1ebb0d9e2ea5..fc47baa888c5 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -29,12 +29,14 @@ static struct net_device *batadv_kobj_to_netdev(struct kobject *obj) { struct device *dev = container_of(obj->parent, struct device, kobj); + return to_net_dev(dev); } static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj) { struct net_device *net_dev = batadv_kobj_to_netdev(obj); + return netdev_priv(net_dev); } @@ -106,7 +108,7 @@ struct batadv_attribute batadv_attr_vlan_##_name = { \ .mode = _mode }, \ .show = _show, \ .store = _store, \ -}; +} /* Use this, if you have customized show and store functions */ #define BATADV_ATTR(_name, _mode, _show, _store) \ @@ -115,7 
+117,7 @@ struct batadv_attribute batadv_attr_##_name = { \ .mode = _mode }, \ .show = _show, \ .store = _store, \ -}; +} #define BATADV_ATTR_SIF_STORE_BOOL(_name, _post_func) \ ssize_t batadv_store_##_name(struct kobject *kobj, \ @@ -124,6 +126,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + \ return __batadv_store_bool_attr(buff, count, _post_func, attr, \ &bat_priv->_name, net_dev); \ } @@ -133,6 +136,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ struct attribute *attr, char *buff) \ { \ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \ + \ return sprintf(buff, "%s\n", \ atomic_read(&bat_priv->_name) == 0 ? \ "disabled" : "enabled"); \ @@ -155,6 +159,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + \ return __batadv_store_uint_attr(buff, count, _min, _max, \ _post_func, attr, \ &bat_priv->_name, net_dev); \ @@ -165,6 +170,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ struct attribute *attr, char *buff) \ { \ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \ + \ return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name)); \ } \ @@ -188,6 +194,7 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \ size_t res = __batadv_store_bool_attr(buff, count, _post_func, \ attr, &vlan->_name, \ bat_priv->soft_iface); \ + \ batadv_softif_vlan_free_ref(vlan); \ return res; \ } @@ -202,6 +209,7 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \ size_t res = sprintf(buff, "%s\n", \ atomic_read(&vlan->_name) == 0 ? \ "disabled" : "enabled"); \ + \ batadv_softif_vlan_free_ref(vlan); \ return res; \ } @@ -324,12 +332,14 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj, struct attribute *attr, char *buff) { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name); } static void batadv_post_gw_reselect(struct net_device *net_dev) { struct batadv_priv *bat_priv = netdev_priv(net_dev); + batadv_gw_reselect(bat_priv); } diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 73492b91105a..8796ffa08b43 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -420,12 +420,18 @@ static int conn_send(struct l2cap_conn *conn, return 0; } -static void get_dest_bdaddr(struct in6_addr *ip6_daddr, - bdaddr_t *addr, u8 *addr_type) +static u8 get_addr_type_from_eui64(u8 byte) { - u8 *eui64; + /* Is universal(0) or local(1) bit, */ + if (byte & 0x02) + return ADDR_LE_DEV_RANDOM; - eui64 = ip6_daddr->s6_addr + 8; + return ADDR_LE_DEV_PUBLIC; +} + +static void copy_to_bdaddr(struct in6_addr *ip6_daddr, bdaddr_t *addr) +{ + u8 *eui64 = ip6_daddr->s6_addr + 8; addr->b[0] = eui64[7]; addr->b[1] = eui64[6]; @@ -433,16 +439,19 @@ static void get_dest_bdaddr(struct in6_addr *ip6_daddr, addr->b[3] = eui64[2]; addr->b[4] = eui64[1]; addr->b[5] = eui64[0]; +} - addr->b[5] ^= 2; +static void convert_dest_bdaddr(struct in6_addr *ip6_daddr, + bdaddr_t *addr, u8 *addr_type) +{ + copy_to_bdaddr(ip6_daddr, addr); - /* Set universal/local bit to 0 */ - if (addr->b[5] & 1) { - addr->b[5] &= ~1; - *addr_type = ADDR_LE_DEV_PUBLIC; - } else { - *addr_type = ADDR_LE_DEV_RANDOM; - } + /* We need to toggle the U/L bit that we got from IPv6 address + * so that we get the proper address and type of the BD address. 
+ */ + addr->b[5] ^= 0x02; + + *addr_type = get_addr_type_from_eui64(addr->b[5]); } static int header_create(struct sk_buff *skb, struct net_device *netdev, @@ -473,9 +482,11 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev, /* Get destination BT device from skb. * If there is no such peer then discard the packet. */ - get_dest_bdaddr(&hdr->daddr, &addr, &addr_type); + convert_dest_bdaddr(&hdr->daddr, &addr, &addr_type); - BT_DBG("dest addr %pMR type %d", &addr, addr_type); + BT_DBG("dest addr %pMR type %s IP %pI6c", &addr, + addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM", + &hdr->daddr); read_lock_irqsave(&devices_lock, flags); peer = peer_lookup_ba(dev, &addr, addr_type); @@ -556,7 +567,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) } else { unsigned long flags; - get_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type); + convert_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type); eui64_addr = lowpan_cb(skb)->addr.s6_addr + 8; dev = lowpan_dev(netdev); @@ -564,8 +575,10 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) peer = peer_lookup_ba(dev, &addr, addr_type); read_unlock_irqrestore(&devices_lock, flags); - BT_DBG("xmit from %s to %pMR (%pI6c) peer %p", netdev->name, - &addr, &lowpan_cb(skb)->addr, peer); + BT_DBG("xmit %s to %pMR type %s IP %pI6c peer %p", + netdev->name, &addr, + addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM", + &lowpan_cb(skb)->addr, peer); if (peer && peer->conn) err = send_pkt(peer->conn, netdev->dev_addr, @@ -620,13 +633,13 @@ static void set_addr(u8 *eui, u8 *addr, u8 addr_type) eui[6] = addr[1]; eui[7] = addr[0]; - eui[0] ^= 2; - - /* Universal/local bit set, RFC 4291 */ + /* Universal/local bit set, BT 6lowpan draft ch. 3.2.1 */ if (addr_type == ADDR_LE_DEV_PUBLIC) - eui[0] |= 1; + eui[0] &= ~0x02; else - eui[0] &= ~1; + eui[0] |= 0x02; + + BT_DBG("type %d addr %*phC", addr_type, 8, eui); } static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr, @@ -634,7 +647,6 @@ static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr, { netdev->addr_assign_type = NET_ADDR_PERM; set_addr(netdev->dev_addr, addr->b, addr_type); - netdev->dev_addr[0] ^= 2; } static void ifup(struct net_device *netdev) @@ -684,13 +696,6 @@ static int add_peer_conn(struct l2cap_conn *conn, struct lowpan_dev *dev) memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8, EUI64_ADDR_LEN); - peer->eui64_addr[0] ^= 2; /* second bit-flip (Universe/Local) - * is done according RFC2464 - */ - - raw_dump_inline(__func__, "peer IPv6 address", - (unsigned char *)&peer->peer_addr, 16); - raw_dump_inline(__func__, "peer EUI64 address", peer->eui64_addr, 8); write_lock_irqsave(&devices_lock, flags); INIT_LIST_HEAD(&peer->list); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 521fd4f3985e..8671bc79a35b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -28,6 +28,7 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/l2cap.h> #include "smp.h" #include "a2mp.h" @@ -367,9 +368,23 @@ static void le_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, le_conn_timeout.work); + struct hci_dev *hdev = conn->hdev; BT_DBG(""); + /* We could end up here due to having done directed advertising, + * so clean up the state if necessary. 
This should however only + * happen with broken hardware or if low duty cycle was used + * (which doesn't have a timeout of its own). + */ + if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { + u8 enable = 0x00; + hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), + &enable); + hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); + return; + } + hci_le_create_connection_cancel(conn); } @@ -393,6 +408,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->io_capability = hdev->io_capability; conn->remote_auth = 0xff; conn->key_type = 0xff; + conn->tx_power = HCI_TX_POWER_INVALID; + conn->max_tx_power = HCI_TX_POWER_INVALID; set_bit(HCI_CONN_POWER_SAVE, &conn->flags); conn->disc_timeout = HCI_DISCONN_TIMEOUT; @@ -401,6 +418,10 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) case ACL_LINK: conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK; break; + case LE_LINK: + /* conn->src should reflect the local identity address */ + hci_copy_identity_address(hdev, &conn->src, &conn->src_type); + break; case SCO_LINK: if (lmp_esco_capable(hdev)) conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | @@ -545,6 +566,11 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) * favor of connection establishment, we should restart it. */ hci_update_background_scan(hdev); + + /* Re-enable advertising in case this was a failed connection + * attempt as a peripheral. + */ + mgmt_reenable_advertising(hdev); } static void create_le_conn_complete(struct hci_dev *hdev, u8 status) @@ -605,6 +631,45 @@ static void hci_req_add_le_create_conn(struct hci_request *req, conn->state = BT_CONNECT; } +static void hci_req_directed_advertising(struct hci_request *req, + struct hci_conn *conn) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_adv_param cp; + u8 own_addr_type; + u8 enable; + + enable = 0x00; + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); + + /* Clear the HCI_ADVERTISING bit temporarily so that the + * hci_update_random_address knows that it's safe to go ahead + * and write a new random address. The flag will be set back on + * as soon as the SET_ADV_ENABLE HCI command completes. + */ + clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + + /* Set require_privacy to false so that the remote device has a + * chance of identifying us. + */ + if (hci_update_random_address(req, false, &own_addr_type) < 0) + return; + + memset(&cp, 0, sizeof(cp)); + cp.type = LE_ADV_DIRECT_IND; + cp.own_address_type = own_addr_type; + cp.direct_addr_type = conn->dst_type; + bacpy(&cp.direct_addr, &conn->dst); + cp.channel_map = hdev->le_adv_channel_map; + + hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); + + enable = 0x01; + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); + + conn->state = BT_CONNECT; +} + struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u8 auth_type) { @@ -614,9 +679,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, struct hci_request req; int err; - if (test_bit(HCI_ADVERTISING, &hdev->flags)) - return ERR_PTR(-ENOTSUPP); - /* Some devices send ATT messages as soon as the physical link is * established. 
To be able to handle these ATT messages, the user- * space first establishes the connection and then starts the pairing @@ -664,13 +726,20 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-ENOMEM); conn->dst_type = dst_type; - - conn->out = true; - conn->link_mode |= HCI_LM_MASTER; conn->sec_level = BT_SECURITY_LOW; conn->pending_sec_level = sec_level; conn->auth_type = auth_type; + hci_req_init(&req, hdev); + + if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { + hci_req_directed_advertising(&req, conn); + goto create_conn; + } + + conn->out = true; + conn->link_mode |= HCI_LM_MASTER; + params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); if (params) { conn->le_conn_min_interval = params->conn_min_interval; @@ -680,8 +749,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, conn->le_conn_max_interval = hdev->le_conn_max_interval; } - hci_req_init(&req, hdev); - /* If controller is scanning, we stop it since some controllers are * not able to scan and connect at the same time. Also set the * HCI_LE_SCAN_INTERRUPTED flag so that the command complete @@ -695,6 +762,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, hci_req_add_le_create_conn(&req, conn); +create_conn: err = hci_req_run(&req, create_le_conn_complete); if (err) { hci_conn_del(conn); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 1c6ffaa8902f..0a43cce9a914 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -34,6 +34,7 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/l2cap.h> #include "smp.h" @@ -579,6 +580,62 @@ static int sniff_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, sniff_max_interval_set, "%llu\n"); +static int conn_info_min_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val > hdev->conn_info_max_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_min_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_min_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_min_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, + conn_info_min_age_set, "%llu\n"); + +static int conn_info_max_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val < hdev->conn_info_min_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_max_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_max_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_max_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, + conn_info_max_age_set, "%llu\n"); + static int identity_show(struct seq_file *f, void *p) { struct hci_dev *hdev = f->private; @@ -955,14 +1012,9 @@ static ssize_t le_auto_conn_write(struct file *file, const char __user *data, if (count < 3) return -EINVAL; - buf = kzalloc(count, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, data, count)) { - err = -EFAULT; - goto done; - } + buf = memdup_user(data, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); if (memcmp(buf, "add", 3) == 0) { n = sscanf(&buf[4], "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu %hhu", @@ -1759,6 +1811,11 @@ static int 
__hci_init(struct hci_dev *hdev) &blacklist_fops); debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, + &conn_info_min_age_fops); + debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, + &conn_info_max_age_fops); + if (lmp_bredr_capable(hdev)) { debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, &inquiry_cache_fops); @@ -1828,6 +1885,9 @@ static int __hci_init(struct hci_dev *hdev) &lowpan_debugfs_fops); debugfs_create_file("le_auto_conn", 0644, hdev->debugfs, hdev, &le_auto_conn_fops); + debugfs_create_u16("discov_interleaved_timeout", 0644, + hdev->debugfs, + &hdev->discov_interleaved_timeout); } return 0; @@ -2033,12 +2093,11 @@ bool hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data, hci_remove_remote_oob_data(hdev, &data->bdaddr); - if (ssp) - *ssp = data->ssp_mode; + *ssp = data->ssp_mode; ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr); if (ie) { - if (ie->data.ssp_mode && ssp) + if (ie->data.ssp_mode) *ssp = true; if (ie->name_state == NAME_NEEDED && @@ -3791,6 +3850,9 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_conn_max_interval = 0x0038; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; + hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; + hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE; + hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE; mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 682f33a38366..21e5913d12e0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -991,10 +991,25 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) if (!sent) return; + if (status) + return; + hci_dev_lock(hdev); - if (!status) - mgmt_advertising(hdev, *sent); + /* If we're doing connection initation as peripheral. Set a + * timeout in case something goes wrong. 
+ */ + if (*sent) { + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + queue_delayed_work(hdev->workqueue, + &conn->le_conn_timeout, + HCI_LE_CONN_TIMEOUT); + } + + mgmt_advertising(hdev, *sent); hci_dev_unlock(hdev); } @@ -1018,6 +1033,33 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } +static bool has_pending_adv_report(struct hci_dev *hdev) +{ + struct discovery_state *d = &hdev->discovery; + + return bacmp(&d->last_adv_addr, BDADDR_ANY); +} + +static void clear_pending_adv_report(struct hci_dev *hdev) +{ + struct discovery_state *d = &hdev->discovery; + + bacpy(&d->last_adv_addr, BDADDR_ANY); + d->last_adv_data_len = 0; +} + +static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 bdaddr_type, s8 rssi, u8 *data, u8 len) +{ + struct discovery_state *d = &hdev->discovery; + + bacpy(&d->last_adv_addr, bdaddr); + d->last_adv_addr_type = bdaddr_type; + d->last_adv_rssi = rssi; + memcpy(d->last_adv_data, data, len); + d->last_adv_data_len = len; +} + static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1036,9 +1078,25 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, switch (cp->enable) { case LE_SCAN_ENABLE: set_bit(HCI_LE_SCAN, &hdev->dev_flags); + if (hdev->le_scan_type == LE_SCAN_ACTIVE) + clear_pending_adv_report(hdev); break; case LE_SCAN_DISABLE: + /* We do this here instead of when setting DISCOVERY_STOPPED + * since the latter would potentially require waiting for + * inquiry to stop too. + */ + if (has_pending_adv_report(hdev)) { + struct discovery_state *d = &hdev->discovery; + + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, + d->last_adv_rssi, 0, 1, + d->last_adv_data, + d->last_adv_data_len, NULL, 0); + } + /* Cancel this timer so that we don't try to disable scanning * when it's already disabled. */ @@ -1187,6 +1245,59 @@ static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, amp_write_rem_assoc_continue(hdev, rp->phy_handle); } +static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_rssi *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) + conn->rssi = rp->rssi; + + hci_dev_unlock(hdev); +} + +static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_cp_read_tx_power *sent; + struct hci_rp_read_tx_power *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER); + if (!sent) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (!conn) + goto unlock; + + switch (sent->type) { + case 0x00: + conn->tx_power = rp->tx_power; + break; + case 0x01: + conn->max_tx_power = rp->tx_power; + break; + } + +unlock: + hci_dev_unlock(hdev); +} + static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -1342,6 +1453,7 @@ static int hci_outgoing_auth_needed(struct hci_dev *hdev, * is requested. 
*/ if (!hci_conn_ssp_enabled(conn) && !(conn->auth_type & 0x01) && + conn->pending_sec_level != BT_SECURITY_FIPS && conn->pending_sec_level != BT_SECURITY_HIGH && conn->pending_sec_level != BT_SECURITY_MEDIUM) return 0; @@ -1827,7 +1939,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) name_known = hci_inquiry_cache_update(hdev, &data, false, &ssp); mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, 0, !name_known, ssp, NULL, - 0); + 0, NULL, 0); } hci_dev_unlock(hdev); @@ -2579,6 +2691,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_write_remote_amp_assoc(hdev, skb); break; + case HCI_OP_READ_RSSI: + hci_cc_read_rssi(hdev, skb); + break; + + case HCI_OP_READ_TX_POWER: + hci_cc_read_tx_power(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; @@ -2957,7 +3077,8 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) } if (key->type == HCI_LK_COMBINATION && key->pin_len < 16 && - conn->pending_sec_level == BT_SECURITY_HIGH) { + (conn->pending_sec_level == BT_SECURITY_HIGH || + conn->pending_sec_level == BT_SECURITY_FIPS)) { BT_DBG("%s ignoring key unauthenticated for high security", hdev->name); goto not_found; @@ -3102,7 +3223,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, false, &ssp); mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - !name_known, ssp, NULL, 0); + !name_known, ssp, NULL, 0, NULL, 0); } } else { struct inquiry_info_with_rssi *info = (void *) (skb->data + 1); @@ -3120,7 +3241,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, false, &ssp); mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - !name_known, ssp, NULL, 0); + !name_known, ssp, NULL, 0, NULL, 0); } } @@ -3309,7 +3430,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, eir_len = eir_get_length(info->data, sizeof(info->data)); mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, !name_known, - ssp, info->data, eir_len); + ssp, info->data, eir_len, NULL, 0); } hci_dev_unlock(hdev); @@ -3367,24 +3488,20 @@ unlock: static u8 hci_get_auth_req(struct hci_conn *conn) { - /* If remote requests dedicated bonding follow that lead */ - if (conn->remote_auth == HCI_AT_DEDICATED_BONDING || - conn->remote_auth == HCI_AT_DEDICATED_BONDING_MITM) { - /* If both remote and local IO capabilities allow MITM - * protection then require it, otherwise don't */ - if (conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT || - conn->io_capability == HCI_IO_NO_INPUT_OUTPUT) - return HCI_AT_DEDICATED_BONDING; - else - return HCI_AT_DEDICATED_BONDING_MITM; - } - /* If remote requests no-bonding follow that lead */ if (conn->remote_auth == HCI_AT_NO_BONDING || conn->remote_auth == HCI_AT_NO_BONDING_MITM) return conn->remote_auth | (conn->auth_type & 0x01); - return conn->auth_type; + /* If both remote and local have enough IO capabilities, require + * MITM protection + */ + if (conn->remote_cap != HCI_IO_NO_INPUT_OUTPUT && + conn->io_capability != HCI_IO_NO_INPUT_OUTPUT) + return conn->remote_auth | 0x01; + + /* No MITM protection possible so ignore remote requirement */ + return (conn->remote_auth & ~0x01) | (conn->auth_type & 0x01); } static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -3414,8 +3531,21 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) * to DisplayYesNo as it 
is not supported by BT spec. */ cp.capability = (conn->io_capability == 0x04) ? HCI_IO_DISPLAY_YESNO : conn->io_capability; - conn->auth_type = hci_get_auth_req(conn); - cp.authentication = conn->auth_type; + + /* If we are initiators, there is no remote information yet */ + if (conn->remote_auth == 0xff) { + cp.authentication = conn->auth_type; + + /* Request MITM protection if our IO caps allow it + * except for the no-bonding case + */ + if (conn->io_capability != HCI_IO_NO_INPUT_OUTPUT && + cp.authentication != HCI_AT_NO_BONDING) + cp.authentication |= 0x01; + } else { + conn->auth_type = hci_get_auth_req(conn); + cp.authentication = conn->auth_type; + } if (hci_find_remote_oob_data(hdev, &conn->dst) && (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags))) @@ -3483,12 +3613,9 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, rem_mitm = (conn->remote_auth & 0x01); /* If we require MITM but the remote device can't provide that - * (it has NoInputNoOutput) then reject the confirmation - * request. The only exception is when we're dedicated bonding - * initiators (connect_cfm_cb set) since then we always have the MITM - * bit set. */ - if (!conn->connect_cfm_cb && loc_mitm && - conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) { + * (it has NoInputNoOutput) then reject the confirmation request + */ + if (loc_mitm && conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) { BT_DBG("Rejecting request: remote device can't provide MITM"); hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY, sizeof(ev->bdaddr), &ev->bdaddr); @@ -3846,17 +3973,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->dst_type = ev->bdaddr_type; - /* The advertising parameters for own address type - * define which source address and source address - * type this connections has. - */ - if (bacmp(&conn->src, BDADDR_ANY)) { - conn->src_type = ADDR_LE_DEV_PUBLIC; - } else { - bacpy(&conn->src, &hdev->static_addr); - conn->src_type = ADDR_LE_DEV_RANDOM; - } - if (ev->role == LE_CONN_ROLE_MASTER) { conn->out = true; conn->link_mode |= HCI_LM_MASTER; @@ -3881,27 +3997,24 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) &conn->init_addr, &conn->init_addr_type); } - } else { - /* Set the responder (our side) address type based on - * the advertising address type. - */ - conn->resp_addr_type = hdev->adv_addr_type; - if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) - bacpy(&conn->resp_addr, &hdev->random_addr); - else - bacpy(&conn->resp_addr, &hdev->bdaddr); - - conn->init_addr_type = ev->bdaddr_type; - bacpy(&conn->init_addr, &ev->bdaddr); } } else { cancel_delayed_work(&conn->le_conn_timeout); } - /* Ensure that the hci_conn contains the identity address type - * regardless of which address the connection was made with. - */ - hci_copy_identity_address(hdev, &conn->src, &conn->src_type); + if (!conn->out) { + /* Set the responder (our side) address type based on + * the advertising address type. + */ + conn->resp_addr_type = hdev->adv_addr_type; + if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) + bacpy(&conn->resp_addr, &hdev->random_addr); + else + bacpy(&conn->resp_addr, &hdev->bdaddr); + + conn->init_addr_type = ev->bdaddr_type; + bacpy(&conn->init_addr, &ev->bdaddr); + } /* Lookup the identity address from the stored connection * address and address type. 
@@ -3981,25 +4094,97 @@ static void check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, } } +static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, + u8 bdaddr_type, s8 rssi, u8 *data, u8 len) +{ + struct discovery_state *d = &hdev->discovery; + bool match; + + /* Passive scanning shouldn't trigger any device found events */ + if (hdev->le_scan_type == LE_SCAN_PASSIVE) { + if (type == LE_ADV_IND || type == LE_ADV_DIRECT_IND) + check_pending_le_conn(hdev, bdaddr, bdaddr_type); + return; + } + + /* If there's nothing pending either store the data from this + * event or send an immediate device found event if the data + * should not be stored for later. + */ + if (!has_pending_adv_report(hdev)) { + /* If the report will trigger a SCAN_REQ store it for + * later merging. + */ + if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + store_pending_adv_report(hdev, bdaddr, bdaddr_type, + rssi, data, len); + return; + } + + mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, + rssi, 0, 1, data, len, NULL, 0); + return; + } + + /* Check if the pending report is for the same device as the new one */ + match = (!bacmp(bdaddr, &d->last_adv_addr) && + bdaddr_type == d->last_adv_addr_type); + + /* If the pending data doesn't match this report or this isn't a + * scan response (e.g. we got a duplicate ADV_IND) then force + * sending of the pending data. + */ + if (type != LE_ADV_SCAN_RSP || !match) { + /* Send out whatever is in the cache, but skip duplicates */ + if (!match) + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, + d->last_adv_rssi, 0, 1, + d->last_adv_data, + d->last_adv_data_len, NULL, 0); + + /* If the new report will trigger a SCAN_REQ store it for + * later merging. + */ + if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + store_pending_adv_report(hdev, bdaddr, bdaddr_type, + rssi, data, len); + return; + } + + /* The advertising reports cannot be merged, so clear + * the pending report and send out a device found event. + */ + clear_pending_adv_report(hdev); + mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, + rssi, 0, 1, data, len, NULL, 0); + return; + } + + /* If we get here we've got a pending ADV_IND or ADV_SCAN_IND and + * the new event is a SCAN_RSP. We can therefore proceed with + * sending a merged device found event. 
+ */ + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, rssi, 0, 1, data, len, + d->last_adv_data, d->last_adv_data_len); + clear_pending_adv_report(hdev); +} + static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) { u8 num_reports = skb->data[0]; void *ptr = &skb->data[1]; - s8 rssi; hci_dev_lock(hdev); while (num_reports--) { struct hci_ev_le_advertising_info *ev = ptr; - - if (ev->evt_type == LE_ADV_IND || - ev->evt_type == LE_ADV_DIRECT_IND) - check_pending_le_conn(hdev, &ev->bdaddr, - ev->bdaddr_type); + s8 rssi; rssi = ev->data[ev->length]; - mgmt_device_found(hdev, &ev->bdaddr, LE_LINK, ev->bdaddr_type, - NULL, rssi, 0, 1, ev->data, ev->length); + process_adv_report(hdev, ev->evt_type, &ev->bdaddr, + ev->bdaddr_type, rssi, ev->data, ev->length); ptr += sizeof(*ev) + ev->length + 1; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b9a418e578e0..80d25c150a65 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -143,7 +143,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (!skb_copy) { /* Create a private copy with headroom */ - skb_copy = __pskb_copy(skb, 1, GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true); if (!skb_copy) continue; @@ -247,8 +247,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) struct hci_mon_hdr *hdr; /* Create a private copy with headroom */ - skb_copy = __pskb_copy(skb, HCI_MON_HDR_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, + GFP_ATOMIC, true); if (!skb_copy) continue; @@ -524,16 +524,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, case HCISETRAW: if (!capable(CAP_NET_ADMIN)) return -EPERM; - - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) - return -EPERM; - - if (arg) - set_bit(HCI_RAW, &hdev->flags); - else - clear_bit(HCI_RAW, &hdev->flags); - - return 0; + return -EOPNOTSUPP; case HCIGETCONNINFO: return hci_get_conn_info(hdev, (void __user *) arg); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dc4d301d3a72..6eabbe05fe54 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -471,8 +471,14 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) chan->max_tx = L2CAP_DEFAULT_MAX_TX; chan->tx_win = L2CAP_DEFAULT_TX_WINDOW; chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; + chan->remote_max_tx = chan->max_tx; + chan->remote_tx_win = chan->tx_win; chan->ack_win = L2CAP_DEFAULT_TX_WINDOW; chan->sec_level = BT_SECURITY_LOW; + chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; + chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; + chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; + chan->conf_state = 0; set_bit(FLAG_FORCE_ACTIVE, &chan->flags); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ef5e5b04f34f..ade3fb4c23bc 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1180,13 +1180,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { BT_DBG("backlog full %d", parent->sk_ack_backlog); + release_sock(parent); return NULL; } sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); - if (!sk) + if (!sk) { + release_sock(parent); return NULL; + } bt_sock_reclassify_lock(sk, BTPROTO_L2CAP); diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index b3fbc73516c4..941ad7530eda 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -58,6 +58,7 
@@ int bt_to_errno(__u16 code) return EIO; case 0x04: + case 0x3c: return EHOSTDOWN; case 0x05: diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d2d4e0d5aed0..0fce54412ffd 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -29,12 +29,13 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> #include "smp.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 5 +#define MGMT_REVISION 6 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -83,6 +84,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_DEBUG_KEYS, MGMT_OP_SET_PRIVACY, MGMT_OP_LOAD_IRKS, + MGMT_OP_GET_CONN_INFO, }; static const u16 mgmt_events[] = { @@ -2850,10 +2852,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, } sec_level = BT_SECURITY_MEDIUM; - if (cp->io_cap == 0x03) - auth_type = HCI_AT_DEDICATED_BONDING; - else - auth_type = HCI_AT_DEDICATED_BONDING_MITM; + auth_type = HCI_AT_DEDICATED_BONDING; if (cp->addr.type == BDADDR_BREDR) { conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level, @@ -3351,6 +3350,8 @@ static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) static void start_discovery_complete(struct hci_dev *hdev, u8 status) { + unsigned long timeout = 0; + BT_DBG("status %d", status); if (status) { @@ -3366,13 +3367,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) switch (hdev->discovery.type) { case DISCOV_TYPE_LE: - queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - DISCOV_LE_TIMEOUT); + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); break; case DISCOV_TYPE_INTERLEAVED: - queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - DISCOV_INTERLEAVED_TIMEOUT); + timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); break; case DISCOV_TYPE_BREDR: @@ -3381,6 +3380,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) default: BT_ERR("Invalid discovery type %d", hdev->discovery.type); } + + if (!timeout) + return; + + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout); } static int start_discovery(struct sock *sk, struct hci_dev *hdev, @@ -4530,7 +4534,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; - u8 type, addr_type; + u8 type, addr_type, authenticated; if (key->addr.type == BDADDR_LE_PUBLIC) addr_type = ADDR_LE_DEV_PUBLIC; @@ -4542,8 +4546,19 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, else type = HCI_SMP_LTK_SLAVE; + switch (key->type) { + case MGMT_LTK_UNAUTHENTICATED: + authenticated = 0x00; + break; + case MGMT_LTK_AUTHENTICATED: + authenticated = 0x01; + break; + default: + continue; + } + hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type, - key->type, key->val, key->enc_size, key->ediv, + authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -4555,6 +4570,218 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } +struct cmd_conn_lookup { + struct hci_conn *conn; + bool valid_tx_power; + u8 mgmt_status; +}; + +static void get_conn_info_complete(struct pending_cmd *cmd, void *data) +{ + struct cmd_conn_lookup *match = data; + struct mgmt_cp_get_conn_info *cp; + struct mgmt_rp_get_conn_info rp; + struct hci_conn *conn = cmd->user_data; + + if (conn != match->conn) + return; + + cp = (struct mgmt_cp_get_conn_info *) cmd->param; + + memset(&rp, 0, sizeof(rp)); + 
bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!match->mgmt_status) { + rp.rssi = conn->rssi; + + if (match->valid_tx_power) { + rp.tx_power = conn->tx_power; + rp.max_tx_power = conn->max_tx_power; + } else { + rp.tx_power = HCI_TX_POWER_INVALID; + rp.max_tx_power = HCI_TX_POWER_INVALID; + } + } + + cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, + match->mgmt_status, &rp, sizeof(rp)); + + hci_conn_drop(conn); + + mgmt_pending_remove(cmd); +} + +static void conn_info_refresh_complete(struct hci_dev *hdev, u8 status) +{ + struct hci_cp_read_rssi *cp; + struct hci_conn *conn; + struct cmd_conn_lookup match; + u16 handle; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + /* TX power data is valid in case request completed successfully, + * otherwise we assume it's not valid. At the moment we assume that + * either both or none of current and max values are valid to keep code + * simple. + */ + match.valid_tx_power = !status; + + /* Commands sent in request are either Read RSSI or Read Transmit Power + * Level so we check which one was last sent to retrieve connection + * handle. Both commands have handle as first parameter so it's safe to + * cast data on the same command struct. + * + * First command sent is always Read RSSI and we fail only if it fails. + * In other case we simply override error to indicate success as we + * already remembered if TX power value is actually valid. + */ + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_RSSI); + if (!cp) { + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER); + status = 0; + } + + if (!cp) { + BT_ERR("invalid sent_cmd in response"); + goto unlock; + } + + handle = __le16_to_cpu(cp->handle); + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) { + BT_ERR("unknown handle (%d) in response", handle); + goto unlock; + } + + match.conn = conn; + match.mgmt_status = mgmt_status(status); + + /* Cache refresh is complete, now reply for mgmt request for given + * connection only. + */ + mgmt_pending_foreach(MGMT_OP_GET_CONN_INFO, hdev, + get_conn_info_complete, &match); + +unlock: + hci_dev_unlock(hdev); +} + +static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_get_conn_info *cp = data; + struct mgmt_rp_get_conn_info rp; + struct hci_conn *conn; + unsigned long conn_info_age; + int err = 0; + + BT_DBG("%s", hdev->name); + + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + hci_dev_lock(hdev); + + if (!hdev_is_powered(hdev)) { + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + goto unlock; + } + + if (cp->addr.type == BDADDR_BREDR) + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, + &cp->addr.bdaddr); + else + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); + + if (!conn || conn->state != BT_CONNECTED) { + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); + goto unlock; + } + + /* To avoid client trying to guess when to poll again for information we + * calculate conn info age as random value between min/max set in hdev. 
+ */ + conn_info_age = hdev->conn_info_min_age + + prandom_u32_max(hdev->conn_info_max_age - + hdev->conn_info_min_age); + + /* Query controller to refresh cached values if they are too old or were + * never read. + */ + if (time_after(jiffies, conn->conn_info_timestamp + + msecs_to_jiffies(conn_info_age)) || + !conn->conn_info_timestamp) { + struct hci_request req; + struct hci_cp_read_tx_power req_txp_cp; + struct hci_cp_read_rssi req_rssi_cp; + struct pending_cmd *cmd; + + hci_req_init(&req, hdev); + req_rssi_cp.handle = cpu_to_le16(conn->handle); + hci_req_add(&req, HCI_OP_READ_RSSI, sizeof(req_rssi_cp), + &req_rssi_cp); + + /* For LE links TX power does not change thus we don't need to + * query for it once value is known. + */ + if (!bdaddr_type_is_le(cp->addr.type) || + conn->tx_power == HCI_TX_POWER_INVALID) { + req_txp_cp.handle = cpu_to_le16(conn->handle); + req_txp_cp.type = 0x00; + hci_req_add(&req, HCI_OP_READ_TX_POWER, + sizeof(req_txp_cp), &req_txp_cp); + } + + /* Max TX power needs to be read only once per connection */ + if (conn->max_tx_power == HCI_TX_POWER_INVALID) { + req_txp_cp.handle = cpu_to_le16(conn->handle); + req_txp_cp.type = 0x01; + hci_req_add(&req, HCI_OP_READ_TX_POWER, + sizeof(req_txp_cp), &req_txp_cp); + } + + err = hci_req_run(&req, conn_info_refresh_complete); + if (err < 0) + goto unlock; + + cmd = mgmt_pending_add(sk, MGMT_OP_GET_CONN_INFO, hdev, + data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + hci_conn_hold(conn); + cmd->user_data = conn; + + conn->conn_info_timestamp = jiffies; + } else { + /* Cache is valid, just reply with values cached in hci_conn */ + rp.rssi = conn->rssi; + rp.tx_power = conn->tx_power; + rp.max_tx_power = conn->max_tx_power; + + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + } + +unlock: + hci_dev_unlock(hdev); + return err; +} + static const struct mgmt_handler { int (*func) (struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len); @@ -4610,6 +4837,7 @@ static const struct mgmt_handler { { set_debug_keys, false, MGMT_SETTING_SIZE }, { set_privacy, false, MGMT_SET_PRIVACY_SIZE }, { load_irks, true, MGMT_LOAD_IRKS_SIZE }, + { get_conn_info, false, MGMT_GET_CONN_INFO_SIZE }, }; @@ -5005,6 +5233,14 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL); } +static u8 mgmt_ltk_type(struct smp_ltk *ltk) +{ + if (ltk->authenticated) + return MGMT_LTK_AUTHENTICATED; + + return MGMT_LTK_UNAUTHENTICATED; +} + void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) { struct mgmt_ev_new_long_term_key ev; @@ -5030,7 +5266,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) bacpy(&ev.key.addr.bdaddr, &key->bdaddr); ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); - ev.key.type = key->authenticated; + ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; ev.key.rand = key->rand; @@ -5668,8 +5904,9 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, } void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, - u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8 - ssp, u8 *eir, u16 eir_len) + u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, + u8 ssp, u8 *eir, u16 eir_len, u8 *scan_rsp, + u8 scan_rsp_len) { char buf[512]; struct mgmt_ev_device_found *ev = (void *) buf; @@ -5679,8 +5916,10 @@ void mgmt_device_found(struct hci_dev *hdev, 
bdaddr_t *bdaddr, u8 link_type, if (!hci_discovery_active(hdev)) return; - /* Leave 5 bytes for a potential CoD field */ - if (sizeof(*ev) + eir_len + 5 > sizeof(buf)) + /* Make sure that the buffer is big enough. The 5 extra bytes + * are for the potential CoD field. + */ + if (sizeof(*ev) + eir_len + scan_rsp_len + 5 > sizeof(buf)) return; memset(buf, 0, sizeof(buf)); @@ -5707,8 +5946,11 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, dev_class, 3); - ev->eir_len = cpu_to_le16(eir_len); - ev_size = sizeof(*ev) + eir_len; + if (scan_rsp_len > 0) + memcpy(ev->eir + eir_len, scan_rsp, scan_rsp_len); + + ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); + ev_size = sizeof(*ev) + eir_len + scan_rsp_len; mgmt_event(MGMT_EV_DEVICE_FOUND, hdev, ev, ev_size, NULL); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index cf620260affa..754b6fe4f742 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -307,7 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d); skb_queue_head_init(&d->tx_queue); - spin_lock_init(&d->lock); + mutex_init(&d->lock); atomic_set(&d->refcnt, 1); rfcomm_dlc_clear_state(d); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 403ec09f480a..8e385a0ae60e 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -70,7 +70,7 @@ struct rfcomm_dev { }; static LIST_HEAD(rfcomm_dev_list); -static DEFINE_SPINLOCK(rfcomm_dev_lock); +static DEFINE_MUTEX(rfcomm_dev_lock); static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb); static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err); @@ -96,9 +96,9 @@ static void rfcomm_dev_destruct(struct tty_port *port) if (dev->tty_dev) tty_unregister_device(rfcomm_tty_driver, dev->id); - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); list_del(&dev->list); - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); kfree(dev); @@ -161,14 +161,14 @@ static struct rfcomm_dev *rfcomm_dev_get(int id) { struct rfcomm_dev *dev; - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); dev = __rfcomm_dev_lookup(id); if (dev && !tty_port_get(&dev->port)) dev = NULL; - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); return dev; } @@ -224,7 +224,7 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req, if (!dev) return ERR_PTR(-ENOMEM); - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); if (req->dev_id < 0) { dev->id = 0; @@ -305,11 +305,11 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req, holds reference to this module. 
*/ __module_get(THIS_MODULE); - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); return dev; out: - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); kfree(dev); return ERR_PTR(err); } @@ -524,7 +524,7 @@ static int rfcomm_get_dev_list(void __user *arg) di = dl->dev_info; - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); list_for_each_entry(dev, &rfcomm_dev_list, list) { if (!tty_port_get(&dev->port)) @@ -540,7 +540,7 @@ static int rfcomm_get_dev_list(void __user *arg) break; } - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); dl->dev_num = n; size = sizeof(*dl) + n * sizeof(*di); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index dfb4e1161c10..3d1cc164557d 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -35,6 +35,33 @@ #define AUTH_REQ_MASK 0x07 +#define SMP_FLAG_TK_VALID 1 +#define SMP_FLAG_CFM_PENDING 2 +#define SMP_FLAG_MITM_AUTH 3 +#define SMP_FLAG_COMPLETE 4 +#define SMP_FLAG_INITIATOR 5 + +struct smp_chan { + struct l2cap_conn *conn; + u8 preq[7]; /* SMP Pairing Request */ + u8 prsp[7]; /* SMP Pairing Response */ + u8 prnd[16]; /* SMP Pairing Random (local) */ + u8 rrnd[16]; /* SMP Pairing Random (remote) */ + u8 pcnf[16]; /* SMP Pairing Confirm */ + u8 tk[16]; /* SMP Temporary Key */ + u8 enc_key_size; + u8 remote_key_dist; + bdaddr_t id_addr; + u8 id_addr_type; + u8 irk[16]; + struct smp_csrk *csrk; + struct smp_csrk *slave_csrk; + struct smp_ltk *ltk; + struct smp_ltk *slave_ltk; + struct smp_irk *remote_irk; + unsigned long flags; +}; + static inline void swap128(const u8 src[16], u8 dst[16]) { int i; @@ -369,7 +396,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, /* Initialize key for JUST WORKS */ memset(smp->tk, 0, sizeof(smp->tk)); - clear_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + clear_bit(SMP_FLAG_TK_VALID, &smp->flags); BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io); @@ -388,19 +415,18 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, method = JUST_WORKS; /* Don't confirm locally initiated pairing attempts */ - if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR, - &smp->smp_flags)) + if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR, &smp->flags)) method = JUST_WORKS; /* If Just Works, Continue with Zero TK */ if (method == JUST_WORKS) { - set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + set_bit(SMP_FLAG_TK_VALID, &smp->flags); return 0; } /* Not Just Works/Confirm results in MITM Authentication */ if (method != JUST_CFM) - set_bit(SMP_FLAG_MITM_AUTH, &smp->smp_flags); + set_bit(SMP_FLAG_MITM_AUTH, &smp->flags); /* If both devices have Keyoard-Display I/O, the master * Confirms and the slave Enters the passkey. 
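[Editor's note: the hunk that follows keeps tk_request()'s existing PassKey Entry derivation: a 32-bit random value reduced modulo 1000000 and stored as the TK. Below is a minimal userspace sketch of that reduction, with /dev/urandom standing in for the kernel's get_random_bytes(); it is illustrative only and uses no kernel API.]

/* Sketch (assumption: userspace stand-in, not kernel code): derive a
 * six-digit SMP passkey the way tk_request() does, by reducing a
 * 32-bit random value to the range 0..999999.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
	FILE *f = fopen("/dev/urandom", "rb");
	uint32_t passkey;

	if (!f || fread(&passkey, sizeof(passkey), 1, f) != 1) {
		perror("urandom");
		return EXIT_FAILURE;
	}
	fclose(f);

	/* As in tk_request(): restrict to the PassKey Entry range */
	passkey %= 1000000;
	printf("PassKey: %06u\n", (unsigned)passkey);
	return 0;
}

[The reduction matches the kernel's own "passkey %= 1000000" in the next hunk; the zero-TK Just Works path shown above skips this step entirely.]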
@@ -419,7 +445,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, passkey %= 1000000; put_unaligned_le32(passkey, smp->tk); BT_DBG("PassKey: %d", passkey); - set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + set_bit(SMP_FLAG_TK_VALID, &smp->flags); } hci_dev_lock(hcon->hdev); @@ -441,15 +467,13 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, return ret; } -static void confirm_work(struct work_struct *work) +static u8 smp_confirm(struct smp_chan *smp) { - struct smp_chan *smp = container_of(work, struct smp_chan, confirm); struct l2cap_conn *conn = smp->conn; struct hci_dev *hdev = conn->hcon->hdev; struct crypto_blkcipher *tfm = hdev->tfm_aes; struct smp_cmd_pairing_confirm cp; int ret; - u8 reason; BT_DBG("conn %p", conn); @@ -463,35 +487,27 @@ static void confirm_work(struct work_struct *work) hci_dev_unlock(hdev); - if (ret) { - reason = SMP_UNSPECIFIED; - goto error; - } + if (ret) + return SMP_UNSPECIFIED; - clear_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags); + clear_bit(SMP_FLAG_CFM_PENDING, &smp->flags); smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); - return; - -error: - smp_failure(conn, reason); + return 0; } -static void random_work(struct work_struct *work) +static u8 smp_random(struct smp_chan *smp) { - struct smp_chan *smp = container_of(work, struct smp_chan, random); struct l2cap_conn *conn = smp->conn; struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; struct crypto_blkcipher *tfm = hdev->tfm_aes; - u8 reason, confirm[16]; + u8 confirm[16]; int ret; - if (IS_ERR_OR_NULL(tfm)) { - reason = SMP_UNSPECIFIED; - goto error; - } + if (IS_ERR_OR_NULL(tfm)) + return SMP_UNSPECIFIED; BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave"); @@ -504,15 +520,12 @@ static void random_work(struct work_struct *work) hci_dev_unlock(hdev); - if (ret) { - reason = SMP_UNSPECIFIED; - goto error; - } + if (ret) + return SMP_UNSPECIFIED; if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) { BT_ERR("Pairing failed (confirmation values mismatch)"); - reason = SMP_CONFIRM_FAILED; - goto error; + return SMP_CONFIRM_FAILED; } if (hcon->out) { @@ -525,10 +538,8 @@ static void random_work(struct work_struct *work) memset(stk + smp->enc_key_size, 0, SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size); - if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) { - reason = SMP_UNSPECIFIED; - goto error; - } + if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) + return SMP_UNSPECIFIED; hci_le_start_enc(hcon, ediv, rand, stk); hcon->enc_key_size = smp->enc_key_size; @@ -550,10 +561,7 @@ static void random_work(struct work_struct *work) ediv, rand); } - return; - -error: - smp_failure(conn, reason); + return 0; } static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) @@ -564,9 +572,6 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) if (!smp) return NULL; - INIT_WORK(&smp->confirm, confirm_work); - INIT_WORK(&smp->random, random_work); - smp->conn = conn; conn->smp_chan = smp; conn->hcon->smp_conn = conn; @@ -583,7 +588,7 @@ void smp_chan_destroy(struct l2cap_conn *conn) BUG_ON(!smp); - complete = test_bit(SMP_FLAG_COMPLETE, &smp->smp_flags); + complete = test_bit(SMP_FLAG_COMPLETE, &smp->flags); mgmt_smp_complete(conn->hcon, complete); kfree(smp->csrk); @@ -634,7 +639,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) put_unaligned_le32(value, smp->tk); /* Fall Through */ case MGMT_OP_USER_CONFIRM_REPLY: - set_bit(SMP_FLAG_TK_VALID, 
&smp->smp_flags); + set_bit(SMP_FLAG_TK_VALID, &smp->flags); break; case MGMT_OP_USER_PASSKEY_NEG_REPLY: case MGMT_OP_USER_CONFIRM_NEG_REPLY: @@ -646,8 +651,11 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) } /* If it is our turn to send Pairing Confirm, do so now */ - if (test_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags)) - queue_work(hcon->hdev->workqueue, &smp->confirm); + if (test_bit(SMP_FLAG_CFM_PENDING, &smp->flags)) { + u8 rsp = smp_confirm(smp); + if (rsp) + smp_failure(conn, rsp); + } return 0; } @@ -656,14 +664,13 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_pairing rsp, *req = (void *) skb->data; struct smp_chan *smp; - u8 key_size; - u8 auth = SMP_AUTH_NONE; + u8 key_size, auth; int ret; BT_DBG("conn %p", conn); if (skb->len < sizeof(*req)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; if (conn->hcon->link_mode & HCI_LM_MASTER) return SMP_CMD_NOTSUPP; @@ -681,8 +688,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) skb_pull(skb, sizeof(*req)); /* We didn't start the pairing, so match remote */ - if (req->auth_req & SMP_AUTH_BONDING) - auth = req->auth_req; + auth = req->auth_req; conn->hcon->pending_sec_level = authreq_to_seclevel(auth); @@ -704,7 +710,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (ret) return SMP_UNSPECIFIED; - clear_bit(SMP_FLAG_INITIATOR, &smp->smp_flags); + clear_bit(SMP_FLAG_INITIATOR, &smp->flags); return 0; } @@ -713,14 +719,13 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_pairing *req, *rsp = (void *) skb->data; struct smp_chan *smp = conn->smp_chan; - struct hci_dev *hdev = conn->hcon->hdev; u8 key_size, auth = SMP_AUTH_NONE; int ret; BT_DBG("conn %p", conn); if (skb->len < sizeof(*rsp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; if (!(conn->hcon->link_mode & HCI_LM_MASTER)) return SMP_CMD_NOTSUPP; @@ -753,11 +758,11 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (ret) return SMP_UNSPECIFIED; - set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags); + set_bit(SMP_FLAG_CFM_PENDING, &smp->flags); /* Can't compose response until we have been confirmed */ - if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags)) - queue_work(hdev->workqueue, &smp->confirm); + if (test_bit(SMP_FLAG_TK_VALID, &smp->flags)) + return smp_confirm(smp); return 0; } @@ -765,12 +770,11 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_chan *smp = conn->smp_chan; - struct hci_dev *hdev = conn->hcon->hdev; BT_DBG("conn %p %s", conn, conn->hcon->out ? 
"master" : "slave"); if (skb->len < sizeof(smp->pcnf)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; memcpy(smp->pcnf, skb->data, sizeof(smp->pcnf)); skb_pull(skb, sizeof(smp->pcnf)); @@ -778,10 +782,10 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) if (conn->hcon->out) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); - else if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags)) - queue_work(hdev->workqueue, &smp->confirm); + else if (test_bit(SMP_FLAG_TK_VALID, &smp->flags)) + return smp_confirm(smp); else - set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags); + set_bit(SMP_FLAG_CFM_PENDING, &smp->flags); return 0; } @@ -789,19 +793,16 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_chan *smp = conn->smp_chan; - struct hci_dev *hdev = conn->hcon->hdev; BT_DBG("conn %p", conn); if (skb->len < sizeof(smp->rrnd)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; memcpy(smp->rrnd, skb->data, sizeof(smp->rrnd)); skb_pull(skb, sizeof(smp->rrnd)); - queue_work(hdev->workqueue, &smp->random); - - return 0; + return smp_random(smp); } static u8 smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level) @@ -836,7 +837,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); if (skb->len < sizeof(*rp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; if (!(conn->hcon->link_mode & HCI_LM_MASTER)) return SMP_CMD_NOTSUPP; @@ -861,7 +862,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - clear_bit(SMP_FLAG_INITIATOR, &smp->smp_flags); + clear_bit(SMP_FLAG_INITIATOR, &smp->flags); return 0; } @@ -908,10 +909,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) authreq = seclevel_to_authreq(sec_level); - /* hcon->auth_type is set by pair_device in mgmt.c. If the MITM - * flag is set we should also set it for the SMP request. + /* Require MITM if IO Capability allows or the security level + * requires it. 
*/ - if ((hcon->auth_type & 0x01)) + if (hcon->io_capability != HCI_IO_NO_INPUT_OUTPUT || + sec_level > BT_SECURITY_MEDIUM) authreq |= SMP_AUTH_MITM; if (hcon->link_mode & HCI_LM_MASTER) { @@ -928,7 +930,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); } - set_bit(SMP_FLAG_INITIATOR, &smp->smp_flags); + set_bit(SMP_FLAG_INITIATOR, &smp->flags); done: hcon->pending_sec_level = sec_level; @@ -944,7 +946,7 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); if (skb->len < sizeof(*rp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY)) @@ -969,7 +971,7 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); if (skb->len < sizeof(*rp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY)) @@ -1001,7 +1003,7 @@ static int smp_cmd_ident_info(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG(""); if (skb->len < sizeof(*info)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_ID_KEY)) @@ -1025,7 +1027,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, BT_DBG(""); if (skb->len < sizeof(*info)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_ID_KEY)) @@ -1075,7 +1077,7 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); if (skb->len < sizeof(*rp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_SIGN)) @@ -1358,7 +1360,7 @@ int smp_distribute_keys(struct l2cap_conn *conn) clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags); cancel_delayed_work_sync(&conn->security_timer); - set_bit(SMP_FLAG_COMPLETE, &smp->smp_flags); + set_bit(SMP_FLAG_COMPLETE, &smp->flags); smp_notify_keys(conn); smp_chan_destroy(conn); diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 1277147a9150..5a8dc36460a1 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -111,39 +111,11 @@ struct smp_cmd_security_req { #define SMP_CMD_NOTSUPP 0x07 #define SMP_UNSPECIFIED 0x08 #define SMP_REPEATED_ATTEMPTS 0x09 +#define SMP_INVALID_PARAMS 0x0a #define SMP_MIN_ENC_KEY_SIZE 7 #define SMP_MAX_ENC_KEY_SIZE 16 -#define SMP_FLAG_TK_VALID 1 -#define SMP_FLAG_CFM_PENDING 2 -#define SMP_FLAG_MITM_AUTH 3 -#define SMP_FLAG_COMPLETE 4 -#define SMP_FLAG_INITIATOR 5 - -struct smp_chan { - struct l2cap_conn *conn; - u8 preq[7]; /* SMP Pairing Request */ - u8 prsp[7]; /* SMP Pairing Response */ - u8 prnd[16]; /* SMP Pairing Random (local) */ - u8 rrnd[16]; /* SMP Pairing Random (remote) */ - u8 pcnf[16]; /* SMP Pairing Confirm */ - u8 tk[16]; /* SMP Temporary Key */ - u8 enc_key_size; - u8 remote_key_dist; - bdaddr_t id_addr; - u8 id_addr_type; - u8 irk[16]; - struct smp_csrk *csrk; - struct smp_csrk *slave_csrk; - struct smp_ltk *ltk; - struct smp_ltk *slave_ltk; - struct smp_irk *remote_irk; - unsigned long smp_flags; - struct work_struct confirm; - struct work_struct random; -}; - /* SMP Commands */ bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); diff --git 
a/net/bridge/Makefile b/net/bridge/Makefile index e85498b2f166..8590b942bffa 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ - br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \ + br_ioctl.o br_stp.o br_stp_bpdu.o \ br_stp_if.o br_stp_timer.o br_netlink.o bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o @@ -16,4 +16,4 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o -obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ +obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/bridge/br.c b/net/bridge/br.c index 19311aafcf5a..1a755a1e5410 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -22,6 +22,104 @@ #include "br_private.h" +/* + * Handle changes in state of network devices enslaved to a bridge. + * + * Note: don't care about up/down if bridge itself is down, because + * port state is checked when bridge is brought up. + */ +static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + bool changed_addr; + int err; + + /* register of bridge completed, add sysfs entries */ + if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { + br_sysfs_addbr(dev); + return NOTIFY_DONE; + } + + /* not a port of a bridge */ + p = br_port_get_rtnl(dev); + if (!p) + return NOTIFY_DONE; + + br = p->br; + + switch (event) { + case NETDEV_CHANGEMTU: + dev_set_mtu(br->dev, br_min_mtu(br)); + break; + + case NETDEV_CHANGEADDR: + spin_lock_bh(&br->lock); + br_fdb_changeaddr(p, dev->dev_addr); + changed_addr = br_stp_recalculate_bridge_id(br); + spin_unlock_bh(&br->lock); + + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + + break; + + case NETDEV_CHANGE: + br_port_carrier_check(p); + break; + + case NETDEV_FEAT_CHANGE: + netdev_update_features(br->dev); + break; + + case NETDEV_DOWN: + spin_lock_bh(&br->lock); + if (br->dev->flags & IFF_UP) + br_stp_disable_port(p); + spin_unlock_bh(&br->lock); + break; + + case NETDEV_UP: + if (netif_running(br->dev) && netif_oper_up(dev)) { + spin_lock_bh(&br->lock); + br_stp_enable_port(p); + spin_unlock_bh(&br->lock); + } + break; + + case NETDEV_UNREGISTER: + br_del_if(br, dev); + break; + + case NETDEV_CHANGENAME: + err = br_sysfs_renameif(p); + if (err) + return notifier_from_errno(err); + break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlaying device to change its type. 
*/ + return NOTIFY_BAD; + + case NETDEV_RESEND_IGMP: + /* Propagate to master device */ + call_netdevice_notifiers(event, br->dev); + break; + } + + /* Events that may cause spanning tree to refresh */ + if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || + event == NETDEV_CHANGE || event == NETDEV_DOWN) + br_ifinfo_notify(RTM_NEWLINK, p); + + return NOTIFY_DONE; +} + +static struct notifier_block br_device_notifier = { + .notifier_call = br_device_event +}; + static void __net_exit br_net_exit(struct net *net) { struct net_device *dev; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 3e2da2cb72db..568cccd39a3d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -112,6 +112,12 @@ static void br_dev_set_multicast_list(struct net_device *dev) { } +static void br_dev_change_rx_flags(struct net_device *dev, int change) +{ + if (change & IFF_PROMISC) + br_manage_promisc(netdev_priv(dev)); +} + static int br_dev_stop(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -309,6 +315,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, .ndo_set_rx_mode = br_dev_set_multicast_list, + .ndo_change_rx_flags = br_dev_change_rx_flags, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -348,14 +355,15 @@ void br_dev_setup(struct net_device *dev) dev->netdev_ops = &br_netdev_ops; dev->destructor = br_dev_free; - SET_ETHTOOL_OPS(dev, &br_ethtool_ops); + dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | - NETIF_F_HW_VLAN_CTAG_TX; - dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; dev->vlan_features = COMMON_FEATURES; br->dev = dev; @@ -370,6 +378,7 @@ void br_dev_setup(struct net_device *dev) br->stp_enabled = BR_NO_STP; br->group_fwd_mask = BR_GROUPFWD_DEFAULT; + br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; br->designated_root = br->bridge_id; br->bridge_max_age = br->max_age = 20 * HZ; @@ -380,4 +389,5 @@ void br_dev_setup(struct net_device *dev) br_netfilter_rtable_init(br); br_stp_timer_init(br); br_multicast_init(br); + br_vlan_init(br); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 474d36f93342..b524c36c1273 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -85,8 +85,58 @@ static void fdb_rcu_free(struct rcu_head *head) kmem_cache_free(br_fdb_cache, ent); } +/* When a static FDB entry is added, the mac address from the entry is + * added to the bridge private HW address list and all required ports + * are then updated with the new information. + * Called under RTNL. 
+ */ +static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr) +{ + int err; + struct net_bridge_port *p, *tmp; + + ASSERT_RTNL(); + + list_for_each_entry(p, &br->port_list, list) { + if (!br_promisc_port(p)) { + err = dev_uc_add(p->dev, addr); + if (err) + goto undo; + } + } + + return; +undo: + list_for_each_entry(tmp, &br->port_list, list) { + if (tmp == p) + break; + if (!br_promisc_port(tmp)) + dev_uc_del(tmp->dev, addr); + } +} + +/* When a static FDB entry is deleted, the HW address from that entry is + * also removed from the bridge private HW address list and updates all + * the ports with needed information. + * Called under RTNL. + */ +static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr) +{ + struct net_bridge_port *p; + + ASSERT_RTNL(); + + list_for_each_entry(p, &br->port_list, list) { + if (!br_promisc_port(p)) + dev_uc_del(p->dev, addr); + } +} + static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) { + if (f->is_static) + fdb_del_hw(br, f->addr.addr); + hlist_del_rcu(&f->hlist); fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); @@ -466,6 +516,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return -ENOMEM; fdb->is_local = fdb->is_static = 1; + fdb_add_hw(br, addr); fdb_notify(br, fdb, RTM_NEWNEIGH); return 0; } @@ -571,6 +622,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) goto nla_put_failure; + if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) + goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - fdb->used); ci.ndm_confirmed = 0; ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); @@ -592,6 +645,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u32)) /* NDA_MASTER */ + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -684,13 +738,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, } if (fdb_to_nud(fdb) != state) { - if (state & NUD_PERMANENT) - fdb->is_local = fdb->is_static = 1; - else if (state & NUD_NOARP) { + if (state & NUD_PERMANENT) { + fdb->is_local = 1; + if (!fdb->is_static) { + fdb->is_static = 1; + fdb_add_hw(br, addr); + } + } else if (state & NUD_NOARP) { fdb->is_local = 0; - fdb->is_static = 1; - } else - fdb->is_local = fdb->is_static = 0; + if (!fdb->is_static) { + fdb->is_static = 1; + fdb_add_hw(br, addr); + } + } else { + fdb->is_local = 0; + if (fdb->is_static) { + fdb->is_static = 0; + fdb_del_hw(br, addr); + } + } modified = true; } @@ -880,3 +946,59 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], out: return err; } + +int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p) +{ + struct net_bridge_fdb_entry *fdb, *tmp; + int i; + int err; + + ASSERT_RTNL(); + + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry(fdb, &br->hash[i], hlist) { + /* We only care for static entries */ + if (!fdb->is_static) + continue; + + err = dev_uc_add(p->dev, fdb->addr.addr); + if (err) + goto rollback; + } + } + return 0; + +rollback: + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry(tmp, &br->hash[i], hlist) { + /* If we reached the fdb that failed, we can stop */ + if (tmp == fdb) + break; + + /* We only care for static entries */ + if (!tmp->is_static) + continue; + + dev_uc_del(p->dev, tmp->addr.addr); + } + } + return err; +} 
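[Editor's note: fdb_add_hw() and br_fdb_sync_static() above share an add-with-rollback idiom: program entries one by one and, on the first failure, undo only the entries already programmed, stopping at the one that failed since it was never added. The following self-contained userspace model shows the same idiom; program_entry()/unprogram_entry() are hypothetical stand-ins for dev_uc_add()/dev_uc_del(), not kernel API.]

/* Sketch (assumption: illustrative userspace model of the rollback
 * pattern used by br_fdb_sync_static(); entry 3 simulates a failure).
 */
#include <stdio.h>

static int program_entry(int id)
{
	if (id == 3)		/* simulate failure on the fourth entry */
		return -1;
	printf("add entry %d\n", id);
	return 0;
}

static void unprogram_entry(int id)
{
	printf("del entry %d\n", id);
}

static int sync_all(int n)
{
	int i, j, err;

	for (i = 0; i < n; i++) {
		err = program_entry(i);
		if (err)
			goto rollback;
	}
	return 0;

rollback:
	/* Undo entries 0..i-1; entry i itself was never programmed. */
	for (j = 0; j < i; j++)
		unprogram_entry(j);
	return err;
}

int main(void)
{
	return sync_all(5) ? 1 : 0;
}

[br_fdb_sync_static() implements the same stop condition with "if (tmp == fdb) break" while rescanning the hash table, since it has no index to count down from.]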
+ +void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) +{ + struct net_bridge_fdb_entry *fdb; + int i; + + ASSERT_RTNL(); + + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) { + /* We only care for static entries */ + if (!fdb->is_static) + continue; + + dev_uc_del(p->dev, fdb->addr.addr); + } + } +} diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 5262b8617eb9..3eca3fdf8fe1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -85,6 +85,111 @@ void br_port_carrier_check(struct net_bridge_port *p) spin_unlock_bh(&br->lock); } +static void br_port_set_promisc(struct net_bridge_port *p) +{ + int err = 0; + + if (br_promisc_port(p)) + return; + + err = dev_set_promiscuity(p->dev, 1); + if (err) + return; + + br_fdb_unsync_static(p->br, p); + p->flags |= BR_PROMISC; +} + +static void br_port_clear_promisc(struct net_bridge_port *p) +{ + int err; + + /* Check if the port is already non-promisc or if it doesn't + * support UNICAST filtering. Without unicast filtering support + * we'll end up re-enabling promisc mode anyway, so just check for + * it here. + */ + if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT)) + return; + + /* Since we'll be clearing the promisc mode, program the port + * first so that we don't have interruption in traffic. + */ + err = br_fdb_sync_static(p->br, p); + if (err) + return; + + dev_set_promiscuity(p->dev, -1); + p->flags &= ~BR_PROMISC; +} + +/* When a port is added or removed or when certain port flags + * change, this function is called to automatically manage + * promiscuity setting of all the bridge ports. We are always called + * under RTNL so can skip using rcu primitives. + */ +void br_manage_promisc(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool set_all = false; + + /* If vlan filtering is disabled or bridge interface is placed + * into promiscuous mode, place all ports in promiscuous mode. + */ + if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br)) + set_all = true; + + list_for_each_entry(p, &br->port_list, list) { + if (set_all) { + br_port_set_promisc(p); + } else { + /* If the number of auto-ports is <= 1, then all other + * ports will have their output configuration + * statically specified through fdbs. Since ingress + * on the auto-port becomes forwarding/egress to other + * ports and egress configuration is statically known, + * we can say that ingress configuration of the + * auto-port is also statically known. + * This lets us disable promiscuous mode and write + * this config to hw. + */ + if (br->auto_cnt == 0 || + (br->auto_cnt == 1 && br_auto_port(p))) + br_port_clear_promisc(p); + else + br_port_set_promisc(p); + } + } +} + +static void nbp_update_port_count(struct net_bridge *br) +{ + struct net_bridge_port *p; + u32 cnt = 0; + + list_for_each_entry(p, &br->port_list, list) { + if (br_auto_port(p)) + cnt++; + } + if (br->auto_cnt != cnt) { + br->auto_cnt = cnt; + br_manage_promisc(br); + } +} + +static void nbp_delete_promisc(struct net_bridge_port *p) +{ + /* If port is currently promiscuous, unset promiscuity. + * Otherwise, it is a static port so remove all addresses + * from it. 
+ */ + dev_set_allmulti(p->dev, -1); + if (br_promisc_port(p)) + dev_set_promiscuity(p->dev, -1); + else + br_fdb_unsync_static(p->br, p); +} + static void release_nbp(struct kobject *kobj) { struct net_bridge_port *p @@ -133,7 +238,7 @@ static void del_nbp(struct net_bridge_port *p) sysfs_remove_link(br->ifobj, p->dev->name); - dev_set_promiscuity(dev, -1); + nbp_delete_promisc(p); spin_lock_bh(&br->lock); br_stp_disable_port(p); @@ -141,10 +246,11 @@ static void del_nbp(struct net_bridge_port *p) br_ifinfo_notify(RTM_DELLINK, p); + list_del_rcu(&p->list); + nbp_vlan_flush(p); br_fdb_delete_by_port(br, p, 1); - - list_del_rcu(&p->list); + nbp_update_port_count(br); dev->priv_flags &= ~IFF_BRIDGE_PORT; @@ -353,7 +459,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) call_netdevice_notifiers(NETDEV_JOIN, dev); - err = dev_set_promiscuity(dev, 1); + err = dev_set_allmulti(dev, 1); if (err) goto put_back; @@ -384,6 +490,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) list_add_rcu(&p->list, &br->port_list); + nbp_update_port_count(br); + netdev_update_features(br->dev); if (br->dev->needed_headroom < dev->needed_headroom) @@ -421,7 +529,7 @@ err2: kobject_put(&p->kobj); p = NULL; /* kobject_put frees */ err1: - dev_set_promiscuity(dev, -1); + dev_set_allmulti(dev, -1); put_back: dev_put(dev); kfree(p); @@ -455,3 +563,11 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) return 0; } + +void br_port_flags_change(struct net_bridge_port *p, unsigned long mask) +{ + struct net_bridge *br = p->br; + + if (mask & BR_AUTO_MASK) + nbp_update_port_count(br); +} diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 04d6348fd530..366c43649079 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -177,6 +177,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) p = br_port_get_rcu(skb->dev); if (unlikely(is_link_local_ether_addr(dest))) { + u16 fwd_mask = p->br->group_fwd_mask_required; + /* * See IEEE 802.1D Table 7-10 Reserved addresses * @@ -194,7 +196,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) case 0x00: /* Bridge Group Address */ /* If STP is turned off, then must forward to keep loop detection */ - if (p->br->stp_enabled == BR_NO_STP) + if (p->br->stp_enabled == BR_NO_STP || + fwd_mask & (1u << dest[5])) goto forward; break; @@ -203,7 +206,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) default: /* Allow selective forwarding for most other protocols */ - if (p->br->group_fwd_mask & (1u << dest[5])) + fwd_mask |= p->br->group_fwd_mask; + if (fwd_mask & (1u << dest[5])) goto forward; } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index b7b1914dfa25..5df05269d17a 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -418,13 +418,13 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { - if (timer_pending(&br->ip4_querier.timer)) + if (timer_pending(&br->ip4_other_query.timer)) return -EBUSY; ip.u.ip4 = entry->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) } else { - if (timer_pending(&br->ip6_querier.timer)) + if (timer_pending(&br->ip6_other_query.timer)) return -EBUSY; ip.u.ip6 = entry->addr.u.ip6; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 7b757b5dc773..abfa0b65a111 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -11,6 +11,7 @@ */ #include <linux/err.h> +#include <linux/export.h> #include <linux/if_ether.h> #include <linux/igmp.h> 
#include <linux/jhash.h> @@ -35,7 +36,7 @@ #include "br_private.h" static void br_multicast_start_querier(struct net_bridge *br, - struct bridge_mcast_query *query); + struct bridge_mcast_own_query *query); unsigned int br_mdb_rehash_seq; static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) @@ -761,7 +762,7 @@ static void br_multicast_local_router_expired(unsigned long data) } static void br_multicast_querier_expired(struct net_bridge *br, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *query) { spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || br->multicast_disabled) @@ -777,7 +778,7 @@ static void br_ip4_multicast_querier_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_querier_expired(br, &br->ip4_query); + br_multicast_querier_expired(br, &br->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) @@ -785,10 +786,22 @@ static void br_ip6_multicast_querier_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_querier_expired(br, &br->ip6_query); + br_multicast_querier_expired(br, &br->ip6_own_query); } #endif +static void br_multicast_select_own_querier(struct net_bridge *br, + struct br_ip *ip, + struct sk_buff *skb) +{ + if (ip->proto == htons(ETH_P_IP)) + br->ip4_querier.addr.u.ip4 = ip_hdr(skb)->saddr; +#if IS_ENABLED(CONFIG_IPV6) + else + br->ip6_querier.addr.u.ip6 = ipv6_hdr(skb)->saddr; +#endif +} + static void __br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *ip) @@ -804,17 +817,19 @@ static void __br_multicast_send_query(struct net_bridge *br, skb->dev = port->dev; NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); - } else + } else { + br_multicast_select_own_querier(br, ip, skb); netif_rx(skb); + } } static void br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *own_query) { unsigned long time; struct br_ip br_group; - struct bridge_mcast_querier *querier = NULL; + struct bridge_mcast_other_query *other_query = NULL; if (!netif_running(br->dev) || br->multicast_disabled || !br->multicast_querier) @@ -822,31 +837,32 @@ static void br_multicast_send_query(struct net_bridge *br, memset(&br_group.u, 0, sizeof(br_group.u)); - if (port ? (query == &port->ip4_query) : - (query == &br->ip4_query)) { - querier = &br->ip4_querier; + if (port ? (own_query == &port->ip4_own_query) : + (own_query == &br->ip4_own_query)) { + other_query = &br->ip4_other_query; br_group.proto = htons(ETH_P_IP); #if IS_ENABLED(CONFIG_IPV6) } else { - querier = &br->ip6_querier; + other_query = &br->ip6_other_query; br_group.proto = htons(ETH_P_IPV6); #endif } - if (!querier || timer_pending(&querier->timer)) + if (!other_query || timer_pending(&other_query->timer)) return; __br_multicast_send_query(br, port, &br_group); time = jiffies; - time += query->startup_sent < br->multicast_startup_query_count ? + time += own_query->startup_sent < br->multicast_startup_query_count ? 
br->multicast_startup_query_interval : br->multicast_query_interval; - mod_timer(&query->timer, time); + mod_timer(&own_query->timer, time); } -static void br_multicast_port_query_expired(struct net_bridge_port *port, - struct bridge_mcast_query *query) +static void +br_multicast_port_query_expired(struct net_bridge_port *port, + struct bridge_mcast_own_query *query) { struct net_bridge *br = port->br; @@ -868,7 +884,7 @@ static void br_ip4_multicast_port_query_expired(unsigned long data) { struct net_bridge_port *port = (void *)data; - br_multicast_port_query_expired(port, &port->ip4_query); + br_multicast_port_query_expired(port, &port->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) @@ -876,7 +892,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data) { struct net_bridge_port *port = (void *)data; - br_multicast_port_query_expired(port, &port->ip6_query); + br_multicast_port_query_expired(port, &port->ip6_own_query); } #endif @@ -886,11 +902,11 @@ void br_multicast_add_port(struct net_bridge_port *port) setup_timer(&port->multicast_router_timer, br_multicast_router_expired, (unsigned long)port); - setup_timer(&port->ip4_query.timer, br_ip4_multicast_port_query_expired, - (unsigned long)port); + setup_timer(&port->ip4_own_query.timer, + br_ip4_multicast_port_query_expired, (unsigned long)port); #if IS_ENABLED(CONFIG_IPV6) - setup_timer(&port->ip6_query.timer, br_ip6_multicast_port_query_expired, - (unsigned long)port); + setup_timer(&port->ip6_own_query.timer, + br_ip6_multicast_port_query_expired, (unsigned long)port); #endif } @@ -899,7 +915,7 @@ void br_multicast_del_port(struct net_bridge_port *port) del_timer_sync(&port->multicast_router_timer); } -static void br_multicast_enable(struct bridge_mcast_query *query) +static void br_multicast_enable(struct bridge_mcast_own_query *query) { query->startup_sent = 0; @@ -916,9 +932,9 @@ void br_multicast_enable_port(struct net_bridge_port *port) if (br->multicast_disabled || !netif_running(br->dev)) goto out; - br_multicast_enable(&port->ip4_query); + br_multicast_enable(&port->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - br_multicast_enable(&port->ip6_query); + br_multicast_enable(&port->ip6_own_query); #endif out: @@ -938,9 +954,9 @@ void br_multicast_disable_port(struct net_bridge_port *port) if (!hlist_unhashed(&port->rlist)) hlist_del_init_rcu(&port->rlist); del_timer(&port->multicast_router_timer); - del_timer(&port->ip4_query.timer); + del_timer(&port->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&port->ip6_query.timer); + del_timer(&port->ip6_own_query.timer); #endif spin_unlock(&br->multicast_lock); } @@ -1064,15 +1080,80 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } #endif +static bool br_ip4_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + __be32 saddr) +{ + if (!timer_pending(&br->ip4_own_query.timer) && + !timer_pending(&br->ip4_other_query.timer)) + goto update; + + if (!br->ip4_querier.addr.u.ip4) + goto update; + + if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.u.ip4)) + goto update; + + return false; + +update: + br->ip4_querier.addr.u.ip4 = saddr; + + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip4_querier.port, port); + + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static bool br_ip6_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + struct in6_addr *saddr) +{ + if (!timer_pending(&br->ip6_own_query.timer) && + !timer_pending(&br->ip6_other_query.timer)) + goto 
update; + + if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.u.ip6) <= 0) + goto update; + + return false; + +update: + br->ip6_querier.addr.u.ip6 = *saddr; + + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip6_querier.port, port); + + return true; +} +#endif + +static bool br_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *saddr) +{ + switch (saddr->proto) { + case htons(ETH_P_IP): + return br_ip4_multicast_select_querier(br, port, saddr->u.ip4); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6); +#endif + } + + return false; +} + static void -br_multicast_update_querier_timer(struct net_bridge *br, - struct bridge_mcast_querier *querier, - unsigned long max_delay) +br_multicast_update_query_timer(struct net_bridge *br, + struct bridge_mcast_other_query *query, + unsigned long max_delay) { - if (!timer_pending(&querier->timer)) - querier->delay_time = jiffies + max_delay; + if (!timer_pending(&query->timer)) + query->delay_time = jiffies + max_delay; - mod_timer(&querier->timer, jiffies + br->multicast_querier_interval); + mod_timer(&query->timer, jiffies + br->multicast_querier_interval); } /* @@ -1125,16 +1206,14 @@ timer: static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, - struct bridge_mcast_querier *querier, - int saddr, - bool is_general_query, + struct bridge_mcast_other_query *query, + struct br_ip *saddr, unsigned long max_delay) { - if (saddr && is_general_query) - br_multicast_update_querier_timer(br, querier, max_delay); - else if (timer_pending(&querier->timer)) + if (!br_multicast_select_querier(br, port, saddr)) return; + br_multicast_update_query_timer(br, query, max_delay); br_multicast_mark_router(br, port); } @@ -1149,6 +1228,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct igmpv3_query *ih3; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; __be32 group; @@ -1190,11 +1270,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, goto out; } - br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, - !group, max_delay); + if (!group) { + saddr.proto = htons(ETH_P_IP); + saddr.u.ip4 = iph->saddr; - if (!group) + br_multicast_query_received(br, port, &br->ip4_other_query, + &saddr, max_delay); goto out; + } mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) @@ -1234,6 +1317,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct mld2_query *mld2q; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; const struct in6_addr *group = NULL; @@ -1282,12 +1366,16 @@ static int br_ip6_multicast_query(struct net_bridge *br, goto out; } - br_multicast_query_received(br, port, &br->ip6_querier, - !ipv6_addr_any(&ip6h->saddr), - is_general_query, max_delay); + if (is_general_query) { + saddr.proto = htons(ETH_P_IPV6); + saddr.u.ip6 = ip6h->saddr; - if (!group) + br_multicast_query_received(br, port, &br->ip6_other_query, + &saddr, max_delay); + goto out; + } else if (!group) { goto out; + } mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) @@ -1315,11 +1403,12 @@ out: } #endif -static void br_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, - struct br_ip *group, - struct 
bridge_mcast_querier *querier, - struct bridge_mcast_query *query) +static void +br_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group, + struct bridge_mcast_other_query *other_query, + struct bridge_mcast_own_query *own_query) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -1330,7 +1419,7 @@ static void br_multicast_leave_group(struct net_bridge *br, spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED) || - timer_pending(&querier->timer)) + timer_pending(&other_query->timer)) goto out; mdb = mlock_dereference(br->mdb, br); @@ -1344,7 +1433,7 @@ static void br_multicast_leave_group(struct net_bridge *br, time = jiffies + br->multicast_last_member_count * br->multicast_last_member_interval; - mod_timer(&query->timer, time); + mod_timer(&own_query->timer, time); for (p = mlock_dereference(mp->ports, br); p != NULL; @@ -1425,17 +1514,19 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, __u16 vid) { struct br_ip br_group; - struct bridge_mcast_query *query = port ? &port->ip4_query : - &br->ip4_query; + struct bridge_mcast_own_query *own_query; if (ipv4_is_local_multicast(group)) return; + own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group, &br->ip4_querier, query); + br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query, + own_query); } #if IS_ENABLED(CONFIG_IPV6) @@ -1445,18 +1536,19 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, __u16 vid) { struct br_ip br_group; - struct bridge_mcast_query *query = port ? &port->ip6_query : - &br->ip6_query; - + struct bridge_mcast_own_query *own_query; if (ipv6_addr_is_ll_all_nodes(group)) return; + own_query = port ? 
&port->ip6_own_query : &br->ip6_own_query; + br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group, &br->ip6_querier, query); + br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query, + own_query); } #endif @@ -1723,12 +1815,14 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, } static void br_multicast_query_expired(struct net_bridge *br, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *query, + struct bridge_mcast_querier *querier) { spin_lock(&br->multicast_lock); if (query->startup_sent < br->multicast_startup_query_count) query->startup_sent++; + rcu_assign_pointer(querier, NULL); br_multicast_send_query(br, NULL, query); spin_unlock(&br->multicast_lock); } @@ -1737,7 +1831,7 @@ static void br_ip4_multicast_query_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_query_expired(br, &br->ip4_query); + br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); } #if IS_ENABLED(CONFIG_IPV6) @@ -1745,7 +1839,7 @@ static void br_ip6_multicast_query_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_query_expired(br, &br->ip6_query); + br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); } #endif @@ -1767,28 +1861,30 @@ void br_multicast_init(struct net_bridge *br) br->multicast_querier_interval = 255 * HZ; br->multicast_membership_interval = 260 * HZ; - br->ip4_querier.delay_time = 0; + br->ip4_other_query.delay_time = 0; + br->ip4_querier.port = NULL; #if IS_ENABLED(CONFIG_IPV6) - br->ip6_querier.delay_time = 0; + br->ip6_other_query.delay_time = 0; + br->ip6_querier.port = NULL; #endif spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, br_multicast_local_router_expired, 0); - setup_timer(&br->ip4_querier.timer, br_ip4_multicast_querier_expired, - (unsigned long)br); - setup_timer(&br->ip4_query.timer, br_ip4_multicast_query_expired, + setup_timer(&br->ip4_other_query.timer, + br_ip4_multicast_querier_expired, (unsigned long)br); + setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired, (unsigned long)br); #if IS_ENABLED(CONFIG_IPV6) - setup_timer(&br->ip6_querier.timer, br_ip6_multicast_querier_expired, - (unsigned long)br); - setup_timer(&br->ip6_query.timer, br_ip6_multicast_query_expired, + setup_timer(&br->ip6_other_query.timer, + br_ip6_multicast_querier_expired, (unsigned long)br); + setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired, (unsigned long)br); #endif } static void __br_multicast_open(struct net_bridge *br, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *query) { query->startup_sent = 0; @@ -1800,9 +1896,9 @@ static void __br_multicast_open(struct net_bridge *br, void br_multicast_open(struct net_bridge *br) { - __br_multicast_open(br, &br->ip4_query); + __br_multicast_open(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - __br_multicast_open(br, &br->ip6_query); + __br_multicast_open(br, &br->ip6_own_query); #endif } @@ -1815,11 +1911,11 @@ void br_multicast_stop(struct net_bridge *br) int i; del_timer_sync(&br->multicast_router_timer); - del_timer_sync(&br->ip4_querier.timer); - del_timer_sync(&br->ip4_query.timer); + del_timer_sync(&br->ip4_other_query.timer); + del_timer_sync(&br->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer_sync(&br->ip6_querier.timer); - del_timer_sync(&br->ip6_query.timer); + del_timer_sync(&br->ip6_other_query.timer); + 
del_timer_sync(&br->ip6_own_query.timer); #endif spin_lock_bh(&br->multicast_lock); @@ -1923,7 +2019,7 @@ unlock: } static void br_multicast_start_querier(struct net_bridge *br, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *query) { struct net_bridge_port *port; @@ -1934,11 +2030,11 @@ static void br_multicast_start_querier(struct net_bridge *br, port->state == BR_STATE_BLOCKING) continue; - if (query == &br->ip4_query) - br_multicast_enable(&port->ip4_query); + if (query == &br->ip4_own_query) + br_multicast_enable(&port->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) else - br_multicast_enable(&port->ip6_query); + br_multicast_enable(&port->ip6_own_query); #endif } } @@ -1974,9 +2070,9 @@ rollback: goto rollback; } - br_multicast_start_querier(br, &br->ip4_query); + br_multicast_start_querier(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - br_multicast_start_querier(br, &br->ip6_query); + br_multicast_start_querier(br, &br->ip6_own_query); #endif unlock: @@ -2001,16 +2097,16 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) max_delay = br->multicast_query_response_interval; - if (!timer_pending(&br->ip4_querier.timer)) - br->ip4_querier.delay_time = jiffies + max_delay; + if (!timer_pending(&br->ip4_other_query.timer)) + br->ip4_other_query.delay_time = jiffies + max_delay; - br_multicast_start_querier(br, &br->ip4_query); + br_multicast_start_querier(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - if (!timer_pending(&br->ip6_querier.timer)) - br->ip6_querier.delay_time = jiffies + max_delay; + if (!timer_pending(&br->ip6_other_query.timer)) + br->ip6_other_query.delay_time = jiffies + max_delay; - br_multicast_start_querier(br, &br->ip6_query); + br_multicast_start_querier(br, &br->ip6_own_query); #endif unlock: @@ -2061,3 +2157,109 @@ unlock: return err; } + +/** + * br_multicast_list_adjacent - Returns snooped multicast addresses + * @dev: The bridge port adjacent to which to retrieve addresses + * @br_ip_list: The list to store found, snooped multicast IP addresses in + * + * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast + * snooping feature on all bridge ports of dev's bridge device, excluding + * the addresses from dev itself. + * + * Returns the number of items added to br_ip_list. 
+ * + * Notes: + * - br_ip_list needs to be initialized by caller + * - br_ip_list might contain duplicates in the end + * (needs to be taken care of by caller) + * - br_ip_list needs to be freed by caller + */ +int br_multicast_list_adjacent(struct net_device *dev, + struct list_head *br_ip_list) +{ + struct net_bridge *br; + struct net_bridge_port *port; + struct net_bridge_port_group *group; + struct br_ip_list *entry; + int count = 0; + + rcu_read_lock(); + if (!br_ip_list || !br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + list_for_each_entry_rcu(port, &br->port_list, list) { + if (!port->dev || port->dev == dev) + continue; + + hlist_for_each_entry_rcu(group, &port->mglist, mglist) { + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + goto unlock; + + entry->addr = group->addr; + list_add(&entry->list, br_ip_list); + count++; + } + } + +unlock: + rcu_read_unlock(); + return count; +} +EXPORT_SYMBOL_GPL(br_multicast_list_adjacent); + +/** + * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port + * @dev: The bridge port adjacent to which to check for a querier + * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 + * + * Checks whether the given interface has a bridge on top and if so returns + * true if a selected querier is behind one of the other ports of this + * bridge. Otherwise returns false. + */ +bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) +{ + struct net_bridge *br; + struct net_bridge_port *port; + bool ret = false; + + rcu_read_lock(); + if (!br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + switch (proto) { + case ETH_P_IP: + if (!timer_pending(&br->ip4_other_query.timer) || + rcu_dereference(br->ip4_querier.port) == port) + goto unlock; + break; +#if IS_ENABLED(CONFIG_IPV6) + case ETH_P_IPV6: + if (!timer_pending(&br->ip6_other_query.timer) || + rcu_dereference(br->ip6_querier.port) == port) + goto unlock; + break; +#endif + default: + goto unlock; + } + + ret = true; +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2acf7fa1fec6..a615264cf01a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -535,7 +535,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) return br; - vlan = __vlan_find_dev_deep(br, skb->vlan_proto, + vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, vlan_tx_tag_get(skb) & VLAN_VID_MASK); return vlan ? 
vlan : br; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e8844d975b32..26edb518b839 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -328,6 +328,7 @@ static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) { int err; + unsigned long old_flags = p->flags; br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); @@ -353,6 +354,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) if (err) return err; } + + br_port_flags_change(p, old_flags ^ p->flags); return 0; } diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c deleted file mode 100644 index 2998dd1769a0..000000000000 --- a/net/bridge/br_notify.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Device event handling - * Linux ethernet bridge - * - * Authors: - * Lennert Buytenhek <buytenh@gnu.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/rtnetlink.h> -#include <net/net_namespace.h> - -#include "br_private.h" - -static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr); - -struct notifier_block br_device_notifier = { - .notifier_call = br_device_event -}; - -/* - * Handle changes in state of network devices enslaved to a bridge. - * - * Note: don't care about up/down if bridge itself is down, because - * port state is checked when bridge is brought up. - */ -static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct net_bridge_port *p; - struct net_bridge *br; - bool changed_addr; - int err; - - /* register of bridge completed, add sysfs entries */ - if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { - br_sysfs_addbr(dev); - return NOTIFY_DONE; - } - - /* not a port of a bridge */ - p = br_port_get_rtnl(dev); - if (!p) - return NOTIFY_DONE; - - br = p->br; - - switch (event) { - case NETDEV_CHANGEMTU: - dev_set_mtu(br->dev, br_min_mtu(br)); - break; - - case NETDEV_CHANGEADDR: - spin_lock_bh(&br->lock); - br_fdb_changeaddr(p, dev->dev_addr); - changed_addr = br_stp_recalculate_bridge_id(br); - spin_unlock_bh(&br->lock); - - if (changed_addr) - call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); - - break; - - case NETDEV_CHANGE: - br_port_carrier_check(p); - break; - - case NETDEV_FEAT_CHANGE: - netdev_update_features(br->dev); - break; - - case NETDEV_DOWN: - spin_lock_bh(&br->lock); - if (br->dev->flags & IFF_UP) - br_stp_disable_port(p); - spin_unlock_bh(&br->lock); - break; - - case NETDEV_UP: - if (netif_running(br->dev) && netif_oper_up(dev)) { - spin_lock_bh(&br->lock); - br_stp_enable_port(p); - spin_unlock_bh(&br->lock); - } - break; - - case NETDEV_UNREGISTER: - br_del_if(br, dev); - break; - - case NETDEV_CHANGENAME: - err = br_sysfs_renameif(p); - if (err) - return notifier_from_errno(err); - break; - - case NETDEV_PRE_TYPE_CHANGE: - /* Forbid underlaying device to change its type. 
*/ - return NOTIFY_BAD; - - case NETDEV_RESEND_IGMP: - /* Propagate to master device */ - call_netdevice_notifiers(event, br->dev); - break; - } - - /* Events that may cause spanning tree to refresh */ - if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || - event == NETDEV_CHANGE || event == NETDEV_DOWN) - br_ifinfo_notify(RTM_NEWLINK, p); - - return NOTIFY_DONE; -} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 59d3a85c5873..23caf5b0309e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -35,6 +35,8 @@ #define BR_GROUPFWD_DEFAULT 0 /* Don't allow forwarding control protocols like STP and LLDP */ #define BR_GROUPFWD_RESTRICTED 0x4007u +/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ +#define BR_GROUPFWD_8021AD 0xB801u /* Path to usermode spanning tree program */ #define BR_STP_PROG "/sbin/bridge-stp" @@ -54,30 +56,24 @@ struct mac_addr unsigned char addr[ETH_ALEN]; }; -struct br_ip -{ - union { - __be32 ip4; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr ip6; -#endif - } u; - __be16 proto; - __u16 vid; -}; - #ifdef CONFIG_BRIDGE_IGMP_SNOOPING /* our own querier */ -struct bridge_mcast_query { +struct bridge_mcast_own_query { struct timer_list timer; u32 startup_sent; }; /* other querier */ -struct bridge_mcast_querier { +struct bridge_mcast_other_query { struct timer_list timer; unsigned long delay_time; }; + +/* selected querier */ +struct bridge_mcast_querier { + struct br_ip addr; + struct net_bridge_port __rcu *port; +}; #endif struct net_port_vlans { @@ -174,11 +170,13 @@ struct net_bridge_port #define BR_ADMIN_COST 0x00000010 #define BR_LEARNING 0x00000020 #define BR_FLOOD 0x00000040 +#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING) +#define BR_PROMISC 0x00000080 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - struct bridge_mcast_query ip4_query; + struct bridge_mcast_own_query ip4_own_query; #if IS_ENABLED(CONFIG_IPV6) - struct bridge_mcast_query ip6_query; + struct bridge_mcast_own_query ip6_own_query; #endif /* IS_ENABLED(CONFIG_IPV6) */ unsigned char multicast_router; struct timer_list multicast_router_timer; @@ -198,6 +196,9 @@ struct net_bridge_port #endif }; +#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) +#define br_promisc_port(p) ((p)->flags & BR_PROMISC) + #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) @@ -227,6 +228,7 @@ struct net_bridge bool nf_call_arptables; #endif u16 group_fwd_mask; + u16 group_fwd_mask_required; /* STP */ bridge_id designated_root; @@ -241,6 +243,7 @@ struct net_bridge unsigned long bridge_forward_delay; u8 group_addr[ETH_ALEN]; + bool group_addr_set; u16 root_port; enum { @@ -277,11 +280,13 @@ struct net_bridge struct hlist_head router_list; struct timer_list multicast_router_timer; + struct bridge_mcast_other_query ip4_other_query; + struct bridge_mcast_own_query ip4_own_query; struct bridge_mcast_querier ip4_querier; - struct bridge_mcast_query ip4_query; #if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_other_query ip6_other_query; + struct bridge_mcast_own_query ip6_own_query; struct bridge_mcast_querier ip6_querier; - struct bridge_mcast_query ip6_query; #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif @@ -290,8 +295,10 @@ struct net_bridge struct timer_list topology_change_timer; struct timer_list gc_timer; struct kobject *ifobj; + u32 auto_cnt; #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_enabled; + __be16 vlan_proto; struct net_port_vlans __rcu *vlan_info; #endif }; @@ -327,8 
+334,6 @@ struct br_input_skb_cb { #define br_debug(br, format, args...) \ pr_debug("%s: " format, (br)->dev->name, ##args) -extern struct notifier_block br_device_notifier; - /* called under bridge lock */ static inline int br_is_root_bridge(const struct net_bridge *br) { @@ -395,6 +400,8 @@ int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 nlh_flags); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, int idx); +int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); +void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); @@ -415,6 +422,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev); int br_min_mtu(const struct net_bridge *br); netdev_features_t br_features_recompute(struct net_bridge *br, netdev_features_t features); +void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); +void br_manage_promisc(struct net_bridge *br); /* br_input.c */ int br_handle_frame_finish(struct sk_buff *skb); @@ -485,7 +494,7 @@ static inline bool br_multicast_is_router(struct net_bridge *br) static inline bool __br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_querier *querier) + struct bridge_mcast_other_query *querier) { return time_is_before_jiffies(querier->delay_time) && (br->multicast_querier || timer_pending(&querier->timer)); @@ -496,10 +505,10 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, &br->ip4_querier); + return __br_multicast_querier_exists(br, &br->ip4_other_query); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, &br->ip6_querier); + return __br_multicast_querier_exists(br, &br->ip6_other_query); #endif default: return false; @@ -589,7 +598,10 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); bool br_vlan_find(struct net_bridge *br, u16 vid); +void br_recalculate_fwd_mask(struct net_bridge *br); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +int br_vlan_set_proto(struct net_bridge *br, unsigned long val); +void br_vlan_init(struct net_bridge *br); int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); @@ -633,6 +645,10 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v) return v->pvid ?: VLAN_N_VID; } +static inline int br_vlan_enabled(struct net_bridge *br) +{ + return br->vlan_enabled; +} #else static inline bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, @@ -681,6 +697,14 @@ static inline bool br_vlan_find(struct net_bridge *br, u16 vid) return false; } +static inline void br_recalculate_fwd_mask(struct net_bridge *br) +{ +} + +static inline void br_vlan_init(struct net_bridge *br) +{ +} + static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { return -EOPNOTSUPP; @@ -719,6 +743,11 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v) { return VLAN_N_VID; /* Returns invalid vid */ } + +static inline int br_vlan_enabled(struct net_bridge *br) +{ + return 0; +} #endif /* br_netfilter.c */ diff --git 
a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 8dac65552f19..c9e2572b15f4 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -312,10 +312,19 @@ static ssize_t group_addr_store(struct device *d, new_addr[5] == 3) /* 802.1X PAE address */ return -EINVAL; + if (!rtnl_trylock()) + return restart_syscall(); + spin_lock_bh(&br->lock); for (i = 0; i < 6; i++) br->group_addr[i] = new_addr[i]; spin_unlock_bh(&br->lock); + + br->group_addr_set = true; + br_recalculate_fwd_mask(br); + + rtnl_unlock(); + return len; } @@ -700,6 +709,22 @@ static ssize_t vlan_filtering_store(struct device *d, return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); } static DEVICE_ATTR_RW(vlan_filtering); + +static ssize_t vlan_protocol_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%#06x\n", ntohs(br->vlan_proto)); +} + +static ssize_t vlan_protocol_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_proto); +} +static DEVICE_ATTR_RW(vlan_protocol); #endif static struct attribute *bridge_attrs[] = { @@ -745,6 +770,7 @@ static struct attribute *bridge_attrs[] = { #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING &dev_attr_vlan_filtering.attr, + &dev_attr_vlan_protocol.attr, #endif NULL }; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index dd595bd7fa82..e561cd59b8a6 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -41,20 +41,30 @@ static ssize_t show_##_name(struct net_bridge_port *p, char *buf) \ } \ static int store_##_name(struct net_bridge_port *p, unsigned long v) \ { \ - unsigned long flags = p->flags; \ - if (v) \ - flags |= _mask; \ - else \ - flags &= ~_mask; \ - if (flags != p->flags) { \ - p->flags = flags; \ - br_ifinfo_notify(RTM_NEWLINK, p); \ - } \ - return 0; \ + return store_flag(p, v, _mask); \ } \ static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \ show_##_name, store_##_name) +static int store_flag(struct net_bridge_port *p, unsigned long v, + unsigned long mask) +{ + unsigned long flags; + + flags = p->flags; + + if (v) + flags |= mask; + else + flags &= ~mask; + + if (flags != p->flags) { + p->flags = flags; + br_port_flags_change(p, mask); + br_ifinfo_notify(RTM_NEWLINK, p); + } + return 0; +} static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 5fee2feaf292..2b2774fe0703 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -60,7 +60,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) * that ever changes this code will allow tagged * traffic to enter the bridge. 
*/ - err = vlan_vid_add(dev, htons(ETH_P_8021Q), vid); + err = vlan_vid_add(dev, br->vlan_proto, vid); if (err) return err; } @@ -80,7 +80,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) out_filt: if (p) - vlan_vid_del(dev, htons(ETH_P_8021Q), vid); + vlan_vid_del(dev, br->vlan_proto, vid); return err; } @@ -92,8 +92,10 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) __vlan_delete_pvid(v, vid); clear_bit(vid, v->untagged_bitmap); - if (v->port_idx) - vlan_vid_del(v->parent.port->dev, htons(ETH_P_8021Q), vid); + if (v->port_idx) { + struct net_bridge_port *p = v->parent.port; + vlan_vid_del(p->dev, p->br->vlan_proto, vid); + } clear_bit(vid, v->vlan_bitmap); v->num_vlans--; @@ -158,7 +160,8 @@ out: bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, struct sk_buff *skb, u16 *vid) { - int err; + bool tagged; + __be16 proto; /* If VLAN filtering is disabled on the bridge, all packets are * permitted. @@ -172,19 +175,41 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) goto drop; + proto = br->vlan_proto; + /* If vlan tx offload is disabled on bridge device and frame was * sent from vlan device on the bridge device, it does not have * HW accelerated vlan tag. */ if (unlikely(!vlan_tx_tag_present(skb) && - (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)))) { + skb->protocol == proto)) { skb = vlan_untag(skb); if (unlikely(!skb)) return false; } - err = br_vlan_get_tag(skb, vid); + if (!br_vlan_get_tag(skb, vid)) { + /* Tagged frame */ + if (skb->vlan_proto != proto) { + /* Protocol-mismatch, empty out vlan_tci for new tag */ + skb_push(skb, ETH_HLEN); + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); + if (unlikely(!skb)) + return false; + + skb_pull(skb, ETH_HLEN); + skb_reset_mac_len(skb); + *vid = 0; + tagged = false; + } else { + tagged = true; + } + } else { + /* Untagged frame */ + tagged = false; + } + if (!*vid) { u16 pvid = br_get_pvid(v); @@ -199,9 +224,9 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * ingress frame is considered to belong to this vlan. */ *vid = pvid; - if (likely(err)) + if (likely(!tagged)) /* Untagged Frame. */ - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); + __vlan_hwaccel_put_tag(skb, proto, pvid); else /* Priority-tagged Frame. * At this point, We know that skb->vlan_tci had @@ -254,7 +279,9 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) if (!v) return false; - br_vlan_get_tag(skb, vid); + if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto) + *vid = 0; + if (!*vid) { *vid = br_get_pvid(v); if (*vid == VLAN_N_VID) @@ -351,6 +378,33 @@ out: return found; } +/* Must be protected by RTNL. */ +static void recalculate_group_addr(struct net_bridge *br) +{ + if (br->group_addr_set) + return; + + spin_lock_bh(&br->lock); + if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) { + /* Bridge Group Address */ + br->group_addr[5] = 0x00; + } else { /* vlan_enabled && ETH_P_8021AD */ + /* Provider Bridge Group Address */ + br->group_addr[5] = 0x08; + } + spin_unlock_bh(&br->lock); +} + +/* Must be protected by RTNL. 
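+ * (Callers hold rtnl: br_vlan_filter_toggle() and br_vlan_set_proto() below both take rtnl_trylock() before calling this helper and recalculate_group_addr().)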
*/ +void br_recalculate_fwd_mask(struct net_bridge *br) +{ + if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) + br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; + else /* vlan_enabled && ETH_P_8021AD */ + br->group_fwd_mask_required = BR_GROUPFWD_8021AD & + ~(1u << br->group_addr[5]); +} + int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { if (!rtnl_trylock()) @@ -360,12 +414,88 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) goto unlock; br->vlan_enabled = val; + br_manage_promisc(br); + recalculate_group_addr(br); + br_recalculate_fwd_mask(br); unlock: rtnl_unlock(); return 0; } +int br_vlan_set_proto(struct net_bridge *br, unsigned long val) +{ + int err = 0; + struct net_bridge_port *p; + struct net_port_vlans *pv; + __be16 proto, oldproto; + u16 vid, errvid; + + if (val != ETH_P_8021Q && val != ETH_P_8021AD) + return -EPROTONOSUPPORT; + + if (!rtnl_trylock()) + return restart_syscall(); + + proto = htons(val); + if (br->vlan_proto == proto) + goto unlock; + + /* Add VLANs for the new proto to the device filter. */ + list_for_each_entry(p, &br->port_list, list) { + pv = rtnl_dereference(p->vlan_info); + if (!pv) + continue; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + err = vlan_vid_add(p->dev, proto, vid); + if (err) + goto err_filt; + } + } + + oldproto = br->vlan_proto; + br->vlan_proto = proto; + + recalculate_group_addr(br); + br_recalculate_fwd_mask(br); + + /* Delete VLANs for the old proto from the device filter. */ + list_for_each_entry(p, &br->port_list, list) { + pv = rtnl_dereference(p->vlan_info); + if (!pv) + continue; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) + vlan_vid_del(p->dev, oldproto, vid); + } + +unlock: + rtnl_unlock(); + return err; + +err_filt: + errvid = vid; + for_each_set_bit(vid, pv->vlan_bitmap, errvid) + vlan_vid_del(p->dev, proto, vid); + + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + pv = rtnl_dereference(p->vlan_info); + if (!pv) + continue; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) + vlan_vid_del(p->dev, proto, vid); + } + + goto unlock; +} + +void br_vlan_init(struct net_bridge *br) +{ + br->vlan_proto = htons(ETH_P_8021Q); +} + /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. */ @@ -432,7 +562,7 @@ void nbp_vlan_flush(struct net_bridge_port *port) return; for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - vlan_vid_del(port->dev, htons(ETH_P_8021Q), vid); + vlan_vid_del(port->dev, port->br->vlan_proto, vid); __vlan_flush(pv); } diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 5ca74a0e595f..629dc77874a9 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -2,14 +2,23 @@ # Bridge netfilter configuration # # -config NF_TABLES_BRIDGE - depends on NF_TABLES +menuconfig NF_TABLES_BRIDGE + depends on BRIDGE && NETFILTER && NF_TABLES tristate "Ethernet Bridge nf_tables support" +if NF_TABLES_BRIDGE + +config NFT_BRIDGE_META + tristate "Netfilter nf_table bridge meta support" + depends on NFT_META + help + Add support for bridge dedicated meta key. + +endif # NF_TABLES_BRIDGE + menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" - depends on BRIDGE && NETFILTER - select NETFILTER_XTABLES + depends on BRIDGE && NETFILTER && NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification framework. 
Say 'Y' or 'M' here if you want to do Ethernet diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index ea7629f58b3d..6f2f3943d66f 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o +obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c new file mode 100644 index 000000000000..4f02109d708f --- /dev/null +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2014 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_meta.h> + +#include "../br_private.h" + +static void nft_meta_bridge_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct net_device *in = pkt->in, *out = pkt->out; + struct nft_data *dest = &data[priv->dreg]; + const struct net_bridge_port *p; + + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + if (in == NULL || (p = br_port_get_rcu(in)) == NULL) + goto err; + break; + case NFT_META_BRI_OIFNAME: + if (out == NULL || (p = br_port_get_rcu(out)) == NULL) + goto err; + break; + default: + goto out; + } + + strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data)); + return; +out: + return nft_meta_get_eval(expr, data, pkt); +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + case NFT_META_BRI_OIFNAME: + break; + default: + return nft_meta_get_init(ctx, expr, tb); + } + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + return 0; +} + +static struct nft_expr_type nft_meta_bridge_type; +static const struct nft_expr_ops nft_meta_bridge_get_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_bridge_get_eval, + .init = nft_meta_bridge_get_init, + .dump = nft_meta_get_dump, +}; + +static const struct nft_expr_ops nft_meta_bridge_set_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_set_eval, + .init = nft_meta_set_init, + .dump = nft_meta_set_dump, +}; + +static const struct nft_expr_ops * +nft_meta_bridge_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_bridge_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_bridge_set_ops; + + return 
ERR_PTR(-EINVAL); +} + +static struct nft_expr_type nft_meta_bridge_type __read_mostly = { + .family = NFPROTO_BRIDGE, + .name = "meta", + .select_ops = &nft_meta_bridge_select_ops, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_meta_bridge_module_init(void) +{ + return nft_register_expr(&nft_meta_bridge_type); +} + +static void __exit nft_meta_bridge_module_exit(void) +{ + nft_unregister_expr(&nft_meta_bridge_type); +} + +module_init(nft_meta_bridge_module_init); +module_exit(nft_meta_bridge_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta"); diff --git a/net/can/af_can.c b/net/can/af_can.c index a27f8aad9e99..ce82337521f6 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -338,6 +338,29 @@ static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) } /** + * effhash - hash function for 29 bit CAN identifier reduction + * @can_id: 29 bit CAN identifier + * + * Description: + * To reduce the linear traversal in one linked list of _single_ EFF CAN + * frame subscriptions the 29 bit identifier is mapped to 10 bits. + * (see CAN_EFF_RCV_HASH_BITS definition) + * + * Return: + * Hash value from 0x000 - 0x3FF ( enforced by CAN_EFF_RCV_HASH_BITS mask ) + */ +static unsigned int effhash(canid_t can_id) +{ + unsigned int hash; + + hash = can_id; + hash ^= can_id >> CAN_EFF_RCV_HASH_BITS; + hash ^= can_id >> (2 * CAN_EFF_RCV_HASH_BITS); + + return hash & ((1 << CAN_EFF_RCV_HASH_BITS) - 1); +} + +/** * find_rcv_list - determine optimal filterlist inside device filter struct * @can_id: pointer to CAN identifier of a given can_filter * @mask: pointer to CAN mask of a given can_filter @@ -400,10 +423,8 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, !(*can_id & CAN_RTR_FLAG)) { if (*can_id & CAN_EFF_FLAG) { - if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) { - /* RFC: a future use-case for hash-tables? 
*/ - return &d->rx[RX_EFF]; - } + if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) + return &d->rx_eff[effhash(*can_id)]; } else { if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS)) return &d->rx_sff[*can_id]; @@ -632,7 +653,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) return matches; if (can_id & CAN_EFF_FLAG) { - hlist_for_each_entry_rcu(r, &d->rx[RX_EFF], list) { + hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) { if (r->can_id == can_id) { deliver(skb, r); matches++; diff --git a/net/can/af_can.h b/net/can/af_can.h index 6de58b40535c..fca0fe9fc45a 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -59,12 +59,17 @@ struct receiver { char *ident; }; -enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_EFF, RX_MAX }; +#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) +#define CAN_EFF_RCV_HASH_BITS 10 +#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS) + +enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX }; /* per device receive filters linked at dev->ml_priv */ struct dev_rcv_lists { struct hlist_head rx[RX_MAX]; - struct hlist_head rx_sff[0x800]; + struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ]; + struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ]; int remove_on_zero_entries; int entries; }; diff --git a/net/can/proc.c b/net/can/proc.c index b543470c8f8b..1a19b985a868 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -80,7 +80,6 @@ static const char rx_list_name[][8] = { [RX_ALL] = "rx_all", [RX_FIL] = "rx_fil", [RX_INV] = "rx_inv", - [RX_EFF] = "rx_eff", }; /* @@ -389,25 +388,26 @@ static const struct file_operations can_rcvlist_proc_fops = { .release = single_release, }; -static inline void can_rcvlist_sff_proc_show_one(struct seq_file *m, - struct net_device *dev, - struct dev_rcv_lists *d) +static inline void can_rcvlist_proc_show_array(struct seq_file *m, + struct net_device *dev, + struct hlist_head *rcv_array, + unsigned int rcv_array_sz) { - int i; + unsigned int i; int all_empty = 1; /* check whether at least one list is non-empty */ - for (i = 0; i < 0x800; i++) - if (!hlist_empty(&d->rx_sff[i])) { + for (i = 0; i < rcv_array_sz; i++) + if (!hlist_empty(&rcv_array[i])) { all_empty = 0; break; } if (!all_empty) { can_print_recv_banner(m); - for (i = 0; i < 0x800; i++) { - if (!hlist_empty(&d->rx_sff[i])) - can_print_rcvlist(m, &d->rx_sff[i], dev); + for (i = 0; i < rcv_array_sz; i++) { + if (!hlist_empty(&rcv_array[i])) + can_print_rcvlist(m, &rcv_array[i], dev); } } else seq_printf(m, " (%s: no entry)\n", DNAME(dev)); @@ -425,12 +425,15 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) /* sff receive list for 'all' CAN devices (dev == NULL) */ d = &can_rx_alldev_list; - can_rcvlist_sff_proc_show_one(m, NULL, d); + can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff)); /* sff receive list for registered CAN devices */ for_each_netdev_rcu(&init_net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) - can_rcvlist_sff_proc_show_one(m, dev, dev->ml_priv); + if (dev->type == ARPHRD_CAN && dev->ml_priv) { + d = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, d->rx_sff, + ARRAY_SIZE(d->rx_sff)); + } } rcu_read_unlock(); @@ -452,6 +455,49 @@ static const struct file_operations can_rcvlist_sff_proc_fops = { .release = single_release, }; + +static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) +{ + struct net_device *dev; + struct dev_rcv_lists *d; + + /* RX_EFF */ + seq_puts(m, "\nreceive list 'rx_eff':\n"); + + rcu_read_lock(); + + /* eff receive list for 'all' CAN devices (dev == NULL) */ 
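+ /* (Illustrative sketch, following the effhash() definition above: each rx_eff[] bucket chains only the single-EFF-id subscriptions that effhash() maps to it, i.e. roughly bucket = (id ^ (id >> 10) ^ (id >> 20)) & 0x3ff.) */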
+ d = &can_rx_alldev_list; + can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff)); + + /* eff receive list for registered CAN devices */ + for_each_netdev_rcu(&init_net, dev) { + if (dev->type == ARPHRD_CAN && dev->ml_priv) { + d = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, d->rx_eff, + ARRAY_SIZE(d->rx_eff)); + } + } + + rcu_read_unlock(); + + seq_putc(m, '\n'); + return 0; +} + +static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, can_rcvlist_eff_proc_show, NULL); +} + +static const struct file_operations can_rcvlist_eff_proc_fops = { + .owner = THIS_MODULE, + .open = can_rcvlist_eff_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * proc utility functions */ @@ -491,8 +537,8 @@ void can_init_proc(void) &can_rcvlist_proc_fops, (void *)RX_FIL); pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir, &can_rcvlist_proc_fops, (void *)RX_INV); - pde_rcvlist_eff = proc_create_data(CAN_PROC_RCVLIST_EFF, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_EFF); + pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir, + &can_rcvlist_eff_proc_fops); pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir, &can_rcvlist_sff_proc_fops); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b0dfce77656a..05be0c181695 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2491,7 +2491,7 @@ EXPORT_SYMBOL(ceph_osdc_sync); * Call all pending notify callbacks - for use after a watch is * unregistered, to make sure no more callbacks for it will be invoked */ -extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) +void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) { flush_workqueue(osdc->notify_wq); } diff --git a/net/core/Makefile b/net/core/Makefile index 826b925aa453..71093d94ad2b 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o dev_ioctl.o + sock_diag.o dev_ioctl.o tso.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o diff --git a/net/core/datagram.c b/net/core/datagram.c index a16ed7bbe376..6b1c04ca1d50 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -739,11 +739,15 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) __sum16 sum; sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); - if (likely(!sum)) { - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) - netdev_rx_csum_fault(skb->dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; - } + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !sum && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + + /* Save checksum complete for later use */ + skb->csum = sum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); diff --git a/net/core/dev.c b/net/core/dev.c index 8908a68db449..30eedf677913 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1661,6 +1661,29 @@ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(is_skb_forwardable); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + + if 
(unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->protocol = eth_type_trans(skb, dev); + + return 0; +} +EXPORT_SYMBOL_GPL(__dev_forward_skb); + /** * dev_forward_skb - loopback an skb to another netif * @@ -1681,24 +1704,7 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { - if (skb_copy_ubufs(skb, GFP_ATOMIC)) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - } - - if (unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - - skb_scrub_packet(skb, true); - skb->protocol = eth_type_trans(skb, dev); - - return netif_rx_internal(skb); + return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -2507,13 +2513,39 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } +/* If MPLS offload request, verify we are testing hardware MPLS features + * instead of standard features for the netdev. + */ +#ifdef CONFIG_NET_MPLS_GSO +static netdev_features_t net_mpls_features(struct sk_buff *skb, + netdev_features_t features, + __be16 type) +{ + if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC)) + features &= skb->dev->mpls_features; + + return features; +} +#else +static netdev_features_t net_mpls_features(struct sk_buff *skb, + netdev_features_t features, + __be16 type) +{ + return features; +} +#endif + static netdev_features_t harmonize_features(struct sk_buff *skb, netdev_features_t features) { int tmp; + __be16 type; + + type = skb_network_protocol(skb, &tmp); + features = net_mpls_features(skb, features, type); if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { + !can_checksum_protocol(features, type)) { features &= ~NETIF_F_ALL_CSUM; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; @@ -5689,10 +5721,6 @@ static void rollback_registered_many(struct list_head *head) */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - if (!dev->rtnl_link_ops || - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); - /* * Flush the unicast and multicast chains */ @@ -5702,6 +5730,10 @@ static void rollback_registered_many(struct list_head *head) if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + /* Notifier chain MUST detach us all upper devices. 
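 * (i.e. netdev_has_any_upper_dev() must be false by this point, which the WARN_ON below asserts.)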
*/ WARN_ON(netdev_has_any_upper_dev(dev)); @@ -5927,10 +5959,7 @@ static void netdev_init_one_queue(struct net_device *dev, static void netif_free_tx_queues(struct net_device *dev) { - if (is_vmalloc_addr(dev->_tx)) - vfree(dev->_tx); - else - kfree(dev->_tx); + kvfree(dev->_tx); } static int netif_alloc_netdev_queues(struct net_device *dev) @@ -6404,10 +6433,7 @@ void netdev_freemem(struct net_device *dev) { char *addr = (char *)dev - dev->padded; - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); + kvfree(addr); } /** @@ -6512,11 +6538,6 @@ free_all: free_pcpu: free_percpu(dev->pcpu_refcnt); - netif_free_tx_queues(dev); -#ifdef CONFIG_SYSFS - kfree(dev->_rx); -#endif - free_dev: netdev_freemem(dev); return NULL; @@ -6613,6 +6634,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue); /** * unregister_netdevice_many - unregister many devices * @head: list of devices + * + * Note: As most callers use a stack-allocated list_head, + * we force a list_del() to make sure the stack won't be corrupted later. */ void unregister_netdevice_many(struct list_head *head) { @@ -6622,6 +6646,7 @@ void unregister_netdevice_many(struct list_head *head) rollback_registered_many(head); list_for_each_entry(dev, head, unreg_list) net_set_todo(dev); + list_del(head); } } EXPORT_SYMBOL(unregister_netdevice_many); @@ -7077,7 +7102,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) } } unregister_netdevice_many(&dev_kill_list); - list_del(&dev_kill_list); rtnl_unlock(); } diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 329d5794e7dc..b6b230600b97 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_unsync); +/** + * __hw_addr_sync_dev - Synchronize device's multicast list + * @list: address list to synchronize + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * This function is intended to be called from the ndo_set_rx_mode + * function of devices that require explicit address add/remove + * notifications. The unsync function may be NULL in which case + * the addresses requiring removal will simply be removed without + * any notification to the device. + **/ +int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*sync)(struct net_device *, const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + struct netdev_hw_addr *ha, *tmp; + int err; + + /* first go through and flush out any stale entries */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (!ha->sync_cnt || ha->refcount != 1) + continue; + + /* if unsync is defined and fails defer unsyncing address */ + if (unsync && unsync(dev, ha->addr)) + continue; + + ha->sync_cnt--; + __hw_addr_del_entry(list, ha, false, false); + } + + /* go through and sync new entries to the list */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (ha->sync_cnt) + continue; + + err = sync(dev, ha->addr); + if (err) + return err; + + ha->sync_cnt++; + ha->refcount++; + } + + return 0; +} +EXPORT_SYMBOL(__hw_addr_sync_dev); + +/** + * __hw_addr_unsync_dev - Remove synchronized addresses from device + * @list: address list to remove synchronized addresses from + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by __hw_addr_sync_dev().
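+ * + * A minimal caller sketch (foo_stop() and foo_mc_unsync() are hypothetical names used only for illustration): + * + * static int foo_stop(struct net_device *dev) + * { + * __hw_addr_unsync_dev(&dev->mc, dev, foo_mc_unsync); + * return 0; + * } + *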
+ * This function is intended to be called from the ndo_stop or ndo_open + * functions on devices that require explicit address add/remove + * notifications. If the unsync function pointer is NULL then this function + * can be used to just reset the sync_cnt for the addresses in the list. + **/ +void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (!ha->sync_cnt) + continue; + + /* if unsync is defined and fails defer unsyncing address */ + if (unsync && unsync(dev, ha->addr)) + continue; + + ha->sync_cnt--; + __hw_addr_del_entry(list, ha, false, false); + } +} +EXPORT_SYMBOL(__hw_addr_unsync_dev); + static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 640ba0e5831c..17cb912793fa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -557,6 +557,23 @@ err_out: return ret; } +static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, + struct ethtool_rxnfc *rx_rings, + u32 size) +{ + int i; + + if (copy_from_user(indir, useraddr, size * sizeof(indir[0]))) + return -EFAULT; + + /* Validate ring indices */ + for (i = 0; i < size; i++) + if (indir[i] >= rx_rings->data) + return -EINVAL; + + return 0; +} + static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { @@ -565,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, int ret; if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh_indir) + !dev->ethtool_ops->get_rxfh) return -EOPNOTSUPP; dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); if (dev_size == 0) @@ -591,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, if (!indir) return -ENOMEM; - ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL); if (ret) goto out; @@ -613,8 +630,9 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, u32 *indir; const struct ethtool_ops *ops = dev->ethtool_ops; int ret; + u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); - if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || + if (!ops->get_rxfh_indir_size || !ops->set_rxfh || !ops->get_rxnfc) return -EOPNOTSUPP; @@ -643,28 +661,184 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, for (i = 0; i < dev_size; i++) indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); } else { - if (copy_from_user(indir, - useraddr + - offsetof(struct ethtool_rxfh_indir, - ring_index[0]), - dev_size * sizeof(indir[0]))) { + ret = ethtool_copy_validate_indir(indir, + useraddr + ringidx_offset, + &rx_rings, + dev_size); + if (ret) + goto out; + } + + ret = ops->set_rxfh(dev, indir, NULL); + +out: + kfree(indir); + return ret; +} + +static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + u32 user_indir_size, user_key_size; + u32 dev_indir_size = 0, dev_key_size = 0; + struct ethtool_rxfh rxfh; + u32 total_size; + u32 indir_bytes; + u32 *indir = NULL; + u8 *hkey = NULL; + u8 *rss_config; + + if (!(dev->ethtool_ops->get_rxfh_indir_size || + dev->ethtool_ops->get_rxfh_key_size) || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + 
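+ /* Sketch of the expected flow, inferred from the checks below: ETHTOOL_GRSSH is a two-step API where userspace first passes zero sizes to learn dev_indir_size/dev_key_size, then calls again with matching buffers. */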
+ if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + if (ops->get_rxfh_key_size) + dev_key_size = ops->get_rxfh_key_size(dev); + + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) + return -EFAULT; + user_indir_size = rxfh.indir_size; + user_key_size = rxfh.key_size; + + /* Check that reserved fields are 0 for now */ + if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1]) + return -EINVAL; + + rxfh.indir_size = dev_indir_size; + rxfh.key_size = dev_key_size; + if (copy_to_user(useraddr, &rxfh, sizeof(rxfh))) + return -EFAULT; + + /* If the user buffer size is 0, this is just a query for the + * device table size and key size. Otherwise, if the User size is + * not equal to device table size or key size it's an error. + */ + if (!user_indir_size && !user_key_size) + return 0; + + if ((user_indir_size && (user_indir_size != dev_indir_size)) || + (user_key_size && (user_key_size != dev_key_size))) + return -EINVAL; + + indir_bytes = user_indir_size * sizeof(indir[0]); + total_size = indir_bytes + user_key_size; + rss_config = kzalloc(total_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + if (user_indir_size) + indir = (u32 *)rss_config; + + if (user_key_size) + hkey = rss_config + indir_bytes; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey); + if (!ret) { + if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh, rss_config[0]), + rss_config, total_size)) ret = -EFAULT; + } + + kfree(rss_config); + + return ret; +} + +static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxnfc rx_rings; + struct ethtool_rxfh rxfh; + u32 dev_indir_size = 0, dev_key_size = 0, i; + u32 *indir = NULL, indir_bytes = 0; + u8 *hkey = NULL; + u8 *rss_config; + u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); + + if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) || + !ops->get_rxnfc || !ops->set_rxfh) + return -EOPNOTSUPP; + + if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + if (ops->get_rxfh_key_size) + dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev); + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) + return -EFAULT; + + /* Check that reserved fields are 0 for now */ + if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1]) + return -EINVAL; + + /* If either indir or hash key is valid, proceed further. + * It is not valid to request that both be unchanged. + */ + if ((rxfh.indir_size && + rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && + rxfh.indir_size != dev_indir_size) || + (rxfh.key_size && (rxfh.key_size != dev_key_size)) || + (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && + rxfh.key_size == 0)) + return -EINVAL; + + if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) + indir_bytes = dev_indir_size * sizeof(indir[0]); + + rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + rx_rings.cmd = ETHTOOL_GRXRINGS; + ret = ops->get_rxnfc(dev, &rx_rings, NULL); + if (ret) + goto out; + + /* rxfh.indir_size == 0 means reset the indir table to default. + * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged. 
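+ * (For example, a key-only ETHTOOL_SRSSH request passes ETH_RXFH_INDIR_NO_CHANGE together with a valid key_size, so only the hash key is replaced.)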
+ */ + if (rxfh.indir_size && + rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) { + indir = (u32 *)rss_config; + ret = ethtool_copy_validate_indir(indir, + useraddr + rss_cfg_offset, + &rx_rings, + rxfh.indir_size); + if (ret) goto out; - } + } else if (rxfh.indir_size == 0) { + indir = (u32 *)rss_config; + for (i = 0; i < dev_indir_size; i++) + indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + } - /* Validate ring indices */ - for (i = 0; i < dev_size; i++) { - if (indir[i] >= rx_rings.data) { - ret = -EINVAL; - goto out; - } + if (rxfh.key_size) { + hkey = rss_config + indir_bytes; + if (copy_from_user(hkey, + useraddr + rss_cfg_offset + indir_bytes, + rxfh.key_size)) { + ret = -EFAULT; + goto out; } } - ret = ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh(dev, indir, hkey); out: - kfree(indir); + kfree(rss_config); return ret; } @@ -1491,6 +1665,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: case ETHTOOL_GRXFHINDIR: + case ETHTOOL_GRSSH: case ETHTOOL_GFEATURES: case ETHTOOL_GCHANNELS: case ETHTOOL_GET_TS_INFO: @@ -1628,6 +1803,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GRSSH: + rc = ethtool_get_rxfh(dev, useraddr); + break; + case ETHTOOL_SRSSH: + rc = ethtool_set_rxfh(dev, useraddr); + break; case ETHTOOL_GFEATURES: rc = ethtool_get_features(dev, useraddr); break; diff --git a/net/core/filter.c b/net/core/filter.c index 4aec7b93f1a9..735fad897496 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -45,6 +45,27 @@ #include <linux/seccomp.h> #include <linux/if_vlan.h> +/* Registers */ +#define BPF_R0 regs[BPF_REG_0] +#define BPF_R1 regs[BPF_REG_1] +#define BPF_R2 regs[BPF_REG_2] +#define BPF_R3 regs[BPF_REG_3] +#define BPF_R4 regs[BPF_REG_4] +#define BPF_R5 regs[BPF_REG_5] +#define BPF_R6 regs[BPF_REG_6] +#define BPF_R7 regs[BPF_REG_7] +#define BPF_R8 regs[BPF_REG_8] +#define BPF_R9 regs[BPF_REG_9] +#define BPF_R10 regs[BPF_REG_10] + +/* Named registers */ +#define DST regs[insn->dst_reg] +#define SRC regs[insn->src_reg] +#define FP regs[BPF_REG_FP] +#define ARG1 regs[BPF_REG_ARG1] +#define CTX regs[BPF_REG_CTX] +#define IMM insn->imm + /* No hurry in this branch * * Exported for the bpf jit load helper. @@ -57,9 +78,9 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns ptr = skb_network_header(skb) + k - SKF_NET_OFF; else if (k >= SKF_LL_OFF) ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; + return NULL; } @@ -68,6 +89,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k, { if (k >= 0) return skb_header_pointer(skb, k, size, buffer); + return bpf_internal_load_pointer_neg_helper(skb, k, size); } @@ -122,13 +144,6 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return 0; } -/* Register mappings for user programs. */ -#define A_REG 0 -#define X_REG 7 -#define TMP_REG 8 -#define ARG2_REG 2 -#define ARG3_REG 3 - /** * __sk_run_filter - run a filter on a given context * @ctx: buffer to run the filter on @@ -138,447 +153,442 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) * keep, 0 for none. @ctx is the data we are operating on, @insn is the * array of filter instructions. 
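 * * A trivial example program, assuming the insn building macros from <linux/filter.h>: * * struct sock_filter_int prog[] = { * BPF_MOV64_IMM(BPF_REG_0, 1), * BPF_EXIT_INSN(), * }; * * which just sets BPF_R0 = 1 and returns 1.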
*/ -unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) +static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; - void *ptr; - int off; - -#define K insn->imm -#define A regs[insn->a_reg] -#define X regs[insn->x_reg] -#define R0 regs[0] - -#define CONT ({insn++; goto select_insn; }) -#define CONT_JMP ({insn++; goto select_insn; }) - static const void *jumptable[256] = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ -#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C - DL(BPF_ALU, BPF_ADD, BPF_X), - DL(BPF_ALU, BPF_ADD, BPF_K), - DL(BPF_ALU, BPF_SUB, BPF_X), - DL(BPF_ALU, BPF_SUB, BPF_K), - DL(BPF_ALU, BPF_AND, BPF_X), - DL(BPF_ALU, BPF_AND, BPF_K), - DL(BPF_ALU, BPF_OR, BPF_X), - DL(BPF_ALU, BPF_OR, BPF_K), - DL(BPF_ALU, BPF_LSH, BPF_X), - DL(BPF_ALU, BPF_LSH, BPF_K), - DL(BPF_ALU, BPF_RSH, BPF_X), - DL(BPF_ALU, BPF_RSH, BPF_K), - DL(BPF_ALU, BPF_XOR, BPF_X), - DL(BPF_ALU, BPF_XOR, BPF_K), - DL(BPF_ALU, BPF_MUL, BPF_X), - DL(BPF_ALU, BPF_MUL, BPF_K), - DL(BPF_ALU, BPF_MOV, BPF_X), - DL(BPF_ALU, BPF_MOV, BPF_K), - DL(BPF_ALU, BPF_DIV, BPF_X), - DL(BPF_ALU, BPF_DIV, BPF_K), - DL(BPF_ALU, BPF_MOD, BPF_X), - DL(BPF_ALU, BPF_MOD, BPF_K), - DL(BPF_ALU, BPF_NEG, 0), - DL(BPF_ALU, BPF_END, BPF_TO_BE), - DL(BPF_ALU, BPF_END, BPF_TO_LE), - DL(BPF_ALU64, BPF_ADD, BPF_X), - DL(BPF_ALU64, BPF_ADD, BPF_K), - DL(BPF_ALU64, BPF_SUB, BPF_X), - DL(BPF_ALU64, BPF_SUB, BPF_K), - DL(BPF_ALU64, BPF_AND, BPF_X), - DL(BPF_ALU64, BPF_AND, BPF_K), - DL(BPF_ALU64, BPF_OR, BPF_X), - DL(BPF_ALU64, BPF_OR, BPF_K), - DL(BPF_ALU64, BPF_LSH, BPF_X), - DL(BPF_ALU64, BPF_LSH, BPF_K), - DL(BPF_ALU64, BPF_RSH, BPF_X), - DL(BPF_ALU64, BPF_RSH, BPF_K), - DL(BPF_ALU64, BPF_XOR, BPF_X), - DL(BPF_ALU64, BPF_XOR, BPF_K), - DL(BPF_ALU64, BPF_MUL, BPF_X), - DL(BPF_ALU64, BPF_MUL, BPF_K), - DL(BPF_ALU64, BPF_MOV, BPF_X), - DL(BPF_ALU64, BPF_MOV, BPF_K), - DL(BPF_ALU64, BPF_ARSH, BPF_X), - DL(BPF_ALU64, BPF_ARSH, BPF_K), - DL(BPF_ALU64, BPF_DIV, BPF_X), - DL(BPF_ALU64, BPF_DIV, BPF_K), - DL(BPF_ALU64, BPF_MOD, BPF_X), - DL(BPF_ALU64, BPF_MOD, BPF_K), - DL(BPF_ALU64, BPF_NEG, 0), - DL(BPF_JMP, BPF_CALL, 0), - DL(BPF_JMP, BPF_JA, 0), - DL(BPF_JMP, BPF_JEQ, BPF_X), - DL(BPF_JMP, BPF_JEQ, BPF_K), - DL(BPF_JMP, BPF_JNE, BPF_X), - DL(BPF_JMP, BPF_JNE, BPF_K), - DL(BPF_JMP, BPF_JGT, BPF_X), - DL(BPF_JMP, BPF_JGT, BPF_K), - DL(BPF_JMP, BPF_JGE, BPF_X), - DL(BPF_JMP, BPF_JGE, BPF_K), - DL(BPF_JMP, BPF_JSGT, BPF_X), - DL(BPF_JMP, BPF_JSGT, BPF_K), - DL(BPF_JMP, BPF_JSGE, BPF_X), - DL(BPF_JMP, BPF_JSGE, BPF_K), - DL(BPF_JMP, BPF_JSET, BPF_X), - DL(BPF_JMP, BPF_JSET, BPF_K), - DL(BPF_JMP, BPF_EXIT, 0), - DL(BPF_STX, BPF_MEM, BPF_B), - DL(BPF_STX, BPF_MEM, BPF_H), - DL(BPF_STX, BPF_MEM, BPF_W), - DL(BPF_STX, BPF_MEM, BPF_DW), - DL(BPF_STX, BPF_XADD, BPF_W), - DL(BPF_STX, BPF_XADD, BPF_DW), - DL(BPF_ST, BPF_MEM, BPF_B), - DL(BPF_ST, BPF_MEM, BPF_H), - DL(BPF_ST, BPF_MEM, BPF_W), - DL(BPF_ST, BPF_MEM, BPF_DW), - DL(BPF_LDX, BPF_MEM, BPF_B), - DL(BPF_LDX, BPF_MEM, BPF_H), - DL(BPF_LDX, BPF_MEM, BPF_W), - DL(BPF_LDX, BPF_MEM, BPF_DW), - DL(BPF_LD, BPF_ABS, BPF_W), - DL(BPF_LD, BPF_ABS, BPF_H), - DL(BPF_LD, BPF_ABS, BPF_B), - DL(BPF_LD, BPF_IND, BPF_W), - DL(BPF_LD, BPF_IND, BPF_H), - DL(BPF_LD, BPF_IND, BPF_B), -#undef DL + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X, + [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K, + [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X, + [BPF_ALU | BPF_SUB | BPF_K] = 
&&ALU_SUB_K, + [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X, + [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K, + [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X, + [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K, + [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X, + [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K, + [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X, + [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K, + [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X, + [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K, + [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X, + [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K, + [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X, + [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, + [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X, + [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K, + [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X, + [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K, + [BPF_ALU | BPF_NEG] = &&ALU_NEG, + [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE, + [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE, + /* 64 bit ALU operations */ + [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X, + [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K, + [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X, + [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K, + [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X, + [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K, + [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X, + [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K, + [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X, + [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K, + [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X, + [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K, + [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X, + [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K, + [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X, + [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K, + [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X, + [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, + [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X, + [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K, + [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X, + [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K, + [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X, + [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K, + [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, + /* Call instruction */ + [BPF_JMP | BPF_CALL] = &&JMP_CALL, + /* Jumps */ + [BPF_JMP | BPF_JA] = &&JMP_JA, + [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, + [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K, + [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X, + [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, + [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, + [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, + [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, + [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, + [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, + [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, + [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, + [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, + [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, + [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, + /* Program return */ + [BPF_JMP | BPF_EXIT] = &&JMP_EXIT, + /* Store instructions */ + [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B, + [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H, + [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W, + [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW, + [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W, + [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW, + [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B, + [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H, + [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W, + [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW, + /* Load instructions */ + [BPF_LDX | BPF_MEM | BPF_B] = 
&&LDX_MEM_B, + [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H, + [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W, + [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW, + [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W, + [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H, + [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B, + [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, + [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, + [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, }; + void *ptr; + int off; + +#define CONT ({ insn++; goto select_insn; }) +#define CONT_JMP ({ insn++; goto select_insn; }) - regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; - regs[ARG1_REG] = (u64) (unsigned long) ctx; - regs[A_REG] = 0; - regs[X_REG] = 0; + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; + ARG1 = (u64) (unsigned long) ctx; + + /* Registers used in classic BPF programs need to be reset first. */ + regs[BPF_REG_A] = 0; + regs[BPF_REG_X] = 0; select_insn: goto *jumptable[insn->code]; /* ALU */ #define ALU(OPCODE, OP) \ - BPF_ALU64_##OPCODE##_BPF_X: \ - A = A OP X; \ + ALU64_##OPCODE##_X: \ + DST = DST OP SRC; \ CONT; \ - BPF_ALU_##OPCODE##_BPF_X: \ - A = (u32) A OP (u32) X; \ + ALU_##OPCODE##_X: \ + DST = (u32) DST OP (u32) SRC; \ CONT; \ - BPF_ALU64_##OPCODE##_BPF_K: \ - A = A OP K; \ + ALU64_##OPCODE##_K: \ + DST = DST OP IMM; \ CONT; \ - BPF_ALU_##OPCODE##_BPF_K: \ - A = (u32) A OP (u32) K; \ + ALU_##OPCODE##_K: \ + DST = (u32) DST OP (u32) IMM; \ CONT; - ALU(BPF_ADD, +) - ALU(BPF_SUB, -) - ALU(BPF_AND, &) - ALU(BPF_OR, |) - ALU(BPF_LSH, <<) - ALU(BPF_RSH, >>) - ALU(BPF_XOR, ^) - ALU(BPF_MUL, *) + ALU(ADD, +) + ALU(SUB, -) + ALU(AND, &) + ALU(OR, |) + ALU(LSH, <<) + ALU(RSH, >>) + ALU(XOR, ^) + ALU(MUL, *) #undef ALU - BPF_ALU_BPF_NEG_0: - A = (u32) -A; + ALU_NEG: + DST = (u32) -DST; CONT; - BPF_ALU64_BPF_NEG_0: - A = -A; + ALU64_NEG: + DST = -DST; CONT; - BPF_ALU_BPF_MOV_BPF_X: - A = (u32) X; + ALU_MOV_X: + DST = (u32) SRC; CONT; - BPF_ALU_BPF_MOV_BPF_K: - A = (u32) K; + ALU_MOV_K: + DST = (u32) IMM; CONT; - BPF_ALU64_BPF_MOV_BPF_X: - A = X; + ALU64_MOV_X: + DST = SRC; CONT; - BPF_ALU64_BPF_MOV_BPF_K: - A = K; + ALU64_MOV_K: + DST = IMM; CONT; - BPF_ALU64_BPF_ARSH_BPF_X: - (*(s64 *) &A) >>= X; + ALU64_ARSH_X: + (*(s64 *) &DST) >>= SRC; CONT; - BPF_ALU64_BPF_ARSH_BPF_K: - (*(s64 *) &A) >>= K; + ALU64_ARSH_K: + (*(s64 *) &DST) >>= IMM; CONT; - BPF_ALU64_BPF_MOD_BPF_X: - if (unlikely(X == 0)) + ALU64_MOD_X: + if (unlikely(SRC == 0)) return 0; - tmp = A; - A = do_div(tmp, X); + tmp = DST; + DST = do_div(tmp, SRC); CONT; - BPF_ALU_BPF_MOD_BPF_X: - if (unlikely(X == 0)) + ALU_MOD_X: + if (unlikely(SRC == 0)) return 0; - tmp = (u32) A; - A = do_div(tmp, (u32) X); + tmp = (u32) DST; + DST = do_div(tmp, (u32) SRC); CONT; - BPF_ALU64_BPF_MOD_BPF_K: - tmp = A; - A = do_div(tmp, K); + ALU64_MOD_K: + tmp = DST; + DST = do_div(tmp, IMM); CONT; - BPF_ALU_BPF_MOD_BPF_K: - tmp = (u32) A; - A = do_div(tmp, (u32) K); + ALU_MOD_K: + tmp = (u32) DST; + DST = do_div(tmp, (u32) IMM); CONT; - BPF_ALU64_BPF_DIV_BPF_X: - if (unlikely(X == 0)) + ALU64_DIV_X: + if (unlikely(SRC == 0)) return 0; - do_div(A, X); + do_div(DST, SRC); CONT; - BPF_ALU_BPF_DIV_BPF_X: - if (unlikely(X == 0)) + ALU_DIV_X: + if (unlikely(SRC == 0)) return 0; - tmp = (u32) A; - do_div(tmp, (u32) X); - A = (u32) tmp; + tmp = (u32) DST; + do_div(tmp, (u32) SRC); + DST = (u32) tmp; CONT; - BPF_ALU64_BPF_DIV_BPF_K: - do_div(A, K); + ALU64_DIV_K: + do_div(DST, IMM); CONT; - BPF_ALU_BPF_DIV_BPF_K: - tmp = (u32) A; - do_div(tmp, (u32) K); - A = (u32) tmp; + ALU_DIV_K: + tmp = (u32) DST; + do_div(tmp, (u32) 
IMM); + DST = (u32) tmp; CONT; - BPF_ALU_BPF_END_BPF_TO_BE: - switch (K) { + ALU_END_TO_BE: + switch (IMM) { case 16: - A = (__force u16) cpu_to_be16(A); + DST = (__force u16) cpu_to_be16(DST); break; case 32: - A = (__force u32) cpu_to_be32(A); + DST = (__force u32) cpu_to_be32(DST); break; case 64: - A = (__force u64) cpu_to_be64(A); + DST = (__force u64) cpu_to_be64(DST); break; } CONT; - BPF_ALU_BPF_END_BPF_TO_LE: - switch (K) { + ALU_END_TO_LE: + switch (IMM) { case 16: - A = (__force u16) cpu_to_le16(A); + DST = (__force u16) cpu_to_le16(DST); break; case 32: - A = (__force u32) cpu_to_le32(A); + DST = (__force u32) cpu_to_le32(DST); break; case 64: - A = (__force u64) cpu_to_le64(A); + DST = (__force u64) cpu_to_le64(DST); break; } CONT; /* CALL */ - BPF_JMP_BPF_CALL_0: - /* Function call scratches R1-R5 registers, preserves R6-R9, - * and stores return value into R0. + JMP_CALL: + /* Function call scratches BPF_R1-BPF_R5 registers, + * preserves BPF_R6-BPF_R9, and stores return value + * into BPF_R0. */ - R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3], - regs[4], regs[5]); + BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3, + BPF_R4, BPF_R5); CONT; /* JMP */ - BPF_JMP_BPF_JA_0: + JMP_JA: insn += insn->off; CONT; - BPF_JMP_BPF_JEQ_BPF_X: - if (A == X) { + JMP_JEQ_X: + if (DST == SRC) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JEQ_BPF_K: - if (A == K) { + JMP_JEQ_K: + if (DST == IMM) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JNE_BPF_X: - if (A != X) { + JMP_JNE_X: + if (DST != SRC) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JNE_BPF_K: - if (A != K) { + JMP_JNE_K: + if (DST != IMM) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGT_BPF_X: - if (A > X) { + JMP_JGT_X: + if (DST > SRC) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGT_BPF_K: - if (A > K) { + JMP_JGT_K: + if (DST > IMM) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGE_BPF_X: - if (A >= X) { + JMP_JGE_X: + if (DST >= SRC) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGE_BPF_K: - if (A >= K) { + JMP_JGE_K: + if (DST >= IMM) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGT_BPF_X: - if (((s64)A) > ((s64)X)) { + JMP_JSGT_X: + if (((s64) DST) > ((s64) SRC)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGT_BPF_K: - if (((s64)A) > ((s64)K)) { + JMP_JSGT_K: + if (((s64) DST) > ((s64) IMM)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGE_BPF_X: - if (((s64)A) >= ((s64)X)) { + JMP_JSGE_X: + if (((s64) DST) >= ((s64) SRC)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGE_BPF_K: - if (((s64)A) >= ((s64)K)) { + JMP_JSGE_K: + if (((s64) DST) >= ((s64) IMM)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSET_BPF_X: - if (A & X) { + JMP_JSET_X: + if (DST & SRC) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSET_BPF_K: - if (A & K) { + JMP_JSET_K: + if (DST & IMM) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_EXIT_0: - return R0; + JMP_EXIT: + return BPF_R0; /* STX and ST and LDX*/ -#define LDST(SIZEOP, SIZE) \ - BPF_STX_BPF_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = X; \ - CONT; \ - BPF_ST_BPF_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = K; \ - CONT; \ - BPF_LDX_BPF_MEM_##SIZEOP: \ - A = *(SIZE *)(unsigned long) (X + insn->off); \ +#define LDST(SIZEOP, SIZE) \ + STX_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ + CONT; \ + ST_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \ + CONT; \ + LDX_MEM_##SIZEOP: \ + DST = 
*(SIZE *)(unsigned long) (SRC + insn->off); \ CONT; - LDST(BPF_B, u8) - LDST(BPF_H, u16) - LDST(BPF_W, u32) - LDST(BPF_DW, u64) + LDST(B, u8) + LDST(H, u16) + LDST(W, u32) + LDST(DW, u64) #undef LDST - BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */ - atomic_add((u32) X, (atomic_t *)(unsigned long) - (A + insn->off)); + STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ + atomic_add((u32) SRC, (atomic_t *)(unsigned long) + (DST + insn->off)); CONT; - BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ - atomic64_add((u64) X, (atomic64_t *)(unsigned long) - (A + insn->off)); + STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ + atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) + (DST + insn->off)); CONT; - BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */ - off = K; + LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */ + off = IMM; load_word: - /* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns are only - * appearing in the programs where ctx == skb. All programs - * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter() - * saves it in R6, internal BPF verifier will check that - * R6 == ctx. + /* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns are + * only appearing in the programs where ctx == + * skb. All programs keep 'ctx' in regs[BPF_REG_CTX] + * == BPF_R6, sk_convert_filter() saves it in BPF_R6, + * internal BPF verifier will check that BPF_R6 == + * ctx. * - * BPF_ABS and BPF_IND are wrappers of function calls, so - * they scratch R1-R5 registers, preserve R6-R9, and store - * return value into R0. + * BPF_ABS and BPF_IND are wrappers of function calls, + * so they scratch BPF_R1-BPF_R5 registers, preserve + * BPF_R6-BPF_R9, and store return value into BPF_R0.
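+ * + * As an example, the classic BPF insn "ldh [12]" (load the Ethernet type field) arrives here as BPF_LD | BPF_ABS | BPF_H with imm32 == 12; it is handled at LD_ABS_H below and leaves the halfword in BPF_R0 in cpu endianness.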
* * Implicit input: - * ctx + * ctx == skb == BPF_R6 == CTX * * Explicit input: - * X == any register - * K == 32-bit immediate + * SRC == any register + * IMM == 32-bit immediate * * Output: - * R0 - 8/16/32-bit skb data converted to cpu endianness + * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness */ - ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); + + ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); if (likely(ptr != NULL)) { - R0 = get_unaligned_be32(ptr); + BPF_R0 = get_unaligned_be32(ptr); CONT; } + return 0; - BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */ - off = K; + LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ + off = IMM; load_half: - ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); + ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); if (likely(ptr != NULL)) { - R0 = get_unaligned_be16(ptr); + BPF_R0 = get_unaligned_be16(ptr); CONT; } + return 0; - BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */ - off = K; + LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ + off = IMM; load_byte: - ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); + ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); if (likely(ptr != NULL)) { - R0 = *(u8 *)ptr; + BPF_R0 = *(u8 *)ptr; CONT; } + return 0; - BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */ - off = K + X; + LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */ + off = IMM + SRC; goto load_word; - BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */ - off = K + X; + LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */ + off = IMM + SRC; goto load_half; - BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */ - off = K + X; + LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */ + off = IMM + SRC; goto load_byte; default_label: /* If we ever reach this, we have a bug somewhere. */ WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); return 0; -#undef CONT_JMP -#undef CONT - -#undef R0 -#undef X -#undef A -#undef K } -u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, - const struct sock_filter_int *insni) - __attribute__ ((alias ("__sk_run_filter"))); - -u32 sk_run_filter_int_skb(const struct sk_buff *ctx, - const struct sock_filter_int *insni) - __attribute__ ((alias ("__sk_run_filter"))); -EXPORT_SYMBOL_GPL(sk_run_filter_int_skb); - /* Helper to find the offset of pkt_type in sk_buff structure. We want * to make sure its still a 3bit field starting at a byte boundary; * taken from arch/x86/net/bpf_jit_comp.c. 
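 *
 * (Editor's note for the PKT_TYPE_MAX definition below: pkt_type
 * is a 3-bit bitfield, and on __BIG_ENDIAN_BITFIELD layouts it
 * occupies the high bits of its byte, hence the (7 << 5) mask and
 * the extra "A >>= 5" that convert_bpf_extensions() emits after
 * masking -- e.g. PACKET_BROADCAST (1) is stored there as 0x20.)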
*/ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else #define PKT_TYPE_MAX 7 +#endif static unsigned int pkt_type_offset(void) { struct sk_buff skb_probe = { .pkt_type = ~0, }; @@ -594,16 +604,14 @@ static unsigned int pkt_type_offset(void) return -1; } -static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - struct sk_buff *skb = (struct sk_buff *)(long) ctx; - - return __skb_get_poff(skb); + return __skb_get_poff((struct sk_buff *)(unsigned long) ctx); } -static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - struct sk_buff *skb = (struct sk_buff *)(long) ctx; + struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -612,19 +620,19 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb->len < sizeof(struct nlattr)) return 0; - if (A > skb->len - sizeof(struct nlattr)) + if (a > skb->len - sizeof(struct nlattr)) return 0; - nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X); + nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x); if (nla) return (void *) nla - (void *) skb->data; return 0; } -static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - struct sk_buff *skb = (struct sk_buff *)(long) ctx; + struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -633,25 +641,31 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb->len < sizeof(struct nlattr)) return 0; - if (A > skb->len - sizeof(struct nlattr)) + if (a > skb->len - sizeof(struct nlattr)) return 0; - nla = (struct nlattr *) &skb->data[A]; - if (nla->nla_len > skb->len - A) + nla = (struct nlattr *) &skb->data[a]; + if (nla->nla_len > skb->len - a) return 0; - nla = nla_find_nested(nla, X); + nla = nla_find_nested(nla, x); if (nla) return (void *) nla - (void *) skb->data; return 0; } -static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return raw_smp_processor_id(); } +/* note that this only generates 32-bit random numbers */ +static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +{ + return prandom_u32(); +} + static bool convert_bpf_extensions(struct sock_filter *fp, struct sock_filter_int **insnp) { @@ -661,119 +675,83 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_PROTOCOL: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, protocol); - insn++; - + /* A = *(u16 *) (CTX + offsetof(protocol)) */ + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, protocol)); /* A = ntohs(A) [emitting a nop or swap16] */ - insn->code = BPF_ALU | BPF_END | BPF_FROM_BE; - insn->a_reg = A_REG; - insn->imm = 16; + *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - insn->code = BPF_LDX | BPF_MEM | BPF_B; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = pkt_type_offset(); + *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, + pkt_type_offset()); if (insn->off < 0) return false; insn++; - - insn->code = BPF_ALU | BPF_AND | BPF_K; - 
insn->a_reg = A_REG; - insn->imm = PKT_TYPE_MAX; + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + insn++; + *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5); +#endif break; case SKF_AD_OFF + SKF_AD_IFINDEX: case SKF_AD_OFF + SKF_AD_HATYPE: - if (FIELD_SIZEOF(struct sk_buff, dev) == 8) - insn->code = BPF_LDX | BPF_MEM | BPF_DW; - else - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = TMP_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, dev); - insn++; - - insn->code = BPF_JMP | BPF_JNE | BPF_K; - insn->a_reg = TMP_REG; - insn->imm = 0; - insn->off = 1; - insn++; - - insn->code = BPF_JMP | BPF_EXIT; - insn++; - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; - - if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) { - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->off = offsetof(struct net_device, ifindex); - } else { - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->off = offsetof(struct net_device, type); - } + BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0); + + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + BPF_REG_TMP, BPF_REG_CTX, + offsetof(struct sk_buff, dev)); + /* if (tmp != 0) goto pc + 1 */ + *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1); + *insn++ = BPF_EXIT_INSN(); + if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP, + offsetof(struct net_device, ifindex)); + else + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP, + offsetof(struct net_device, type)); break; case SKF_AD_OFF + SKF_AD_MARK: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, mark); + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, mark)); break; case SKF_AD_OFF + SKF_AD_RXHASH: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, hash); + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, hash)); break; case SKF_AD_OFF + SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, queue_mapping); + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, queue_mapping)); break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, vlan_tci); - insn++; - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */ + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, vlan_tci)); if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; - insn->imm = ~VLAN_TAG_PRESENT; + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, + ~VLAN_TAG_PRESENT); } else { - insn->code = BPF_ALU | BPF_RSH | BPF_K; - insn->a_reg = A_REG; - insn->imm = 12; - insn++; - - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; - insn->imm = 1; + /* A >>= 12 */ + *insn++ = 
BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); + /* A &= 1 */ + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); } break; @@ -781,46 +759,36 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_NLATTR: case SKF_AD_OFF + SKF_AD_NLATTR_NEST: case SKF_AD_OFF + SKF_AD_CPU: - /* arg1 = ctx */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG1_REG; - insn->x_reg = CTX_REG; - insn++; - + case SKF_AD_OFF + SKF_AD_RANDOM: + /* arg1 = CTX */ + *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); /* arg2 = A */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG2_REG; - insn->x_reg = A_REG; - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A); /* arg3 = X */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG3_REG; - insn->x_reg = X_REG; - insn++; - - /* Emit call(ctx, arg2=A, arg3=X) */ - insn->code = BPF_JMP | BPF_CALL; + *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X); + /* Emit call(arg1=CTX, arg2=A, arg3=X) */ switch (fp->k) { case SKF_AD_OFF + SKF_AD_PAY_OFFSET: - insn->imm = __skb_get_pay_offset - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_pay_offset); break; case SKF_AD_OFF + SKF_AD_NLATTR: - insn->imm = __skb_get_nlattr - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_nlattr); break; case SKF_AD_OFF + SKF_AD_NLATTR_NEST: - insn->imm = __skb_get_nlattr_nest - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest); break; case SKF_AD_OFF + SKF_AD_CPU: - insn->imm = __get_raw_cpu_id - __bpf_call_base; + *insn = BPF_EMIT_CALL(__get_raw_cpu_id); + break; + case SKF_AD_OFF + SKF_AD_RANDOM: + *insn = BPF_EMIT_CALL(__get_random_u32); break; } break; case SKF_AD_OFF + SKF_AD_ALU_XOR_X: - insn->code = BPF_ALU | BPF_XOR | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + /* A ^= X */ + *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X); break; default: @@ -870,7 +838,7 @@ int sk_convert_filter(struct sock_filter *prog, int len, u8 bpf_src; BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); - BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG); + BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); if (len <= 0 || len >= BPF_MAXINSNS) return -EINVAL; @@ -885,11 +853,8 @@ do_pass: new_insn = new_prog; fp = prog; - if (new_insn) { - new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - new_insn->a_reg = CTX_REG; - new_insn->x_reg = ARG1_REG; - } + if (new_insn) + *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); new_insn++; for (i = 0; i < len; fp++, i++) { @@ -937,17 +902,16 @@ do_pass: convert_bpf_extensions(fp, &insn)) break; - insn->code = fp->code; - insn->a_reg = A_REG; - insn->x_reg = X_REG; - insn->imm = fp->k; + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; - /* Jump opcodes map as-is, but offsets need adjustment. */ - case BPF_JMP | BPF_JA: - target = i + fp->k + 1; - insn->code = fp->code; -#define EMIT_JMP \ + /* Jump transformation cannot use BPF block macros + * everywhere as offset calculation and target updates + * require a bit more work than the rest, i.e. jump + * opcodes map as-is, but offsets need adjustment. + */ + +#define BPF_EMIT_JMP \ do { \ if (target >= len || target < 0) \ goto err; \ @@ -956,7 +920,10 @@ do_pass: insn->off -= insn - tmp_insns; \ } while (0) - EMIT_JMP; + case BPF_JMP | BPF_JA: + target = i + fp->k + 1; + insn->code = fp->code; + BPF_EMIT_JMP; break; case BPF_JMP | BPF_JEQ | BPF_K: @@ -972,17 +939,14 @@ do_pass: * immediate into tmp register and use it * in compare insn. 
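 *
 * (Editor's sketch: "jeq #-1, Lt, Lf" thus becomes
 *
 *	tmp = 0xffffffff;	// BPF_MOV32_IMM(BPF_REG_TMP, fp->k)
 *	if (A == tmp) goto Lt;	// BPF_JMP | BPF_JEQ | BPF_X
 *
 * so the 64-bit compare sees two zero-extended 32-bit values
 * instead of a sign-extended imm32.)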
*/ - insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = TMP_REG; - insn->imm = fp->k; - insn++; + *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + insn->dst_reg = BPF_REG_A; + insn->src_reg = BPF_REG_TMP; bpf_src = BPF_X; } else { - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->dst_reg = BPF_REG_A; + insn->src_reg = BPF_REG_X; insn->imm = fp->k; bpf_src = BPF_SRC(fp->code); } @@ -991,7 +955,7 @@ do_pass: if (fp->jf == 0) { insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; target = i + fp->jt + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; } @@ -999,127 +963,94 @@ do_pass: if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { insn->code = BPF_JMP | BPF_JNE | bpf_src; target = i + fp->jf + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; } /* Other jumps are mapped into two insns: Jxx and JA. */ target = i + fp->jt + 1; insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; - EMIT_JMP; + BPF_EMIT_JMP; insn++; insn->code = BPF_JMP | BPF_JA; target = i + fp->jf + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = TMP_REG; - insn->x_reg = A_REG; - insn++; - - insn->code = BPF_LD | BPF_ABS | BPF_B; - insn->a_reg = A_REG; - insn->imm = fp->k; - insn++; - - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; - insn->imm = 0xf; - insn++; - - insn->code = BPF_ALU | BPF_LSH | BPF_K; - insn->a_reg = A_REG; - insn->imm = 2; - insn++; - - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = X_REG; - insn->x_reg = A_REG; - insn++; - - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + /* tmp = A */ + *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); + /* A = BPF_R0 = *(u8 *) (skb->data + K) */ + *insn++ = BPF_LD_ABS(BPF_B, fp->k); + /* A &= 0xf */ + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); + /* A <<= 2 */ + *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); + /* X = A */ + *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); + /* A = tmp */ + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); break; /* RET_K, RET_A are remaped into 2 insns. */ case BPF_RET | BPF_A: case BPF_RET | BPF_K: - insn->code = BPF_ALU | BPF_MOV | - (BPF_RVAL(fp->code) == BPF_K ? - BPF_K : BPF_X); - insn->a_reg = 0; - insn->x_reg = A_REG; - insn->imm = fp->k; - insn++; - - insn->code = BPF_JMP | BPF_EXIT; + *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? + BPF_K : BPF_X, BPF_REG_0, + BPF_REG_A, fp->k); + *insn = BPF_EXIT_INSN(); break; /* Store to stack. */ case BPF_ST: case BPF_STX: - insn->code = BPF_STX | BPF_MEM | BPF_W; - insn->a_reg = FP_REG; - insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG; - insn->off = -(BPF_MEMWORDS - fp->k) * 4; + *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == + BPF_ST ? BPF_REG_A : BPF_REG_X, + -(BPF_MEMWORDS - fp->k) * 4); break; /* Load from stack. */ case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->x_reg = FP_REG; - insn->off = -(BPF_MEMWORDS - fp->k) * 4; + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_FP, + -(BPF_MEMWORDS - fp->k) * 4); break; /* A = K or X = K */ case BPF_LD | BPF_IMM: case BPF_LDX | BPF_IMM: - insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->imm = fp->k; + *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ? 
+ BPF_REG_A : BPF_REG_X, fp->k); break; /* X = A */ case BPF_MISC | BPF_TAX: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = X_REG; - insn->x_reg = A_REG; + *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); break; /* A = X */ case BPF_MISC | BPF_TXA: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X); break; /* A = skb->len or X = skb->len */ case BPF_LD | BPF_W | BPF_LEN: case BPF_LDX | BPF_W | BPF_LEN: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, len); + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_CTX, + offsetof(struct sk_buff, len)); break; - /* access seccomp_data fields */ + /* Access seccomp_data fields. */ case BPF_LDX | BPF_ABS | BPF_W: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = fp->k; + /* A = *(u32 *) (ctx + K) */ + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); break; + /* Unknown instruction. */ default: goto err; } @@ -1128,7 +1059,6 @@ do_pass: if (new_prog) memcpy(new_insn, tmp_insns, sizeof(*insn) * (insn - tmp_insns)); - new_insn += insn - tmp_insns; } @@ -1143,7 +1073,6 @@ do_pass: new_flen = new_insn - new_prog; if (pass > 2) goto err; - goto do_pass; } @@ -1167,44 +1096,46 @@ err: */ static int check_load_and_stores(struct sock_filter *filter, int flen) { - u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ + u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ int pc, ret = 0; BUILD_BUG_ON(BPF_MEMWORDS > 16); + masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); if (!masks) return -ENOMEM; + memset(masks, 0xff, flen * sizeof(*masks)); for (pc = 0; pc < flen; pc++) { memvalid &= masks[pc]; switch (filter[pc].code) { - case BPF_S_ST: - case BPF_S_STX: + case BPF_ST: + case BPF_STX: memvalid |= (1 << filter[pc].k); break; - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: if (!(memvalid & (1 << filter[pc].k))) { ret = -EINVAL; goto error; } break; - case BPF_S_JMP_JA: - /* a jump must set masks on target */ + case BPF_JMP | BPF_JA: + /* A jump must set masks on target */ masks[pc + 1 + filter[pc].k] &= memvalid; memvalid = ~0; break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_X: - case BPF_S_JMP_JSET_K: - /* a jump must set masks on targets */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* A jump must set masks on targets */ masks[pc + 1 + filter[pc].jt] &= memvalid; masks[pc + 1 + filter[pc].jf] &= memvalid; memvalid = ~0; @@ -1216,6 +1147,72 @@ error: return ret; } +static bool chk_code_allowed(u16 code_to_probe) +{ + static const bool codes[] = { + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_K] = true, + [BPF_ALU | BPF_ADD | BPF_X] = true, + [BPF_ALU | BPF_SUB | BPF_K] = true, + [BPF_ALU | BPF_SUB | BPF_X] = true, + [BPF_ALU | BPF_MUL | BPF_K] = true, + [BPF_ALU | BPF_MUL | BPF_X] = true, + [BPF_ALU | BPF_DIV | BPF_K] = true, + [BPF_ALU | BPF_DIV | BPF_X] = true, + [BPF_ALU | BPF_MOD | BPF_K] = true, + [BPF_ALU | BPF_MOD 
| BPF_X] = true, + [BPF_ALU | BPF_AND | BPF_K] = true, + [BPF_ALU | BPF_AND | BPF_X] = true, + [BPF_ALU | BPF_OR | BPF_K] = true, + [BPF_ALU | BPF_OR | BPF_X] = true, + [BPF_ALU | BPF_XOR | BPF_K] = true, + [BPF_ALU | BPF_XOR | BPF_X] = true, + [BPF_ALU | BPF_LSH | BPF_K] = true, + [BPF_ALU | BPF_LSH | BPF_X] = true, + [BPF_ALU | BPF_RSH | BPF_K] = true, + [BPF_ALU | BPF_RSH | BPF_X] = true, + [BPF_ALU | BPF_NEG] = true, + /* Load instructions */ + [BPF_LD | BPF_W | BPF_ABS] = true, + [BPF_LD | BPF_H | BPF_ABS] = true, + [BPF_LD | BPF_B | BPF_ABS] = true, + [BPF_LD | BPF_W | BPF_LEN] = true, + [BPF_LD | BPF_W | BPF_IND] = true, + [BPF_LD | BPF_H | BPF_IND] = true, + [BPF_LD | BPF_B | BPF_IND] = true, + [BPF_LD | BPF_IMM] = true, + [BPF_LD | BPF_MEM] = true, + [BPF_LDX | BPF_W | BPF_LEN] = true, + [BPF_LDX | BPF_B | BPF_MSH] = true, + [BPF_LDX | BPF_IMM] = true, + [BPF_LDX | BPF_MEM] = true, + /* Store instructions */ + [BPF_ST] = true, + [BPF_STX] = true, + /* Misc instructions */ + [BPF_MISC | BPF_TAX] = true, + [BPF_MISC | BPF_TXA] = true, + /* Return instructions */ + [BPF_RET | BPF_K] = true, + [BPF_RET | BPF_A] = true, + /* Jump instructions */ + [BPF_JMP | BPF_JA] = true, + [BPF_JMP | BPF_JEQ | BPF_K] = true, + [BPF_JMP | BPF_JEQ | BPF_X] = true, + [BPF_JMP | BPF_JGE | BPF_K] = true, + [BPF_JMP | BPF_JGE | BPF_X] = true, + [BPF_JMP | BPF_JGT | BPF_K] = true, + [BPF_JMP | BPF_JGT | BPF_X] = true, + [BPF_JMP | BPF_JSET | BPF_K] = true, + [BPF_JMP | BPF_JSET | BPF_X] = true, + }; + + if (code_to_probe >= ARRAY_SIZE(codes)) + return false; + + return codes[code_to_probe]; +} + /** * sk_chk_filter - verify socket filter code * @filter: filter to verify @@ -1232,153 +1229,76 @@ error: */ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) { - /* - * Valid instructions are initialized to non-0. - * Invalid instructions are initialized to 0. 
- */ - static const u8 codes[] = { - [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K, - [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X, - [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K, - [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X, - [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, - [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, - [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, - [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K, - [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X, - [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, - [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, - [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, - [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, - [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K, - [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X, - [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, - [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, - [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, - [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X, - [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG, - [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS, - [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS, - [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS, - [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN, - [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND, - [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND, - [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND, - [BPF_LD|BPF_IMM] = BPF_S_LD_IMM, - [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN, - [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH, - [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM, - [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX, - [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA, - [BPF_RET|BPF_K] = BPF_S_RET_K, - [BPF_RET|BPF_A] = BPF_S_RET_A, - [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K, - [BPF_LD|BPF_MEM] = BPF_S_LD_MEM, - [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM, - [BPF_ST] = BPF_S_ST, - [BPF_STX] = BPF_S_STX, - [BPF_JMP|BPF_JA] = BPF_S_JMP_JA, - [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K, - [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X, - [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K, - [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X, - [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K, - [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X, - [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K, - [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, - }; - int pc; bool anc_found; + int pc; if (flen == 0 || flen > BPF_MAXINSNS) return -EINVAL; - /* check the filter code now */ + /* Check the filter code now */ for (pc = 0; pc < flen; pc++) { struct sock_filter *ftest = &filter[pc]; - u16 code = ftest->code; - if (code >= ARRAY_SIZE(codes)) - return -EINVAL; - code = codes[code]; - if (!code) + /* May we actually operate on this code? */ + if (!chk_code_allowed(ftest->code)) return -EINVAL; + /* Some instructions need special checks */ - switch (code) { - case BPF_S_ALU_DIV_K: - case BPF_S_ALU_MOD_K: - /* check for division by zero */ + switch (ftest->code) { + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + /* Check for division by zero */ if (ftest->k == 0) return -EINVAL; break; - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: - case BPF_S_ST: - case BPF_S_STX: - /* check for invalid memory addresses */ + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + case BPF_ST: + case BPF_STX: + /* Check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; break; - case BPF_S_JMP_JA: - /* - * Note, the large ftest->k might cause loops. + case BPF_JMP | BPF_JA: + /* Note, the large ftest->k might cause loops. * Compare this with conditional jumps below, * where offsets are limited. 
--ANK (981016) */ - if (ftest->k >= (unsigned int)(flen-pc-1)) + if (ftest->k >= (unsigned int)(flen - pc - 1)) return -EINVAL; break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_X: - case BPF_S_JMP_JSET_K: - /* for conditionals both must be safe */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* Both conditionals must be safe */ if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; break; - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: anc_found = false; -#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ - code = BPF_S_ANC_##CODE; \ - anc_found = true; \ - break - switch (ftest->k) { - ANCILLARY(PROTOCOL); - ANCILLARY(PKTTYPE); - ANCILLARY(IFINDEX); - ANCILLARY(NLATTR); - ANCILLARY(NLATTR_NEST); - ANCILLARY(MARK); - ANCILLARY(QUEUE); - ANCILLARY(HATYPE); - ANCILLARY(RXHASH); - ANCILLARY(CPU); - ANCILLARY(ALU_XOR_X); - ANCILLARY(VLAN_TAG); - ANCILLARY(VLAN_TAG_PRESENT); - ANCILLARY(PAY_OFFSET); - } - - /* ancillary operation unknown or unsupported */ + if (bpf_anc_helper(ftest) & BPF_ANC) + anc_found = true; + /* Ancillary operation unknown or unsupported */ if (anc_found == false && ftest->k >= SKF_AD_OFF) return -EINVAL; } - ftest->code = code; } - /* last instruction must be a RET code */ + /* Last instruction must be a RET code */ switch (filter[flen - 1].code) { - case BPF_S_RET_K: - case BPF_S_RET_A: + case BPF_RET | BPF_K: + case BPF_RET | BPF_A: return check_load_and_stores(filter, flen); } + return -EINVAL; } EXPORT_SYMBOL(sk_chk_filter); @@ -1423,7 +1343,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu) struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); sk_release_orig_filter(fp); - bpf_jit_free(fp); + sk_filter_free(fp); } /** @@ -1461,7 +1381,7 @@ static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp, fp_new = sock_kmalloc(sk, len, GFP_KERNEL); if (fp_new) { - memcpy(fp_new, fp, sizeof(struct sk_filter)); + *fp_new = *fp; /* As we're kepping orig_prog in fp_new along, * we need to make sure we're not evicting it * from the old fp. @@ -1478,7 +1398,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, { struct sock_filter *old_prog; struct sk_filter *old_fp; - int i, err, new_len, old_len = fp->len; + int err, new_len, old_len = fp->len; /* We are free to overwrite insns et al right here as it * won't be used at this point in time anymore internally @@ -1488,13 +1408,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, BUILD_BUG_ON(sizeof(struct sock_filter) != sizeof(struct sock_filter_int)); - /* For now, we need to unfiddle BPF_S_* identifiers in place. - * This can sooner or later on be subject to removal, e.g. when - * JITs have been converted. - */ - for (i = 0; i < fp->len; i++) - sk_decode_filter(&fp->insns[i], &fp->insns[i]); - /* Conversion cannot happen on overlapping memory areas, * so we need to keep the user BPF around until the 2nd * pass. At this time, the user BPF is stored in fp->insns. 
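Editor's aside, not part of the patch: the rewritten checker above still takes plain struct sock_filter arrays; what changed is that valid programs now keep their original BPF_* opcodes instead of being rewritten to BPF_S_* ones, and kernel-internal callers hand them over as struct sock_fprog_kern (see the sk_unattached_filter_create() and ptp_classifier hunks further down). A minimal classic program that satisfies every rule enforced here -- allowed opcode, in-range jumps, RET as the last instruction -- is the one-instruction accept-all filter:

	/* Hedged sketch: "accept" means BPF_RET returns a
	 * non-zero snap length for every packet.
	 */
	static struct sock_filter accept_all[] = {
		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },
	};
	static struct sock_fprog_kern accept_prog = {
		.len	= ARRAY_SIZE(accept_all),
		.filter	= accept_all,
	};
	/* attach path: sk_unattached_filter_create(&fp, &accept_prog) */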
@@ -1523,7 +1436,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, goto out_err_free; } - fp->bpf_func = sk_run_filter_int_skb; fp->len = new_len; /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ @@ -1536,6 +1448,8 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, */ goto out_err_free; + sk_filter_select_runtime(fp); + kfree(old_prog); return fp; @@ -1550,6 +1464,33 @@ out_err: return ERR_PTR(err); } +void __weak bpf_int_jit_compile(struct sk_filter *prog) +{ +} + +/** + * sk_filter_select_runtime - select execution runtime for BPF program + * @fp: sk_filter populated with internal BPF program + * + * try to JIT internal BPF program, if JIT is not available select interpreter + * BPF program will be executed via SK_RUN_FILTER() macro + */ +void sk_filter_select_runtime(struct sk_filter *fp) +{ + fp->bpf_func = (void *) __sk_run_filter; + + /* Probe if internal BPF can be JITed */ + bpf_int_jit_compile(fp); +} +EXPORT_SYMBOL_GPL(sk_filter_select_runtime); + +/* free internal BPF program */ +void sk_filter_free(struct sk_filter *fp) +{ + bpf_jit_free(fp); +} +EXPORT_SYMBOL_GPL(sk_filter_free); + static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, struct sock *sk) { @@ -1592,7 +1533,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, * a negative errno code is returned. On success the return is zero. */ int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog *fprog) + struct sock_fprog_kern *fprog) { unsigned int fsize = sk_filter_proglen(fprog); struct sk_filter *fp; @@ -1713,83 +1654,6 @@ int sk_detach_filter(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_detach_filter); -void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) -{ - static const u16 decodes[] = { - [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, - [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, - [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, - [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, - [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, - [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, - [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, - [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, - [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, - [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, - [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, - [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, - [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, - [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, - [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, - [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, - [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, - [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, - [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, - [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, - [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, - [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, - [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, - [BPF_S_LD_W_IND] = 
BPF_LD|BPF_W|BPF_IND, - [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, - [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, - [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, - [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, - [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, - [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, - [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, - [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, - [BPF_S_RET_K] = BPF_RET|BPF_K, - [BPF_S_RET_A] = BPF_RET|BPF_A, - [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, - [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, - [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, - [BPF_S_ST] = BPF_ST, - [BPF_S_STX] = BPF_STX, - [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, - [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, - [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, - [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, - [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, - [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, - [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, - [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, - [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, - }; - u16 code; - - code = filt->code; - - to->code = decodes[code]; - to->jt = filt->jt; - to->jf = filt->jf; - to->k = filt->k; -} - int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7c8ffd974961..85b62691f4f2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -273,7 +273,7 @@ static void cleanup_net(struct work_struct *work) { const struct pernet_operations *ops; struct net *net, *tmp; - LIST_HEAD(net_kill_list); + struct list_head net_kill_list; LIST_HEAD(net_exit_list); /* Atomically snapshot the list of namespaces to cleanup */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0304f981f7ff..fc17a9d309ac 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -573,7 +573,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) is_zero_ether_addr(pkt_dev->src_mac) ? pkt_dev->odev->dev_addr : pkt_dev->src_mac); - seq_printf(seq, "dst_mac: "); + seq_puts(seq, "dst_mac: "); seq_printf(seq, "%pM\n", pkt_dev->dst_mac); seq_printf(seq, @@ -588,7 +588,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->nr_labels) { unsigned int i; - seq_printf(seq, " mpls: "); + seq_puts(seq, " mpls: "); for (i = 0; i < pkt_dev->nr_labels; i++) seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), i == pkt_dev->nr_labels-1 ? 
"\n" : ", "); @@ -613,67 +613,67 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->node >= 0) seq_printf(seq, " node: %d\n", pkt_dev->node); - seq_printf(seq, " Flags: "); + seq_puts(seq, " Flags: "); if (pkt_dev->flags & F_IPV6) - seq_printf(seq, "IPV6 "); + seq_puts(seq, "IPV6 "); if (pkt_dev->flags & F_IPSRC_RND) - seq_printf(seq, "IPSRC_RND "); + seq_puts(seq, "IPSRC_RND "); if (pkt_dev->flags & F_IPDST_RND) - seq_printf(seq, "IPDST_RND "); + seq_puts(seq, "IPDST_RND "); if (pkt_dev->flags & F_TXSIZE_RND) - seq_printf(seq, "TXSIZE_RND "); + seq_puts(seq, "TXSIZE_RND "); if (pkt_dev->flags & F_UDPSRC_RND) - seq_printf(seq, "UDPSRC_RND "); + seq_puts(seq, "UDPSRC_RND "); if (pkt_dev->flags & F_UDPDST_RND) - seq_printf(seq, "UDPDST_RND "); + seq_puts(seq, "UDPDST_RND "); if (pkt_dev->flags & F_UDPCSUM) - seq_printf(seq, "UDPCSUM "); + seq_puts(seq, "UDPCSUM "); if (pkt_dev->flags & F_MPLS_RND) - seq_printf(seq, "MPLS_RND "); + seq_puts(seq, "MPLS_RND "); if (pkt_dev->flags & F_QUEUE_MAP_RND) - seq_printf(seq, "QUEUE_MAP_RND "); + seq_puts(seq, "QUEUE_MAP_RND "); if (pkt_dev->flags & F_QUEUE_MAP_CPU) - seq_printf(seq, "QUEUE_MAP_CPU "); + seq_puts(seq, "QUEUE_MAP_CPU "); if (pkt_dev->cflows) { if (pkt_dev->flags & F_FLOW_SEQ) - seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ + seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/ else - seq_printf(seq, "FLOW_RND "); + seq_puts(seq, "FLOW_RND "); } #ifdef CONFIG_XFRM if (pkt_dev->flags & F_IPSEC_ON) { - seq_printf(seq, "IPSEC "); + seq_puts(seq, "IPSEC "); if (pkt_dev->spi) seq_printf(seq, "spi:%u", pkt_dev->spi); } #endif if (pkt_dev->flags & F_MACSRC_RND) - seq_printf(seq, "MACSRC_RND "); + seq_puts(seq, "MACSRC_RND "); if (pkt_dev->flags & F_MACDST_RND) - seq_printf(seq, "MACDST_RND "); + seq_puts(seq, "MACDST_RND "); if (pkt_dev->flags & F_VID_RND) - seq_printf(seq, "VID_RND "); + seq_puts(seq, "VID_RND "); if (pkt_dev->flags & F_SVID_RND) - seq_printf(seq, "SVID_RND "); + seq_puts(seq, "SVID_RND "); if (pkt_dev->flags & F_NODE) - seq_printf(seq, "NODE_ALLOC "); + seq_puts(seq, "NODE_ALLOC "); seq_puts(seq, "\n"); @@ -716,7 +716,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->result[0]) seq_printf(seq, "Result: %s\n", pkt_dev->result); else - seq_printf(seq, "Result: Idle\n"); + seq_puts(seq, "Result: Idle\n"); return 0; } @@ -1735,14 +1735,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); - seq_printf(seq, "Running: "); + seq_puts(seq, "Running: "); if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) if (pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odevname); - seq_printf(seq, "\nStopped: "); + seq_puts(seq, "\nStopped: "); list_for_each_entry(pkt_dev, &t->if_list, list) if (!pkt_dev->running) @@ -1751,7 +1751,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) if (t->result[0]) seq_printf(seq, "\nResult: %s\n", t->result); else - seq_printf(seq, "\nResult: NA\n"); + seq_puts(seq, "\nResult: NA\n"); if_unlock(t); diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index eaba0f68f860..d3027a73fd4b 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -88,7 +88,7 @@ EXPORT_SYMBOL_GPL(ptp_classify_raw); void __init ptp_classifier_init(void) { - static struct sock_filter ptp_filter[] = { + static struct sock_filter ptp_filter[] __initdata = { { 0x28, 0, 0, 0x0000000c }, { 0x15, 0, 12, 0x00000800 }, { 0x30, 0, 0, 0x00000017 }, @@ -133,7 +133,7 @@ void __init ptp_classifier_init(void) { 0x16, 0, 0, 
0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; - struct sock_fprog ptp_prog = { + struct sock_fprog_kern ptp_prog = { .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d8d8fcfa060..1063996f8317 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -798,8 +798,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + - nla_total_size(sizeof(struct ifla_vf_tx_rate)) + - nla_total_size(sizeof(struct ifla_vf_spoofchk))); + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + + nla_total_size(sizeof(struct ifla_vf_rate))); return size; } else return 0; @@ -1065,6 +1065,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_info ivi; struct ifla_vf_mac vf_mac; struct ifla_vf_vlan vf_vlan; + struct ifla_vf_rate vf_rate; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; @@ -1085,6 +1086,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, break; vf_mac.vf = vf_vlan.vf = + vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = ivi.vf; @@ -1092,7 +1094,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; - vf_tx_rate.rate = ivi.tx_rate; + vf_tx_rate.rate = ivi.max_tx_rate; + vf_rate.min_tx_rate = ivi.min_tx_rate; + vf_rate.max_tx_rate = ivi.max_tx_rate; vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf = nla_nest_start(skb, IFLA_VF_INFO); @@ -1102,6 +1106,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || + nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), + &vf_rate) || nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), @@ -1208,6 +1214,10 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { .len = sizeof(struct ifla_vf_tx_rate) }, [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, .len = sizeof(struct ifla_vf_spoofchk) }, + [IFLA_VF_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_rate) }, + [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_link_state) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { @@ -1234,6 +1244,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; int err; + int hdrlen; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -1241,8 +1252,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { + /* A hack to preserve kernel<->userspace interface. + * The correct header is ifinfomsg. It is consistent with rtnl_getlink. + * However, before Linux v3.9 the code here assumed rtgenmsg and that's + * what iproute2 < v3.9.0 used. + * We can detect the old iproute2. Even including the IFLA_EXT_MASK + * attribute, its netlink message is shorter than struct ifinfomsg. + */ + hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ? 
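/* (Editor's arithmetic for the hdrlen heuristic computed here:
 * struct rtgenmsg is 1 byte, padded to 4, and the optional
 * IFLA_EXT_MASK attribute adds 8 bytes (4-byte nlattr header plus
 * a u32), so an old iproute2 request carries at most 12 bytes of
 * payload, safely below sizeof(struct ifinfomsg) == 16.) */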
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); + + if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1367,11 +1387,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) } case IFLA_VF_TX_RATE: { struct ifla_vf_tx_rate *ivt; + struct ifla_vf_info ivf; ivt = nla_data(vf); err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, - ivt->rate); + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, + &ivf); + if (err) + break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + break; + } + case IFLA_VF_RATE: { + struct ifla_vf_rate *ivt; + ivt = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); break; } case IFLA_VF_SPOOFCHK: { @@ -1744,7 +1782,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) ops->dellink(dev, &list_kill); unregister_netdevice_many(&list_kill); - list_del(&list_kill); return 0; } @@ -2019,11 +2056,15 @@ replay: if (ops->newlink) { err = ops->newlink(net, dev, tb, data); /* Drivers should call free_netdev() in ->destructor - * and unregister it on failure so that device could be - * finally freed in rtnl_unlock. + * and unregister it on failure after registration + * so that device could be finally freed in rtnl_unlock. */ - if (err < 0) + if (err < 0) { + /* If device is not registered at all, free it now */ + if (dev->reg_state == NETREG_UNINITIALIZED) + free_netdev(dev); goto out; + } } else { err = register_netdevice(dev); if (err < 0) { @@ -2095,9 +2136,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; + int hdrlen; + + /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */ + hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? 
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { + if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 897da56f3aff..ba71212f0251 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -__u32 secure_ip_id(__be32 daddr) -{ - u32 hash[MD5_DIGEST_WORDS]; - - net_secret_init(); - hash[0] = (__force __u32) daddr; - hash[1] = net_secret[13]; - hash[2] = net_secret[14]; - hash[3] = net_secret[15]; - - md5_transform(hash, net_secret); - - return hash[0]; -} - -__u32 secure_ipv6_id(const __be32 daddr[4]) -{ - __u32 hash[4]; - - net_secret_init(); - memcpy(hash, daddr, 16); - md5_transform(hash, net_secret); - - return hash[0]; -} __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8383b2bddeb9..bf92824af3f7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -694,7 +694,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif memcpy(new->cb, old->cb, sizeof(old->cb)); new->csum = old->csum; - new->local_df = old->local_df; + new->ignore_df = old->ignore_df; new->pkt_type = old->pkt_type; new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); @@ -951,10 +951,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) EXPORT_SYMBOL(skb_copy); /** - * __pskb_copy - create copy of an sk_buff with private head. + * __pskb_copy_fclone - create copy of an sk_buff with private head. * @skb: buffer to copy * @headroom: headroom of new skb * @gfp_mask: allocation priority + * @fclone: if true allocate the copy of the skb from the fclone + * cache instead of the head cache; it is recommended to set this + * to true for the cases where the copy will likely be cloned * * Make a copy of both an &sk_buff and part of its data, located * in header. Fragmented data remain shared. This is used when @@ -964,11 +967,12 @@ EXPORT_SYMBOL(skb_copy); * The returned buffer has a reference count of 1. */ -struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) +struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, + gfp_t gfp_mask, bool fclone) { unsigned int size = skb_headlen(skb) + headroom; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + int flags = skb_alloc_rx_flag(skb) | (fclone ? 
SKB_ALLOC_FCLONE : 0); + struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE); if (!n) goto out; @@ -1008,7 +1012,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) out: return n; } -EXPORT_SYMBOL(__pskb_copy); +EXPORT_SYMBOL(__pskb_copy_fclone); /** * pskb_expand_head - reallocate header of &sk_buff @@ -2881,12 +2885,14 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int pos; int dummy; + __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) return ERR_PTR(-EINVAL); - csum = !!can_checksum_protocol(features, proto); - __skb_push(head_skb, doffset); + csum = !head_skb->encap_hdr_csum && + !!can_checksum_protocol(features, proto); + headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -2983,6 +2989,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len, 0); + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + offset; continue; } @@ -3052,6 +3060,8 @@ perform_csum_check: nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); @@ -3913,7 +3923,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; - skb->local_df = 0; + skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; secpath_reset(skb); diff --git a/net/core/sock.c b/net/core/sock.c index 664ee4295b6f..026e01f70274 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -784,7 +784,7 @@ set_rcvbuf: break; case SO_NO_CHECK: - sk->sk_no_check = valbool; + sk->sk_no_check_tx = valbool; break; case SO_PRIORITY: @@ -1064,7 +1064,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_NO_CHECK: - v.val = sk->sk_no_check; + v.val = sk->sk_no_check_tx; break; case SO_PRIORITY: diff --git a/net/core/tso.c b/net/core/tso.c new file mode 100644 index 000000000000..8c3203c585b0 --- /dev/null +++ b/net/core/tso.c @@ -0,0 +1,77 @@ +#include <linux/export.h> +#include <net/ip.h> +#include <net/tso.h> + +/* Calculate expected number of TX descriptors */ +int tso_count_descs(struct sk_buff *skb) +{ + /* The Marvell Way */ + return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags; +} +EXPORT_SYMBOL(tso_count_descs); + +void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, + int size, bool is_last) +{ + struct iphdr *iph; + struct tcphdr *tcph; + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int mac_hdr_len = skb_network_offset(skb); + + memcpy(hdr, skb->data, hdr_len); + iph = (struct iphdr *)(hdr + mac_hdr_len); + iph->id = htons(tso->ip_id); + iph->tot_len = htons(size + hdr_len - mac_hdr_len); + tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); + tcph->seq = htonl(tso->tcp_seq); + tso->ip_id++; + + if (!is_last) { + /* Clear all special flags for not last packet */ + tcph->psh = 0; + tcph->fin = 0; + tcph->rst = 0; + } +} +EXPORT_SYMBOL(tso_build_hdr); + +void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size) +{ + tso->tcp_seq += size; + tso->size -= size; + tso->data += size; + + if ((tso->size == 0) && + (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; + + /* Move to next segment */ + tso->size = frag->size; + tso->data = page_address(frag->page.p) + frag->page_offset; + 
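/* (Editor's usage sketch for the tso_* helpers just added; this
 * mirrors the mvneta/mv643xx_eth conversions in this merge, with
 * hw_put_hdr() and hw_put_data() standing in for driver-specific
 * descriptor handling:
 *
 *	struct tso_t tso;
 *	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 *	int data_left = skb->len - hdr_len;
 *	int mss = skb_shinfo(skb)->gso_size;
 *
 *	tso_count_descs(skb);	// worst-case descriptor budget
 *	tso_start(skb, &tso);
 *	while (data_left > 0) {
 *		int size = min(mss, data_left);
 *		char *hdr = hw_put_hdr(hdr_len);
 *
 *		tso_build_hdr(skb, hdr, &tso, size, size == data_left);
 *		data_left -= size;
 *		while (size > 0) {
 *			int chunk = min(size, (int)tso.size);
 *
 *			hw_put_data(tso.data, chunk);
 *			tso_build_data(skb, &tso, chunk);
 *			size -= chunk;
 *		}
 *	}
 * ) */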
tso->next_frag_idx++; + } +} +EXPORT_SYMBOL(tso_build_data); + +void tso_start(struct sk_buff *skb, struct tso_t *tso) +{ + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + + tso->ip_id = ntohs(ip_hdr(skb)->id); + tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); + tso->next_frag_idx = 0; + + /* Build first data */ + tso->size = skb_headlen(skb) - hdr_len; + tso->data = skb->data + hdr_len; + if ((tso->size == 0) && + (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; + + /* Move to next segment */ + tso->size = frag->size; + tso->data = page_address(frag->page.p) + frag->page_offset; + tso->next_frag_idx++; + } +} +EXPORT_SYMBOL(tso_start); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 22b5d818b200..6ca645c4b48e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1024,7 +1024,6 @@ static struct inet_protosw dccp_v4_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v4_prot, .ops = &inet_dccp_ops, - .no_check = 0, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index eb892b4f4814..de2c1e719305 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1084,14 +1084,15 @@ EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int dccp_mib_init(void) { - return snmp_mib_init((void __percpu **)dccp_statistics, - sizeof(struct dccp_mib), - __alignof__(struct dccp_mib)); + dccp_statistics = alloc_percpu(struct dccp_mib); + if (!dccp_statistics) + return -ENOMEM; + return 0; } static inline void dccp_mib_exit(void) { - snmp_mib_free((void __percpu **)dccp_statistics); + free_percpu(dccp_statistics); } static int thash_entries; diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 607ab71b5a0c..53731e45403c 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -20,6 +20,7 @@ /* Boundary values */ static int zero = 0, + one = 1, u8_max = 0xFF; static unsigned long seqw_min = DCCPF_SEQ_WMIN, seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */ @@ -58,7 +59,7 @@ static struct ctl_table dccp_default_table[] = { .maxlen = sizeof(sysctl_dccp_request_retries), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = &one, .extra2 = &u8_max, }, { diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 16f0b223102e..1cd46a345cb0 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -280,7 +280,7 @@ static ktime_t dccp_timestamp_seed; */ u32 dccp_timestamp(void) { - s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); + u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); do_div(delta, 10); return delta; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 4c04848953bd..ae011b46c071 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -481,7 +481,7 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf sk->sk_backlog_rcv = dn_nsp_backlog_rcv; sk->sk_destruct = dn_destruct; - sk->sk_no_check = 1; + sk->sk_no_check_tx = 1; sk->sk_family = PF_DECnet; sk->sk_protocol = 0; sk->sk_allocation = gfp; diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index e7b6d53eef88..9acec61f5433 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -93,8 +93,8 @@ int dns_query(const char *type, const char *name, size_t namelen, } if (!namelen) - namelen = strlen(name); - if (namelen < 3) + namelen = strnlen(name, 256); + if (namelen < 3 || namelen > 255) return -EINVAL; desclen += namelen + 1; @@ -149,7 +149,9 @@ int dns_query(const char *type, const char 
*name, size_t namelen, if (!*_result) goto put; - memcpy(*_result, upayload->data, len + 1); + memcpy(*_result, upayload->data, len); + (*_result)[len] = '\0'; + if (_expiry) *_expiry = rkey->expiry; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 02c0e1716f64..64c5af0a10dd 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -346,7 +346,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, return slave_dev; slave_dev->features = master->vlan_features; - SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); + slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c index 0f5a69ed746d..fe6bd7a71081 100644 --- a/net/ieee802154/6lowpan_rtnl.c +++ b/net/ieee802154/6lowpan_rtnl.c @@ -92,6 +92,7 @@ static int lowpan_header_create(struct sk_buff *skb, const u8 *saddr = _saddr; const u8 *daddr = _daddr; struct ieee802154_addr sa, da; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); /* TODO: * if this package isn't ipv6 one, where should it be routed? @@ -115,8 +116,7 @@ static int lowpan_header_create(struct sk_buff *skb, * from MAC subif of the 'dev' and 'real_dev' network devices, but * this isn't implemented in mainline yet, so currently we assign 0xff */ - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; - mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + cb->type = IEEE802154_FC_TYPE_DATA; /* prepare wpan address data */ sa.mode = IEEE802154_ADDR_LONG; @@ -135,11 +135,10 @@ static int lowpan_header_create(struct sk_buff *skb, } else { da.mode = IEEE802154_ADDR_LONG; da.extended_addr = ieee802154_devaddr_from_raw(daddr); - - /* request acknowledgment */ - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; } + cb->ackreq = !lowpan_is_addr_broadcast(daddr); + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, type, (void *)&da, (void *)&sa, 0); } @@ -221,139 +220,149 @@ static int lowpan_set_address(struct net_device *dev, void *p) return 0; } -static int -lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, - int mlen, int plen, int offset, int type) +static struct sk_buff* +lowpan_alloc_frag(struct sk_buff *skb, int size, + const struct ieee802154_hdr *master_hdr) { + struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; struct sk_buff *frag; - int hlen; - - hlen = (type == LOWPAN_DISPATCH_FRAG1) ? 
- LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE; - - raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); + int rc; + + frag = alloc_skb(real_dev->hard_header_len + + real_dev->needed_tailroom + size, + GFP_ATOMIC); + + if (likely(frag)) { + frag->dev = real_dev; + frag->priority = skb->priority; + skb_reserve(frag, real_dev->hard_header_len); + skb_reset_network_header(frag); + *mac_cb(frag) = *mac_cb(skb); + + rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, + &master_hdr->source, size); + if (rc < 0) { + kfree_skb(frag); + return ERR_PTR(rc); + } + } else { + frag = ERR_PTR(-ENOMEM); + } - frag = netdev_alloc_skb(skb->dev, - hlen + mlen + plen + IEEE802154_MFR_SIZE); - if (!frag) - return -ENOMEM; + return frag; +} - frag->priority = skb->priority; +static int +lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, + u8 *frag_hdr, int frag_hdrlen, + int offset, int len) +{ + struct sk_buff *frag; - /* copy header, MFR and payload */ - skb_put(frag, mlen); - skb_copy_to_linear_data(frag, skb_mac_header(skb), mlen); + raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); - skb_put(frag, hlen); - skb_copy_to_linear_data_offset(frag, mlen, head, hlen); + frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); + if (IS_ERR(frag)) + return PTR_ERR(frag); - skb_put(frag, plen); - skb_copy_to_linear_data_offset(frag, mlen + hlen, - skb_network_header(skb) + offset, plen); + memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); + memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); - raw_dump_table(__func__, " raw fragment dump", frag->data, frag->len); + raw_dump_table(__func__, " fragment dump", frag->data, frag->len); return dev_queue_xmit(frag); } static int -lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev) +lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, + const struct ieee802154_hdr *wpan_hdr) { - int err; - u16 dgram_offset, dgram_size, payload_length, header_length, - lowpan_size, frag_plen, offset; - __be16 tag; - u8 head[5]; - - header_length = skb->mac_len; - payload_length = skb->len - header_length; - tag = lowpan_dev_info(dev)->fragment_tag++; - lowpan_size = skb_network_header_len(skb); + u16 dgram_size, dgram_offset; + __be16 frag_tag; + u8 frag_hdr[5]; + int frag_cap, frag_len, payload_cap, rc; + int skb_unprocessed, skb_offset; + dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - - header_length; + skb->mac_len; + frag_tag = lowpan_dev_info(dev)->fragment_tag++; - /* first fragment header */ - head[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x7); - head[1] = dgram_size & 0xff; - memcpy(head + 2, &tag, sizeof(tag)); + frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); + frag_hdr[1] = dgram_size & 0xff; + memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); - /* calc the nearest payload length(divided to 8) for first fragment - * which fits into a IEEE802154_MTU - */ - frag_plen = round_down(IEEE802154_MTU - header_length - - LOWPAN_FRAG1_HEAD_SIZE - lowpan_size - - IEEE802154_MFR_SIZE, 8); - - err = lowpan_fragment_xmit(skb, head, header_length, - frag_plen + lowpan_size, 0, - LOWPAN_DISPATCH_FRAG1); - if (err) { - pr_debug("%s unable to send FRAG1 packet (tag: %d)", - __func__, tag); - goto exit; - } + payload_cap = ieee802154_max_payload(wpan_hdr); - offset = lowpan_size + frag_plen; - dgram_offset += frag_plen; + frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - + skb_network_header_len(skb), 8); - /* next 
fragment header */ - head[0] &= ~LOWPAN_DISPATCH_FRAG1; - head[0] |= LOWPAN_DISPATCH_FRAGN; + skb_offset = skb_network_header_len(skb); + skb_unprocessed = skb->len - skb->mac_len - skb_offset; - frag_plen = round_down(IEEE802154_MTU - header_length - - LOWPAN_FRAGN_HEAD_SIZE - IEEE802154_MFR_SIZE, 8); + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAG1_HEAD_SIZE, 0, + frag_len + skb_network_header_len(skb)); + if (rc) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, frag_tag); + goto err; + } - while (payload_length - offset > 0) { - int len = frag_plen; + frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; + frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; + frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); - head[4] = dgram_offset >> 3; + do { + dgram_offset += frag_len; + skb_offset += frag_len; + skb_unprocessed -= frag_len; + frag_len = min(frag_cap, skb_unprocessed); - if (payload_length - offset < len) - len = payload_length - offset; + frag_hdr[4] = dgram_offset >> 3; - err = lowpan_fragment_xmit(skb, head, header_length, len, - offset, LOWPAN_DISPATCH_FRAGN); - if (err) { + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAGN_HEAD_SIZE, skb_offset, + frag_len); + if (rc) { pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n", - __func__, tag, offset); - goto exit; + __func__, frag_tag, skb_offset); + goto err; } + } while (skb_unprocessed > frag_cap); - offset += len; - dgram_offset += len; - } + consume_skb(skb); + return NET_XMIT_SUCCESS; -exit: - return err; +err: + kfree_skb(skb); + return rc; } static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) { - int err = -1; + struct ieee802154_hdr wpan_hdr; + int max_single; pr_debug("package xmit\n"); - skb->dev = lowpan_dev_info(dev)->real_dev; - if (skb->dev == NULL) { - pr_debug("ERROR: no real wpan device found\n"); - goto error; + if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; } - /* Send directly if less than the MTU minus the 2 checksum bytes. */ - if (skb->len <= IEEE802154_MTU - IEEE802154_MFR_SIZE) { - err = dev_queue_xmit(skb); - goto out; - } + max_single = ieee802154_max_payload(&wpan_hdr); - pr_debug("frame is too big, fragmentation is needed\n"); - err = lowpan_skb_fragmentation(skb, dev); -error: - dev_kfree_skb(skb); -out: - if (err) - pr_debug("ERROR: xmit failed\n"); + if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { + skb->dev = lowpan_dev_info(dev)->real_dev; + return dev_queue_xmit(skb); + } else { + netdev_tx_t rc; + + pr_debug("frame is too big, fragmentation is needed\n"); + rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); - return (err < 0) ? NET_XMIT_DROP : err; + return rc < 0 ? 
NET_XMIT_DROP : rc; + } } static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 786437bc0c08..4f0ed8780194 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -21,6 +21,7 @@ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> */ +#include <linux/capability.h> #include <linux/net.h> #include <linux/module.h> #include <linux/if_arp.h> @@ -45,7 +46,12 @@ struct dgram_sock { struct ieee802154_addr dst_addr; unsigned int bound:1; + unsigned int connected:1; unsigned int want_ack:1; + unsigned int secen:1; + unsigned int secen_override:1; + unsigned int seclevel:3; + unsigned int seclevel_override:1; }; static inline struct dgram_sock *dgram_sk(const struct sock *sk) @@ -73,10 +79,7 @@ static int dgram_init(struct sock *sk) { struct dgram_sock *ro = dgram_sk(sk); - ro->dst_addr.mode = IEEE802154_ADDR_LONG; - ro->dst_addr.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); ro->want_ack = 1; - memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN); return 0; } @@ -183,6 +186,7 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, } ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); + ro->connected = 1; out: release_sock(sk); @@ -194,10 +198,7 @@ static int dgram_disconnect(struct sock *sk, int flags) struct dgram_sock *ro = dgram_sk(sk); lock_sock(sk); - - ro->dst_addr.mode = IEEE802154_ADDR_LONG; - memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN); - + ro->connected = 0; release_sock(sk); return 0; @@ -209,7 +210,9 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, struct net_device *dev; unsigned int mtu; struct sk_buff *skb; + struct ieee802154_mac_cb *cb; struct dgram_sock *ro = dgram_sk(sk); + struct ieee802154_addr dst_addr; int hlen, tlen; int err; @@ -218,6 +221,11 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; } + if (!ro->connected && !msg->msg_name) + return -EDESTADDRREQ; + else if (ro->connected && msg->msg_name) + return -EISCONN; + if (!ro->bound) dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); else @@ -249,18 +257,28 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, skb_reset_network_header(skb); - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; - if (ro->want_ack) - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; + cb = mac_cb_init(skb); + cb->type = IEEE802154_FC_TYPE_DATA; + cb->ackreq = ro->want_ack; + + if (msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name); - mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); - err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &ro->dst_addr, - ro->bound ? &ro->src_addr : NULL, size); + ieee802154_addr_from_sa(&dst_addr, &daddr->addr); + } else { + dst_addr = ro->dst_addr; + } + + cb->secen = ro->secen; + cb->secen_override = ro->secen_override; + cb->seclevel = ro->seclevel; + cb->seclevel_override = ro->seclevel_override; + + err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, + ro->bound ? 
&ro->src_addr : NULL, size); if (err < 0) goto out_skb; - skb_reset_mac_header(skb); - err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto out_skb; @@ -419,6 +437,20 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname, case WPAN_WANTACK: val = ro->want_ack; break; + case WPAN_SECURITY: + if (!ro->secen_override) + val = WPAN_SECURITY_DEFAULT; + else if (ro->secen) + val = WPAN_SECURITY_ON; + else + val = WPAN_SECURITY_OFF; + break; + case WPAN_SECURITY_LEVEL: + if (!ro->seclevel_override) + val = WPAN_SECURITY_LEVEL_DEFAULT; + else + val = ro->seclevel; + break; default: return -ENOPROTOOPT; } @@ -434,6 +466,7 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct dgram_sock *ro = dgram_sk(sk); + struct net *net = sock_net(sk); int val; int err = 0; @@ -449,6 +482,47 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname, case WPAN_WANTACK: ro->want_ack = !!val; break; + case WPAN_SECURITY: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + switch (val) { + case WPAN_SECURITY_DEFAULT: + ro->secen_override = 0; + break; + case WPAN_SECURITY_ON: + ro->secen_override = 1; + ro->secen = 1; + break; + case WPAN_SECURITY_OFF: + ro->secen_override = 1; + ro->secen = 0; + break; + default: + err = -EINVAL; + break; + } + break; + case WPAN_SECURITY_LEVEL: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + if (val < WPAN_SECURITY_LEVEL_DEFAULT || + val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { + err = -EINVAL; + } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { + ro->seclevel_override = 0; + } else { + ro->seclevel_override = 1; + ro->seclevel = val; + } + break; default: err = -ENOPROTOOPT; break; diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c index bed42a48408c..c09294e39ca6 100644 --- a/net/ieee802154/header_ops.c +++ b/net/ieee802154/header_ops.c @@ -195,15 +195,16 @@ ieee802154_hdr_get_sechdr(const u8 *buf, struct ieee802154_sechdr *hdr) return pos; } +static int ieee802154_sechdr_lengths[4] = { + [IEEE802154_SCF_KEY_IMPLICIT] = 5, + [IEEE802154_SCF_KEY_INDEX] = 6, + [IEEE802154_SCF_KEY_SHORT_INDEX] = 10, + [IEEE802154_SCF_KEY_HW_INDEX] = 14, +}; + static int ieee802154_hdr_sechdr_len(u8 sc) { - switch (IEEE802154_SCF_KEY_ID_MODE(sc)) { - case IEEE802154_SCF_KEY_IMPLICIT: return 5; - case IEEE802154_SCF_KEY_INDEX: return 6; - case IEEE802154_SCF_KEY_SHORT_INDEX: return 10; - case IEEE802154_SCF_KEY_HW_INDEX: return 14; - default: return -EINVAL; - } + return ieee802154_sechdr_lengths[IEEE802154_SCF_KEY_ID_MODE(sc)]; } static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr) @@ -285,3 +286,40 @@ ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr) return pos; } EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs); + +int +ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr) +{ + const u8 *buf = skb_mac_header(skb); + int pos; + + pos = ieee802154_hdr_peek_addrs(skb, hdr); + if (pos < 0) + return -EINVAL; + + if (hdr->fc.security_enabled) { + u8 key_id_mode = IEEE802154_SCF_KEY_ID_MODE(*(buf + pos)); + int want = pos + ieee802154_sechdr_lengths[key_id_mode]; + + if (buf + want > skb_tail_pointer(skb)) + return -EINVAL; + + pos += ieee802154_hdr_get_sechdr(buf + pos, &hdr->sec); + } + + return pos; +} +EXPORT_SYMBOL_GPL(ieee802154_hdr_peek); + 
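The ieee802154_sechdr_lengths[] table added above and ieee802154_max_payload() just below encode the same IEEE 802.15.4 arithmetic: the auxiliary security header is one security-control byte plus a four-byte frame counter plus a key identifier of 0, 1, 5 or 9 bytes depending on the key id mode, and whatever remains of the 127-byte frame after the MAC header, that security header and the 2-byte FCS is payload. A minimal standalone sketch of that bookkeeping (constants are restated here for illustration rather than pulled from kernel headers, and the authentication tag is ignored):

#include <stdio.h>

/* Key id mode values from the 802.15.4 security control field. */
enum { KEY_IMPLICIT, KEY_INDEX, KEY_SHORT_INDEX, KEY_HW_INDEX };

/* 1 byte security control + 4 byte frame counter + key identifier. */
static int sechdr_len(int key_id_mode)
{
	static const int key_id_len[] = { 0, 1, 5, 9 };

	return 1 + 4 + key_id_len[key_id_mode & 3];	/* -> 5, 6, 10, 14 */
}

/* Payload left in a 127-byte frame after the MAC header, the auxiliary
 * security header and the 2-byte FCS (authentication tag ignored here).
 */
static int max_payload(int mhr_len, int secured, int key_id_mode)
{
	int overhead = mhr_len + 2;

	if (secured)
		overhead += sechdr_len(key_id_mode);

	return 127 - overhead;
}

int main(void)
{
	/* 9-byte MHR (short addressing), no security: prints 116. */
	printf("%d\n", max_payload(9, 0, KEY_IMPLICIT));
	/* Same frame secured with a 1-byte key index: prints 110. */
	printf("%d\n", max_payload(9, 1, KEY_INDEX));
	return 0;
}

The returned 5/6/10/14 byte lengths match the lookup table above, which is why the table can replace the switch statement without changing behaviour for valid key id modes.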
+int ieee802154_max_payload(const struct ieee802154_hdr *hdr) +{ + int hlen = ieee802154_hdr_minlen(hdr); + + if (hdr->fc.security_enabled) { + hlen += ieee802154_sechdr_lengths[hdr->sec.key_id_mode] - 1; + hlen += ieee802154_sechdr_authtag_len(&hdr->sec); + } + + return IEEE802154_MTU - hlen - IEEE802154_MFR_SIZE; +} +EXPORT_SYMBOL_GPL(ieee802154_max_payload); diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index 6693a5cf01ce..8b83a231299e 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -68,4 +68,23 @@ int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info); int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb); int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_keys(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_devs(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_seclevels(struct sk_buff *skb, + struct netlink_callback *cb); + #endif diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 04b20589d97a..26efcf4fd2ff 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -124,6 +124,26 @@ static const struct genl_ops ieee8021154_ops[] = { IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, ieee802154_dump_iface), IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams), + IEEE802154_OP(IEEE802154_LLSEC_GETPARAMS, ieee802154_llsec_getparams), + IEEE802154_OP(IEEE802154_LLSEC_SETPARAMS, ieee802154_llsec_setparams), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_KEY, NULL, + ieee802154_llsec_dump_keys), + IEEE802154_OP(IEEE802154_LLSEC_ADD_KEY, ieee802154_llsec_add_key), + IEEE802154_OP(IEEE802154_LLSEC_DEL_KEY, ieee802154_llsec_del_key), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEV, NULL, + ieee802154_llsec_dump_devs), + IEEE802154_OP(IEEE802154_LLSEC_ADD_DEV, ieee802154_llsec_add_dev), + IEEE802154_OP(IEEE802154_LLSEC_DEL_DEV, ieee802154_llsec_del_dev), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEVKEY, NULL, + ieee802154_llsec_dump_devkeys), + IEEE802154_OP(IEEE802154_LLSEC_ADD_DEVKEY, ieee802154_llsec_add_devkey), + IEEE802154_OP(IEEE802154_LLSEC_DEL_DEVKEY, ieee802154_llsec_del_devkey), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_SECLEVEL, NULL, + ieee802154_llsec_dump_seclevels), + IEEE802154_OP(IEEE802154_LLSEC_ADD_SECLEVEL, + ieee802154_llsec_add_seclevel), + IEEE802154_OP(IEEE802154_LLSEC_DEL_SECLEVEL, + ieee802154_llsec_del_seclevel), }; static const struct genl_multicast_group ieee802154_mcgrps[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 
5d285498c0f6..a3281b8bfd5b 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -715,3 +715,812 @@ out: dev_put(dev); return rc; } + + + +static int +ieee802154_llsec_parse_key_id(struct genl_info *info, + struct ieee802154_llsec_key_id *desc) +{ + memset(desc, 0, sizeof(*desc)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) + return -EINVAL; + + desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]); + + if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (!info->attrs[IEEE802154_ATTR_PAN_ID] && + !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] || + info->attrs[IEEE802154_ATTR_HW_ADDR])) + return -EINVAL; + + desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); + + if (info->attrs[IEEE802154_ATTR_SHORT_ADDR]) { + desc->device_addr.mode = IEEE802154_ADDR_SHORT; + desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); + } else { + desc->device_addr.mode = IEEE802154_ADDR_LONG; + desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + } + } + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]) + return -EINVAL; + + if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]) + return -EINVAL; + + if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]) + return -EINVAL; + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT) + desc->id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]); + + switch (desc->mode) { + case IEEE802154_SCF_KEY_SHORT_INDEX: + { + u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]); + desc->short_source = cpu_to_le32(source); + break; + } + case IEEE802154_SCF_KEY_HW_INDEX: + desc->extended_source = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]); + break; + } + + return 0; +} + +static int +ieee802154_llsec_fill_key_id(struct sk_buff *msg, + const struct ieee802154_llsec_key_id *desc) +{ + if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_MODE, desc->mode)) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, + desc->device_addr.pan_id)) + return -EMSGSIZE; + + if (desc->device_addr.mode == IEEE802154_ADDR_SHORT && + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, + desc->device_addr.short_addr)) + return -EMSGSIZE; + + if (desc->device_addr.mode == IEEE802154_ADDR_LONG && + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, + desc->device_addr.extended_addr)) + return -EMSGSIZE; + } + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_ID, desc->id)) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT, + le32_to_cpu(desc->short_source))) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && + nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, + desc->extended_source)) + return -EMSGSIZE; + + return 0; +} + +int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct net_device *dev = NULL; + int rc = -ENOBUFS; + struct ieee802154_mlme_ops *ops; + void *hdr; + struct ieee802154_llsec_params params; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + ops = ieee802154_mlme_ops(dev); + if (!ops->llsec) { + rc = -EOPNOTSUPP; + 
goto out_dev; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + goto out_dev; + + hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0, + IEEE802154_LLSEC_GETPARAMS); + if (!hdr) + goto out_free; + + rc = ops->llsec->get_params(dev, &params); + if (rc < 0) + goto out_free; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_ENABLED, params.enabled) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + be32_to_cpu(params.frame_counter)) || + ieee802154_llsec_fill_key_id(msg, &params.out_key)) + goto out_free; + + dev_put(dev); + + return ieee802154_nl_reply(msg, info); +out_free: + nlmsg_free(msg); +out_dev: + dev_put(dev); + return rc; +} + +int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev = NULL; + int rc = -EINVAL; + struct ieee802154_mlme_ops *ops; + struct ieee802154_llsec_params params; + int changed = 0; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (!info->attrs[IEEE802154_ATTR_LLSEC_ENABLED] && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE] && + !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) + goto out; + + ops = ieee802154_mlme_ops(dev); + if (!ops->llsec) { + rc = -EOPNOTSUPP; + goto out; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL] && + nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) > 7) + goto out; + + if (info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]) { + params.enabled = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]); + changed |= IEEE802154_LLSEC_PARAM_ENABLED; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) { + if (ieee802154_llsec_parse_key_id(info, &params.out_key)) + goto out; + + changed |= IEEE802154_LLSEC_PARAM_OUT_KEY; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) { + params.out_level = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]); + changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]) { + u32 fc = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + + params.frame_counter = cpu_to_be32(fc); + changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER; + } + + rc = ops->llsec->set_params(dev, &params, changed); + + dev_put(dev); + + return rc; +out: + dev_put(dev); + return rc; +} + + + +struct llsec_dump_data { + struct sk_buff *skb; + int s_idx, s_idx2; + int portid; + int nlmsg_seq; + struct net_device *dev; + struct ieee802154_mlme_ops *ops; + struct ieee802154_llsec_table *table; +}; + +static int +ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb, + int (*step)(struct llsec_dump_data*)) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + struct llsec_dump_data data; + int idx = 0; + int first_dev = cb->args[0]; + int rc; + + for_each_netdev(net, dev) { + if (idx < first_dev || dev->type != ARPHRD_IEEE802154) + goto skip; + + data.ops = ieee802154_mlme_ops(dev); + if (!data.ops->llsec) + goto skip; + + data.skb = skb; + data.s_idx = cb->args[1]; + data.s_idx2 = cb->args[2]; + data.dev = dev; + data.portid = NETLINK_CB(cb->skb).portid; + data.nlmsg_seq = cb->nlh->nlmsg_seq; + + data.ops->llsec->lock_table(dev); + data.ops->llsec->get_table(data.dev, &data.table); + rc = step(&data); + data.ops->llsec->unlock_table(dev); + + if (rc < 0) + break; + +skip: + idx++; + } + cb->args[0] = idx; + 
return skb->len; +} + +static int +ieee802154_nl_llsec_change(struct sk_buff *skb, struct genl_info *info, + int (*fn)(struct net_device*, struct genl_info*)) +{ + struct net_device *dev = NULL; + int rc = -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (!ieee802154_mlme_ops(dev)->llsec) + rc = -EOPNOTSUPP; + else + rc = fn(dev, info); + + dev_put(dev); + return rc; +} + + + +static int +ieee802154_llsec_parse_key(struct genl_info *info, + struct ieee802154_llsec_key *key) +{ + u8 frames; + u32 commands[256 / 32]; + + memset(key, 0, sizeof(*key)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] || + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES]) + return -EINVAL; + + frames = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES]); + if ((frames & BIT(IEEE802154_FC_TYPE_MAC_CMD)) && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) + return -EINVAL; + + if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) { + nla_memcpy(commands, + info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS], + 256 / 8); + + if (commands[0] || commands[1] || commands[2] || commands[3] || + commands[4] || commands[5] || commands[6] || + commands[7] >= BIT(IEEE802154_CMD_GTS_REQ + 1)) + return -EINVAL; + + key->cmd_frame_ids = commands[7]; + } + + key->frame_types = frames; + + nla_memcpy(key->key, info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES], + IEEE802154_LLSEC_KEY_SIZE); + + return 0; +} + +static int llsec_add_key(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_key key; + struct ieee802154_llsec_key_id id; + + if (ieee802154_llsec_parse_key(info, &key) || + ieee802154_llsec_parse_key_id(info, &id)) + return -EINVAL; + + return ops->llsec->add_key(dev, &id, &key); +} + +int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_key); +} + +static int llsec_remove_key(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_key_id id; + + if (ieee802154_llsec_parse_key_id(info, &id)) + return -EINVAL; + + return ops->llsec->del_key(dev, &id); +} + +int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_remove_key); +} + +static int +ieee802154_nl_fill_key(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_key_entry *key, + const struct net_device *dev) +{ + void *hdr; + u32 commands[256 / 32]; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_KEY); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + ieee802154_llsec_fill_key_id(msg, &key->id) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES, + key->key->frame_types)) + goto nla_put_failure; + + if (key->key->frame_types & BIT(IEEE802154_FC_TYPE_MAC_CMD)) { + memset(commands, 0, sizeof(commands)); + commands[7] = key->key->cmd_frame_ids; + if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS, + sizeof(commands), commands)) + goto nla_put_failure; + } + + if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_BYTES, + IEEE802154_LLSEC_KEY_SIZE, key->key->key)) + goto 
nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_keys(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_key_entry *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->keys, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_key(data->skb, data->portid, + data->nlmsg_seq, pos, data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_keys(struct sk_buff *skb, struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_keys); +} + + + +static int +llsec_parse_dev(struct genl_info *info, + struct ieee802154_llsec_device *dev) +{ + memset(dev, 0, sizeof(*dev)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || + !info->attrs[IEEE802154_ATTR_HW_ADDR] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] || + (!!info->attrs[IEEE802154_ATTR_PAN_ID] != + !!info->attrs[IEEE802154_ATTR_SHORT_ADDR])) + return -EINVAL; + + if (info->attrs[IEEE802154_ATTR_PAN_ID]) { + dev->pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); + dev->short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); + } else { + dev->short_addr = cpu_to_le16(IEEE802154_ADDR_UNDEF); + } + + dev->hwaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + dev->frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + dev->seclevel_exempt = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); + dev->key_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE]); + + if (dev->key_mode >= __IEEE802154_LLSEC_DEVKEY_MAX) + return -EINVAL; + + return 0; +} + +static int llsec_add_dev(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device desc; + + if (llsec_parse_dev(info, &desc)) + return -EINVAL; + + return ops->llsec->add_dev(dev, &desc); +} + +int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_dev); +} + +static int llsec_del_dev(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_HW_ADDR]) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + + return ops->llsec->del_dev(dev, devaddr); +} + +int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_dev); +} + +static int +ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_device *desc, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_DEV); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) || + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, + desc->short_addr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + 
desc->frame_counter) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + desc->seclevel_exempt) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, desc->key_mode)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_devs(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_device *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->devices, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_dev(data->skb, data->portid, + data->nlmsg_seq, pos, data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_devs(struct sk_buff *skb, struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devs); +} + + + +static int llsec_add_devkey(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device_key key; + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || + !info->attrs[IEEE802154_ATTR_HW_ADDR] || + ieee802154_llsec_parse_key_id(info, &key.key_id)) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + key.frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + + return ops->llsec->add_devkey(dev, devaddr, &key); +} + +int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_devkey); +} + +static int llsec_del_devkey(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device_key key; + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_HW_ADDR] || + ieee802154_llsec_parse_key_id(info, &key.key_id)) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + + return ops->llsec->del_devkey(dev, devaddr, &key); +} + +int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_devkey); +} + +static int +ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq, + __le64 devaddr, + const struct ieee802154_llsec_device_key *devkey, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_DEVKEY); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + devkey->frame_counter) || + ieee802154_llsec_fill_key_id(msg, &devkey->key_id)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_devkeys(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_device *dpos; + struct ieee802154_llsec_device_key *kpos; + int rc = 0, idx = 0, idx2; + + list_for_each_entry(dpos, &data->table->devices, list) { + if (idx++ < data->s_idx) + continue; + + idx2 = 0; + + list_for_each_entry(kpos, &dpos->keys, list) { + if (idx2++ < data->s_idx2) + continue; + + if (ieee802154_nl_fill_devkey(data->skb, 
data->portid, + data->nlmsg_seq, + dpos->hwaddr, kpos, + data->dev)) { + return rc = -EMSGSIZE; + } + + data->s_idx2++; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devkeys); +} + + + +static int +llsec_parse_seclevel(struct genl_info *info, + struct ieee802154_llsec_seclevel *sl) +{ + memset(sl, 0, sizeof(*sl)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE] || + !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]) + return -EINVAL; + + sl->frame_type = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE]); + if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) { + if (!info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]) + return -EINVAL; + + sl->cmd_frame_id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]); + } + + sl->sec_levels = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS]); + sl->device_override = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); + + return 0; +} + +static int llsec_add_seclevel(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_seclevel sl; + + if (llsec_parse_seclevel(info, &sl)) + return -EINVAL; + + return ops->llsec->add_seclevel(dev, &sl); +} + +int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_seclevel); +} + +static int llsec_del_seclevel(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_seclevel sl; + + if (llsec_parse_seclevel(info, &sl)) + return -EINVAL; + + return ops->llsec->del_seclevel(dev, &sl); +} + +int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_seclevel); +} + +static int +ieee802154_nl_fill_seclevel(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_seclevel *sl, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_SECLEVEL); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_FRAME_TYPE, sl->frame_type) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVELS, sl->sec_levels) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + sl->device_override)) + goto nla_put_failure; + + if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD && + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_CMD_FRAME_ID, + sl->cmd_frame_id)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_seclevels(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_seclevel *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->security_levels, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_seclevel(data->skb, data->portid, + data->nlmsg_seq, pos, + data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_seclevels(struct sk_buff 
*skb, + struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_seclevels); +} diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index fd7be5e45cef..3a703ab88348 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -62,5 +62,21 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, }, [IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, }, + + [IEEE802154_ATTR_LLSEC_ENABLED] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_SECLEVEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_MODE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT] = { .type = NLA_U32, }, + [IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED] = { .type = NLA_HW_ADDR, }, + [IEEE802154_ATTR_LLSEC_KEY_ID] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_FRAME_COUNTER] = { .type = NLA_U32 }, + [IEEE802154_ATTR_LLSEC_KEY_BYTES] = { .len = 16, }, + [IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS] = { .len = 258 / 8 }, + [IEEE802154_ATTR_LLSEC_FRAME_TYPE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_CMD_FRAME_ID] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_SECLEVELS] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] = { .type = NLA_U8, }, }; diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index ef2d54372b13..6f1428c4870b 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -36,7 +36,7 @@ struct lowpan_frag_info { u8 d_offset; }; -struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) +static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) { return (struct lowpan_frag_info *)skb->cb; } @@ -120,6 +120,8 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, struct inet_frag_queue *q; struct lowpan_create_arg arg; unsigned int hash; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); arg.tag = frag_info->d_tag; arg.d_size = frag_info->d_size; @@ -129,7 +131,7 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, read_lock(&lowpan_frags.lock); hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); - q = inet_frag_find(&net->ieee802154_lowpan.frags, + q = inet_frag_find(&ieee802154_lowpan->frags, &lowpan_frags, &arg, hash); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); @@ -357,6 +359,8 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) struct net *net = dev_net(skb->dev); struct lowpan_frag_info *frag_info = lowpan_cb(skb); struct ieee802154_addr source, dest; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); int err; source = mac_cb(skb)->source; @@ -366,10 +370,10 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) if (err < 0) goto err; - if (frag_info->d_size > net->ieee802154_lowpan.max_dsize) + if (frag_info->d_size > ieee802154_lowpan->max_dsize) goto err; - inet_frag_evictor(&net->ieee802154_lowpan.frags, &lowpan_frags, false); + inet_frag_evictor(&ieee802154_lowpan->frags, &lowpan_frags, false); fq = fq_find(net, frag_info, &source, &dest); if (fq != NULL) { @@ -436,6 +440,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); table = 
lowpan_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { @@ -444,10 +450,10 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].data = &net->ieee802154_lowpan.frags.high_thresh; - table[1].data = &net->ieee802154_lowpan.frags.low_thresh; - table[2].data = &net->ieee802154_lowpan.frags.timeout; - table[3].data = &net->ieee802154_lowpan.max_dsize; + table[0].data = &ieee802154_lowpan->frags.high_thresh; + table[1].data = &ieee802154_lowpan->frags.low_thresh; + table[2].data = &ieee802154_lowpan->frags.timeout; + table[3].data = &ieee802154_lowpan->max_dsize; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -458,7 +464,7 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->ieee802154_lowpan.sysctl.frags_hdr = hdr; + ieee802154_lowpan->sysctl.frags_hdr = hdr; return 0; err_reg: @@ -471,9 +477,11 @@ err_alloc: static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net) { struct ctl_table *table; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); - table = net->ieee802154_lowpan.sysctl.frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->ieee802154_lowpan.sysctl.frags_hdr); + table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } @@ -514,20 +522,26 @@ static inline void lowpan_frags_sysctl_unregister(void) static int __net_init lowpan_frags_init_net(struct net *net) { - net->ieee802154_lowpan.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - net->ieee802154_lowpan.frags.low_thresh = IPV6_FRAG_LOW_THRESH; - net->ieee802154_lowpan.frags.timeout = IPV6_FRAG_TIMEOUT; - net->ieee802154_lowpan.max_dsize = 0xFFFF; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); - inet_frags_init_net(&net->ieee802154_lowpan.frags); + ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; + ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; + ieee802154_lowpan->max_dsize = 0xFFFF; + + inet_frags_init_net(&ieee802154_lowpan->frags); return lowpan_frags_ns_sysctl_register(net); } static void __net_exit lowpan_frags_exit_net(struct net *net) { + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); + lowpan_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&net->ieee802154_lowpan.frags, &lowpan_frags); + inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); } static struct pernet_operations lowpan_frags_ops = { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6d6dd345bc4d..d5e6836cf772 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -254,7 +254,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, struct inet_sock *inet; struct proto *answer_prot; unsigned char answer_flags; - char answer_no_check; int try_loading_module = 0; int err; @@ -312,7 +311,6 @@ lookup_protocol: sock->ops = answer->ops; answer_prot = answer->prot; - answer_no_check = answer->no_check; answer_flags = answer->flags; rcu_read_unlock(); @@ -324,7 +322,6 @@ lookup_protocol: goto out; err = 0; - sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; @@ -1002,7 +999,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, - 
.no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }, @@ -1012,7 +1008,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }, @@ -1021,7 +1016,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_ICMP, .prot = &ping_prot, .ops = &inet_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }, @@ -1030,7 +1024,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, } }; @@ -1261,10 +1254,12 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_MPLS | 0))) goto out; @@ -1476,22 +1471,20 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); -unsigned long snmp_fold_field(void __percpu *mib[], int offt) +unsigned long snmp_fold_field(void __percpu *mib, int offt) { unsigned long res = 0; - int i, j; + int i; - for_each_possible_cpu(i) { - for (j = 0; j < SNMP_ARRAY_SZ; j++) - res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt); - } + for_each_possible_cpu(i) + res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt); return res; } EXPORT_SYMBOL_GPL(snmp_fold_field); #if BITS_PER_LONG==32 -u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) +u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset) { u64 res = 0; int cpu; @@ -1502,7 +1495,7 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) u64 v; unsigned int start; - bhptr = per_cpu_ptr(mib[0], cpu); + bhptr = per_cpu_ptr(mib, cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { start = u64_stats_fetch_begin_irq(syncp); @@ -1516,25 +1509,6 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) EXPORT_SYMBOL_GPL(snmp_fold_field64); #endif -int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) -{ - BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize, align); - if (!ptr[0]) - return -ENOMEM; - -#if SNMP_ARRAY_SZ == 2 - ptr[1] = __alloc_percpu(mibsize, align); - if (!ptr[1]) { - free_percpu(ptr[0]); - ptr[0] = NULL; - return -ENOMEM; - } -#endif - return 0; -} -EXPORT_SYMBOL_GPL(snmp_mib_init); - #ifdef CONFIG_IP_MULTICAST static const struct net_protocol igmp_protocol = { .handler = igmp_rcv, @@ -1570,40 +1544,30 @@ static __net_init int ipv4_mib_init_net(struct net *net) { int i; - if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, - sizeof(struct tcp_mib), - __alignof__(struct tcp_mib)) < 0) + net->mib.tcp_statistics = alloc_percpu(struct tcp_mib); + if (!net->mib.tcp_statistics) goto err_tcp_mib; - if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + net->mib.ip_statistics = alloc_percpu(struct ipstats_mib); + if (!net->mib.ip_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet_stats; - af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i); + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i); u64_stats_init(&af_inet_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i); - 
u64_stats_init(&af_inet_stats->syncp); -#endif } - if (snmp_mib_init((void __percpu **)net->mib.net_statistics, - sizeof(struct linux_mib), - __alignof__(struct linux_mib)) < 0) + net->mib.net_statistics = alloc_percpu(struct linux_mib); + if (!net->mib.net_statistics) goto err_net_mib; - if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udp_statistics = alloc_percpu(struct udp_mib); + if (!net->mib.udp_statistics) goto err_udp_mib; - if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udplite_statistics = alloc_percpu(struct udp_mib); + if (!net->mib.udplite_statistics) goto err_udplite_mib; - if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, - sizeof(struct icmp_mib), - __alignof__(struct icmp_mib)) < 0) + net->mib.icmp_statistics = alloc_percpu(struct icmp_mib); + if (!net->mib.icmp_statistics) goto err_icmp_mib; net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), GFP_KERNEL); @@ -1614,17 +1578,17 @@ static __net_init int ipv4_mib_init_net(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void __percpu **)net->mib.icmp_statistics); + free_percpu(net->mib.icmp_statistics); err_icmp_mib: - snmp_mib_free((void __percpu **)net->mib.udplite_statistics); + free_percpu(net->mib.udplite_statistics); err_udplite_mib: - snmp_mib_free((void __percpu **)net->mib.udp_statistics); + free_percpu(net->mib.udp_statistics); err_udp_mib: - snmp_mib_free((void __percpu **)net->mib.net_statistics); + free_percpu(net->mib.net_statistics); err_net_mib: - snmp_mib_free((void __percpu **)net->mib.ip_statistics); + free_percpu(net->mib.ip_statistics); err_ip_mib: - snmp_mib_free((void __percpu **)net->mib.tcp_statistics); + free_percpu(net->mib.tcp_statistics); err_tcp_mib: return -ENOMEM; } @@ -1632,12 +1596,12 @@ err_tcp_mib: static __net_exit void ipv4_mib_exit_net(struct net *net) { kfree(net->mib.icmpmsg_statistics); - snmp_mib_free((void __percpu **)net->mib.icmp_statistics); - snmp_mib_free((void __percpu **)net->mib.udplite_statistics); - snmp_mib_free((void __percpu **)net->mib.udp_statistics); - snmp_mib_free((void __percpu **)net->mib.net_statistics); - snmp_mib_free((void __percpu **)net->mib.ip_statistics); - snmp_mib_free((void __percpu **)net->mib.tcp_statistics); + free_percpu(net->mib.icmp_statistics); + free_percpu(net->mib.udplite_statistics); + free_percpu(net->mib.udp_statistics); + free_percpu(net->mib.net_statistics); + free_percpu(net->mib.ip_statistics); + free_percpu(net->mib.tcp_statistics); } static __net_initdata struct pernet_operations ipv4_mib_ops = { @@ -1736,13 +1700,9 @@ static int __init inet_init(void) BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); - sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); - if (!sysctl_local_reserved_ports) - goto out; - rc = proto_register(&tcp_prot, 1); if (rc) - goto out_free_reserved_ports; + goto out; rc = proto_register(&udp_prot, 1); if (rc) @@ -1852,8 +1812,6 @@ out_unregister_udp_proto: proto_unregister(&udp_prot); out_unregister_tcp_proto: proto_unregister(&tcp_prot); -out_free_reserved_ports: - kfree(sysctl_local_reserved_ports); goto out; } diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 8b5134c582f1..a3095fdefbed 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -86,18 +86,26 @@ out: } EXPORT_SYMBOL(ip4_datagram_connect); +/* Because UDP xmit path can manipulate 
sk_dst_cache without holding + * socket lock, we need to use sk_dst_set() here, + * even if we own the socket lock. + */ void ip4_datagram_release_cb(struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ip_options_rcu *inet_opt; __be32 daddr = inet->inet_daddr; + struct dst_entry *dst; struct flowi4 fl4; struct rtable *rt; - if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0)) - return; - rcu_read_lock(); + + dst = __sk_dst_get(sk); + if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) { + rcu_read_unlock(); + return; + } inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; @@ -105,8 +113,10 @@ void ip4_datagram_release_cb(struct sock *sk) inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); - if (!IS_ERR(rt)) - __sk_dst_set(sk, &rt->dst); + + dst = !IS_ERR(rt) ? &rt->dst : NULL; + sk_dst_set(sk, dst); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ip4_datagram_release_cb); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bdbf68bb2e2d..e9449376b58e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -106,7 +106,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; -static DEFINE_SPINLOCK(inet_addr_hash_lock); static u32 inet_addr_hash(struct net *net, __be32 addr) { @@ -119,16 +118,14 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) { u32 hash = inet_addr_hash(net, ifa->ifa_local); - spin_lock(&inet_addr_hash_lock); + ASSERT_RTNL(); hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); - spin_unlock(&inet_addr_hash_lock); } static void inet_hash_remove(struct in_ifaddr *ifa) { - spin_lock(&inet_addr_hash_lock); + ASSERT_RTNL(); hlist_del_init_rcu(&ifa->hash); - spin_unlock(&inet_addr_hash_lock); } /** @@ -830,7 +827,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) ifa_existing = find_matching_ifa(ifa); if (!ifa_existing) { /* It would be best to check for !NLM_F_CREATE here but - * userspace alreay relies on not having to provide this. + * userspace already relies on not having to provide this. */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 250be7421ab3..4e9619bca732 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -84,7 +84,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, ptr--; } if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) { *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, skb->len, 0)); @@ -93,28 +94,6 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, } EXPORT_SYMBOL_GPL(gre_build_header); -static __sum16 check_checksum(struct sk_buff *skb) -{ - __sum16 csum = 0; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - - if (!csum) - break; - /* Fall through. 
*/ - - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - break; - } - - return csum; -} - static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { @@ -141,7 +120,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, options = (__be32 *)(greh + 1); if (greh->flags & GRE_CSUM) { - if (check_checksum(skb)) { + if (skb_checksum_simple_validate(skb)) { *csum_err = true; return -EINVAL; } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index f1d32280cb54..eb92deb12666 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP))) goto out; @@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, goto out; csum = !!(greh->flags & GRE_CSUM); + if (csum) + skb->encap_hdr_csum = 1; if (unlikely(!pskb_may_pull(skb, ghl))) goto out; @@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } } - greh = (struct gre_base_hdr *)(skb->data); + skb_reset_transport_header(skb); + + greh = (struct gre_base_hdr *) + skb_transport_header(skb); pcsum = (__be32 *)(greh + 1); *pcsum = 0; - *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + *(__sum16 *)pcsum = gso_make_checksum(skb, 0); } __skb_push(skb, tnl_hlen - ghl); @@ -125,10 +131,12 @@ static __sum16 gro_skb_checksum(struct sk_buff *skb) csum_partial(skb->data, skb_gro_offset(skb), 0)); sum = csum_fold(NAPI_GRO_CB(skb)->csum); if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { - if (unlikely(!sum)) + if (unlikely(!sum) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); - } else + } else { skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + } return sum; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0134663fdbce..79c3d947a481 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct sock *sk; struct inet_sock *inet; __be32 daddr, saddr; + u32 mark = IP4_REPLY_MARK(net, skb->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; + sk->sk_mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; saddr = fib_compute_spec_dst(skb); ipc.opt = NULL; @@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = saddr; + fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, const struct iphdr *iph, - __be32 saddr, u8 tos, + __be32 saddr, u8 tos, u32 mark, int type, int code, struct icmp_bxm *param) { @@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->daddr = (param->replyopts.opt.opt.srr ? 
param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; + fl4->flowi4_mark = mark; fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; @@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct flowi4 fl4; __be32 saddr; u8 tos; + u32 mark; struct net *net; struct sock *sk; @@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; + mark = IP4_REPLY_MARK(net, skb_in->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) goto out_unlock; @@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param->skb = skb_in; icmp_param->offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; + sk->sk_mark = mark; ipc.addr = iph->saddr; ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; ipc.ttl = 0; ipc.tos = -1; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, icmp_param); if (IS_ERR(rt)) goto out_unlock; @@ -908,16 +915,8 @@ int icmp_rcv(struct sk_buff *skb) ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_fold(skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - if (__skb_checksum_complete(skb)) - goto csum_error; - } + if (skb_checksum_simple_validate(skb)) + goto csum_error; if (!pskb_pull(skb, sizeof(*icmph))) goto error; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 97e4d1655d26..6748d420f714 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; @@ -988,16 +988,8 @@ int igmp_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct igmphdr))) goto drop; - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_fold(skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - if (__skb_checksum_complete(skb)) - goto drop; - } + if (skb_checksum_simple_validate(skb)) + goto drop; ih = igmp_hdr(skb); switch (ih->type) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a56b8e6e866a..14d02ea905b6 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -29,9 +29,6 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif -unsigned long *sysctl_local_reserved_ports; -EXPORT_SYMBOL(sysctl_local_reserved_ports); - void inet_get_local_port_range(struct net *net, int *low, int *high) { unsigned int seq; @@ -113,7 +110,7 @@ again: smallest_size = -1; do { - if (inet_is_reserved_local_port(rover)) + if (inet_is_local_reserved_port(net, rover)) goto next_nolock; head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; 
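The inet_connection_sock.c hunk above replaces the global sysctl_local_reserved_ports bitmap with the per-namespace inet_is_local_reserved_port() test, but the underlying structure is the same idea either way: a 65536-bit map with one bit per port (the old global was allocated as 65536 / 8 bytes, as the af_inet.c hunk earlier shows), consulted while scanning for a free ephemeral port. A rough userspace sketch of that idea follows; the names and layout are illustrative, not the kernel's API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* One bit per TCP/UDP port; stands in for the per-netns bitmap. */
static uint32_t reserved_ports[65536 / 32];

static void reserve_local_port(uint16_t port)
{
	reserved_ports[port / 32] |= UINT32_C(1) << (port % 32);
}

static bool is_local_reserved_port(uint16_t port)
{
	return reserved_ports[port / 32] & (UINT32_C(1) << (port % 32));
}

int main(void)
{
	reserve_local_port(49100);

	/* An ephemeral-port search skips reserved entries, much like
	 * the "goto next_nolock" path in the hunk above. */
	for (uint16_t port = 49098; port < 49103; port++)
		printf("%d %s\n", port,
		       is_local_reserved_port(port) ? "reserved" : "usable");
	return 0;
}

Keeping the check a single bit test matters because the bind-time search may probe many candidate ports under load; moving the bitmap into struct net is what lets each namespace reserve ports independently.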
@@ -408,7 +405,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); int flags = inet_sk_flowi_flags(sk); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, flags, @@ -445,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rcu_read_lock(); opt = rcu_dereference(newinet->inet_opt); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, @@ -680,6 +677,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); newsk->sk_write_space = sk_stream_write_space; + newsk->sk_mark = inet_rsk(req)->ir_mark; + newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8b9cf279450d..43116e8c8e13 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -274,7 +274,7 @@ struct sock *__inet_lookup_established(struct net *net, const __be32 daddr, const u16 hnum, const int dif) { - INET_ADDR_COOKIE(acookie, saddr, daddr) + INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; const struct hlist_nulls_node *node; @@ -327,7 +327,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, __be32 daddr = inet->inet_rcv_saddr; __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) + INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, @@ -500,7 +500,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; - if (inet_is_reserved_local_port(port)) + if (inet_is_local_reserved_port(net, port)) continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 56cd458a1b8c..bd5f5928167d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -26,20 +26,7 @@ * Theory of operations. * We keep one entry for each peer IP address. The nodes contains long-living * information about the peer which doesn't depend on routes. - * At this moment this information consists only of ID field for the next - * outgoing IP packet. This field is incremented with each packet as encoded - * in inet_getid() function (include/net/inetpeer.h). - * At the moment of writing this notes identifier of IP packets is generated - * to be unpredictable using this code only for packets subjected - * (actually or potentially) to defragmentation. I.e. DF packets less than - * PMTU in size when local fragmentation is disabled use a constant ID and do - * not use this code (see ip_select_ident() in include/net/ip.h). * - * Route cache entries hold references to our nodes. - * New cache entries get references via lookup by destination IP address in - * the avl tree. The reference is grabbed only when it's needed i.e. 
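Two more small helpers are assumed by the hunks above; again these are hedged sketches rather than verbatim headers. ir_mark is chosen once, when the request socket is minted, and the reserved-ports bitmap moves from a global into struct netns_ipv4:

/* With tcp_fwmark_accept enabled, an unmarked listener stamps the
 * request (and hence the child, via inet_csk_clone_lock() above)
 * with the mark of the SYN that created it.
 */
static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
{
	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
		return skb->mark;
	return sk->sk_mark;
}

/* Per-netns replacement for the old inet_is_reserved_local_port(). */
static inline bool inet_is_local_reserved_port(struct net *net, int port)
{
	if (!net->ipv4.sysctl_local_reserved_ports)
		return false;
	return test_bit(port, net->ipv4.sysctl_local_reserved_ports);
}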
only - * when we try to output IP packet which needs an unpredictable ID (see - * __ip_select_ident() in net/ipv4/route.c). * Nodes are removed only when reference counter goes to 0. * When it's happened the node may be removed when a sufficient amount of * time has been passed since its last use. The less-recently-used entry can @@ -62,7 +49,6 @@ * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * daddr: unchangeable - * ip_id_count: atomic value (no lock needed) */ static struct kmem_cache *peer_cachep __read_mostly; @@ -120,7 +106,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min static void inetpeer_gc_worker(struct work_struct *work) { struct inet_peer *p, *n, *c; - LIST_HEAD(list); + struct list_head list; spin_lock_bh(&gc_lock); list_replace_init(&gc_list, &list); @@ -497,10 +483,6 @@ relookup: p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, - (daddr->family == AF_INET) ? - secure_ip_id(daddr->addr.a4) : - secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 6f111e48e11c..3a83ce5efa80 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -42,7 +42,7 @@ static bool ip_may_fragment(const struct sk_buff *skb) { return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || - skb->local_df; + skb->ignore_df; } static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 94213c891565..9b842544aea3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -410,7 +410,7 @@ static int ipgre_open(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_gre(dev_net(dev), &fl4, + rt = ip_route_output_gre(t->net, &fl4, t->parms.iph.daddr, t->parms.iph.saddr, t->parms.o_key, @@ -434,7 +434,7 @@ static int ipgre_close(struct net_device *dev) if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; - in_dev = inetdev_by_index(dev_net(dev), t->mlink); + in_dev = inetdev_by_index(t->net, t->mlink); if (in_dev) ip_mc_dec_group(in_dev, t->parms.iph.daddr); } @@ -478,7 +478,7 @@ static void __gre_tunnel_init(struct net_device *dev) dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; - dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; + dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { @@ -649,6 +649,7 @@ static void ipgre_tap_setup(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &gre_tap_netdev_ops; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip_tunnel_setup(dev, gre_tap_net_id); } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index f4ab72e19af9..5e7aecea05cd 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -364,7 +364,7 @@ int ip_options_compile(struct net *net, } if (optptr[2] <= optlen) { unsigned char *timeptr = NULL; - if (optptr[2]+3 > optptr[1]) { + if (optptr[2]+3 > optlen) { pp_ptr = optptr + 2; goto error; } @@ -376,7 +376,7 @@ int ip_options_compile(struct net *net, optptr[2] += 4; break; case IPOPT_TS_TSANDADDR: - if (optptr[2]+7 > optptr[1]) { + if (optptr[2]+7 > optlen) { pp_ptr = optptr + 2; goto error; } @@ -390,7 +390,7 @@ int ip_options_compile(struct net 
*net, optptr[2] += 8; break; case IPOPT_TS_PRESPEC: - if (optptr[2]+7 > optptr[1]) { + if (optptr[2]+7 > optlen) { pp_ptr = optptr + 2; goto error; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a52f50187b54..8d3b6b0e9857 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(skb, &rt->dst, sk); + ip_select_ident(skb, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -415,7 +415,7 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) + if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; @@ -430,8 +430,7 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(skb, &rt->dst, sk, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); + ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; @@ -501,7 +500,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); mtu = ip_skb_dst_mtu(skb); - if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); @@ -866,7 +865,7 @@ static int __ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; + maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1189,7 +1188,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; + maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; if (cork->length + size > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1350,10 +1349,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - skb->local_df = ip_sk_local_df(sk); + skb->ignore_df = ip_sk_ignore_df(sk); /* DF bit is set when we want to see DF on outgoing frames. - * If local_df is set too, we still allow to fragment this frame + * If ignore_df is set too, we still allow to fragment this frame * locally. 
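The ident calls in these ip_output.c hunks drop their dst argument and gain segment awareness. A hedged sketch of the assumed ip_select_ident_segs() shape (plain ip_select_ident() would be the segs == 1 case), keeping the long-standing workaround for buggy VJ header compression:

static inline void ip_select_ident_segs_sketch(struct sk_buff *skb,
					       struct sock *sk, int segs)
{
	struct iphdr *iph = ip_hdr(skb);

	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
		/* Atomic packets need no unpredictable id, but keep a
		 * simple per-socket counter so the id still changes.
		 */
		if (sk && inet_sk(sk)->inet_daddr) {
			iph->id = htons(inet_sk(sk)->id);
			inet_sk(sk)->id += segs;
		} else {
			iph->id = 0;
		}
	} else {
		__ip_select_ident(iph, segs);
	}
}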
*/ if (inet->pmtudisc == IP_PMTUDISC_DO || inet->pmtudisc == IP_PMTUDISC_PROBE || @@ -1379,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(skb, &rt->dst, sk); + ip_select_ident(skb, sk); if (opt) { iph->ihl += opt->optlen>>2; @@ -1546,7 +1545,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, daddr = replyopts.opt.opt.faddr; } - flowi4_init_output(&fl4, arg->bound_dev_if, 0, + flowi4_init_output(&fl4, arg->bound_dev_if, + IP4_REPLY_MARK(net, skb->mark), RT_TOS(arg->tos), RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2acc2337d38b..097b3e7c1e8f 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -268,6 +268,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; __be32 key = parms->i_key; + __be16 flags = parms->i_flags; int link = parms->link; struct ip_tunnel *t = NULL; struct hlist_head *head = ip_bucket(itn, parms); @@ -275,9 +276,9 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && - key == t->parms.i_key && link == t->parms.link && - type == t->dev->type) + type == t->dev->type && + ip_tunnel_key_match(&t->parms, flags, key)) break; } return t; @@ -395,11 +396,10 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, struct ip_tunnel_net *itn, struct ip_tunnel_parm *parms) { - struct ip_tunnel *nt, *fbt; + struct ip_tunnel *nt; struct net_device *dev; BUG_ON(!itn->fb_tunnel_dev); - fbt = netdev_priv(itn->fb_tunnel_dev); dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); if (IS_ERR(dev)) return ERR_CAST(dev); @@ -668,6 +668,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev->needed_headroom = max_headroom; if (skb_cow_head(skb, dev->needed_headroom)) { + ip_rt_put(rt); dev->stats.tx_dropped++; kfree_skb(skb); return; @@ -747,19 +748,19 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) goto done; if (p->iph.ttl) p->iph.frag_off |= htons(IP_DF); - if (!(p->i_flags&TUNNEL_KEY)) - p->i_key = 0; - if (!(p->o_flags&TUNNEL_KEY)) - p->o_key = 0; + if (!(p->i_flags & VTI_ISVTI)) { + if (!(p->i_flags & TUNNEL_KEY)) + p->i_key = 0; + if (!(p->o_flags & TUNNEL_KEY)) + p->o_key = 0; + } t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); if (!t && (cmd == SIOCADDTUNNEL)) { t = ip_tunnel_create(net, itn, p); - if (IS_ERR(t)) { - err = PTR_ERR(t); - break; - } + err = PTR_ERR_OR_ZERO(t); + break; } if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { if (t != NULL) { diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index bcf206c79005..f4c987bb7e94 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); + __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) @@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, return skb; } + /* If packet is not gso and we are resolving any partial checksum, + * clear encapsulation flag. 
This allows setting CHECKSUM_PARTIAL + * on the outer header without confusing devices that implement + * NETIF_F_IP_CSUM with encapsulation. + */ + if (csum_help) + skb->encapsulation = 0; + if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { err = skb_checksum_help(skb); if (unlikely(err)) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 13ef00f1e17b..b8960f3527f3 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -313,7 +313,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EINVAL; } - p.i_flags |= VTI_ISVTI; + if (!(p.i_flags & GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags & GRE_KEY)) + p.o_key = 0; + + p.i_flags = VTI_ISVTI; + err = ip_tunnel_ioctl(dev, &p, cmd); if (err) return err; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 812b18351462..62eaa005e146 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPIP, 0); + t->parms.link, 0, IPPROTO_IPIP, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPIP, 0); err = 0; goto out; @@ -486,4 +486,5 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("ipip"); MODULE_ALIAS_NETDEV("tunl0"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d84dc8d4c916..65bcaa789043 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -484,7 +484,7 @@ static void reg_vif_setup(struct net_device *dev) dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; - dev->netdev_ops = ®_vif_netdev_ops, + dev->netdev_ops = ®_vif_netdev_ops; dev->destructor = free_netdev; dev->features |= NETIF_F_NETNS_LOCAL; } @@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(skb, skb_dst(skb), NULL); + ip_select_ident(skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ee2886126e3d..f1787c04a4dd 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -91,17 +91,9 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, if (nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (!nat) { - /* NAT module was loaded late. 
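The same late-loaded-module fallback used to be duplicated in both NAT hooks; this series folds it into one helper, assumed to look roughly like the sketch below (the body is a reconstruction):

static inline struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
{
	struct nf_conn_nat *nat = nfct_nat(ct);

	if (nat)
		return nat;

	/* NAT module loaded late: a confirmed conntrack entry can no
	 * longer grow extensions, so NAT is simply skipped for it.
	 */
	if (nf_ct_is_confirmed(ct))
		return NULL;

	return nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
}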
*/ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f40f321b41fc..b8f6381c7d0b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -34,7 +34,7 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) if (!err) { ip_send_check(ip_hdr(skb)); - skb->local_df = 1; + skb->ignore_df = 1; } return err; diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index b5b256d45e67..3964157d826c 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -48,15 +48,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); - nat = nfct_nat(ct); - if (nat == NULL) { - /* Conntrack module was loaded late, can't add extension. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) - return NF_ACCEPT; - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ad737fad6d8b..ae0af9386f7c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -345,15 +345,15 @@ static void icmp_put(struct seq_file *seq) for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); @@ -379,7 +379,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %llu", - snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp_fold_field64(net->mib.ip_statistics, snmp4_ipstats_list[i].entry, offsetof(struct ipstats_mib, syncp))); @@ -395,11 +395,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", - snmp_fold_field((void __percpu **)net->mib.tcp_statistics, + snmp_fold_field(net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); else seq_printf(seq, " %lu", - snmp_fold_field((void __percpu 
**)net->mib.tcp_statistics, + snmp_fold_field(net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); } @@ -410,7 +410,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.udp_statistics, + snmp_fold_field(net->mib.udp_statistics, snmp4_udp_list[i].entry)); /* the UDP and UDP-Lite MIBs are the same */ @@ -421,7 +421,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.udplite_statistics, + snmp_fold_field(net->mib.udplite_statistics, snmp4_udp_list[i].entry)); seq_putc(seq, '\n'); @@ -458,7 +458,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nTcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.net_statistics, + snmp_fold_field(net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); @@ -468,7 +468,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) seq_printf(seq, " %llu", - snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp_fold_field64(net->mib.ip_statistics, snmp4_ipextstats_list[i].entry, offsetof(struct ipstats_mib, syncp))); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a9dbe58bdfe7..2c65160565e1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5e676be3daeb..082239ffe34a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -89,6 +89,7 @@ #include <linux/rcupdate.h> #include <linux/times.h> #include <linux/slab.h> +#include <linux/jhash.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/protocol.h> @@ -456,39 +457,19 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } -/* - * Peer allocation may fail only in serious out-of-memory conditions. However - * we still can generate some output. - * Random ID selection looks a bit dangerous because we have no chances to - * select ID being unique in a reasonable period of time. - * But broken packet identifier may be better than no packet at all. 
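The route.c hunk below is the core of the IP ID rework: instead of one inetpeer entry per destination, a fixed table of atomic counters is indexed by a keyed hash of the destination address, and a batch of ids is reserved per packet. A hedged sketch of the reservation step the new __ip_select_ident() calls (the table size and exact home of the helper are assumptions):

#define IP_IDENTS_SZ 2048u	/* assumed power-of-two table size */

static u32 ip_idents_reserve_sketch(u32 hash, int segs)
{
	atomic_t *id = ip_idents + hash % IP_IDENTS_SZ;

	/* Reserve a contiguous range, one id per GSO segment; hashing
	 * spreads flows across counters, so a collision only matters
	 * if it recurs within one reassembly window.
	 */
	return atomic_add_return(segs, id) - segs;
}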
- */ -static void ip_select_fb_ident(struct iphdr *iph) -{ - static DEFINE_SPINLOCK(ip_fb_id_lock); - static u32 ip_fallback_id; - u32 salt; - - spin_lock_bh(&ip_fb_id_lock); - salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); - iph->id = htons(salt & 0xFFFF); - ip_fallback_id = salt; - spin_unlock_bh(&ip_fb_id_lock); -} +atomic_t *ip_idents __read_mostly; +EXPORT_SYMBOL(ip_idents); -void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) +void __ip_select_ident(struct iphdr *iph, int segs) { - struct net *net = dev_net(dst->dev); - struct inet_peer *peer; + static u32 ip_idents_hashrnd __read_mostly; + u32 hash, id; - peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); - if (peer) { - iph->id = htons(inet_getid(peer, more)); - inet_putpeer(peer); - return; - } + net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); - ip_select_fb_ident(iph); + hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd); + id = ip_idents_reserve(hash, segs); + iph->id = htons(id); } EXPORT_SYMBOL(__ip_select_ident); @@ -993,6 +974,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, struct flowi4 fl4; struct rtable *rt; + if (!mark) + mark = IP4_REPLY_MARK(net, skb->mark); + __build_flow_key(&fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); @@ -1010,6 +994,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + + if (!fl4.flowi4_mark) + fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); + rt = __ip_route_output_key(sock_net(sk), &fl4); if (!IS_ERR(rt)) { __ip_rt_update_pmtu(rt, &fl4, mtu); @@ -2704,6 +2692,12 @@ int __init ip_rt_init(void) { int rc = 0; + ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); + if (!ip_idents) + panic("IP: failed to allocate ip_idents\n"); + + prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f2ed13c2125f..c86624b36a62 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->ir_rmt_port = th->source; ireq->ir_loc_addr = ip_hdr(skb)->daddr; ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_mark = inet_request_mark(sk, skb); ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? 
opt->faddr : ireq->ir_rmt_addr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5cde8f263d40..79a007c52558 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "ip_local_reserved_ports", - .data = NULL, /* initialized in sysctl_ipv4_init */ - .maxlen = 65536, - .mode = 0644, - .proc_handler = proc_do_large_bitmap, - }, - { .procname = "igmp_max_memberships", .data = &sysctl_igmp_max_memberships, .maxlen = sizeof(int), @@ -825,6 +818,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = ipv4_local_port_range, }, { + .procname = "ip_local_reserved_ports", + .data = &init_net.ipv4.sysctl_local_reserved_ports, + .maxlen = 65536, + .mode = 0644, + .proc_handler = proc_do_large_bitmap, + }, + { .procname = "ip_no_pmtu_disc", .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, .maxlen = sizeof(int), @@ -838,6 +838,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv4.sysctl_fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_fwmark_accept", + .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; @@ -862,8 +876,14 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (net->ipv4.ipv4_hdr == NULL) goto err_reg; + net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); + if (!net->ipv4.sysctl_local_reserved_ports) + goto err_ports; + return 0; +err_ports: + unregister_net_sysctl_table(net->ipv4.ipv4_hdr); err_reg: if (!net_eq(net, &init_net)) kfree(table); @@ -875,6 +895,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) { struct ctl_table *table; + kfree(net->ipv4.sysctl_local_reserved_ports); table = net->ipv4.ipv4_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.ipv4_hdr); kfree(table); @@ -888,16 +909,6 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { static __init int sysctl_ipv4_init(void) { struct ctl_table_header *hdr; - struct ctl_table *i; - - for (i = ipv4_table; i->procname; i++) { - if (strcmp(i->procname, "ip_local_reserved_ports") == 0) { - i->data = sysctl_local_reserved_ports; - break; - } - } - if (!i->procname) - return -EINVAL; hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); if (hdr == NULL) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4bd6d52eeffb..eb1dde37e678 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2916,6 +2916,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + + case TCP_FASTOPEN: + if (icsk->icsk_accept_queue.fastopenq != NULL) + val = icsk->icsk_accept_queue.fastopenq->max_qlen; + else + val = 0; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp + tp->tsoffset; break; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 821846fb0a7e..d5de69bc04f5 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -140,13 +140,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, 
in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 2b9464c93b88..7b09d8b49fa5 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return err; } -/* RFC2861 Check whether we are limited by application or congestion window - * This is the inverse of cwnd check in tcp_tso_should_defer - */ -bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) -{ - const struct tcp_sock *tp = tcp_sk(sk); - u32 left; - - if (in_flight >= tp->snd_cwnd) - return true; - - left = tp->snd_cwnd - in_flight; - if (sk_can_gso(sk) && - left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left < tp->xmit_size_goal_segs) - return true; - return left <= tcp_max_tso_deferred_mss(tp); -} -EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); - /* Slow start is used when congestion window is no greater than the slow start * threshold. We base on RFC2581 and also handle stretch ACKs properly. * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but @@ -337,11 +317,11 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; /* In "safe" area, increase. */ diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index b4f1b29b08bd..a9bd8a4828a9 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -304,13 +304,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f195d9316e55..62e48cf84e60 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -72,25 +72,224 @@ error: kfree(ctx); return err; } -/* Computes the fastopen cookie for the IP path. - * The path is a 128 bits long (pad with zeros for IPv4). - * - * The caller must check foc->len to determine if a valid cookie - * has been generated successfully. -*/ -void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, - struct tcp_fastopen_cookie *foc) +static bool __tcp_fastopen_cookie_gen(const void *path, + struct tcp_fastopen_cookie *foc) { - __be32 path[4] = { src, dst, 0, 0 }; struct tcp_fastopen_context *ctx; + bool ok = false; tcp_fastopen_init_key_once(true); rcu_read_lock(); ctx = rcu_dereference(tcp_fastopen_ctx); if (ctx) { - crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path); + crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); foc->len = TCP_FASTOPEN_COOKIE_SIZE; + ok = true; } rcu_read_unlock(); + return ok; +} + +/* Generate the fastopen cookie by doing aes128 encryption on both + * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6 + * addresses. For the longer IPv6 addresses use CBC-MAC. + * + * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. 
+ */ +static bool tcp_fastopen_cookie_gen(struct request_sock *req, + struct sk_buff *syn, + struct tcp_fastopen_cookie *foc) +{ + if (req->rsk_ops->family == AF_INET) { + const struct iphdr *iph = ip_hdr(syn); + + __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; + return __tcp_fastopen_cookie_gen(path, foc); + } + +#if IS_ENABLED(CONFIG_IPV6) + if (req->rsk_ops->family == AF_INET6) { + const struct ipv6hdr *ip6h = ipv6_hdr(syn); + struct tcp_fastopen_cookie tmp; + + if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { + struct in6_addr *buf = (struct in6_addr *) tmp.val; + int i = 4; + + for (i = 0; i < 4; i++) + buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; + return __tcp_fastopen_cookie_gen(buf, foc); + } + } +#endif + return false; +} + +static bool tcp_fastopen_create_child(struct sock *sk, + struct sk_buff *skb, + struct dst_entry *dst, + struct request_sock *req) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + struct sock *child; + + req->num_retrans = 0; + req->num_timeout = 0; + req->sk = NULL; + + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + if (child == NULL) + return false; + + spin_lock(&queue->fastopenq->lock); + queue->fastopenq->qlen++; + spin_unlock(&queue->fastopenq->lock); + + /* Initialize the child socket. Have to fix some values to take + * into account the child is a Fast Open socket and is created + * only out of the bits carried in the SYN packet. + */ + tp = tcp_sk(child); + + tp->fastopen_rsk = req; + /* Do a hold on the listener sk so that if the listener is being + * closed, the child that has been accepted can live on and still + * access listen_lock. + */ + sock_hold(sk); + tcp_rsk(req)->listener = sk; + + /* RFC1323: The window in SYN & SYN/ACK segments is never + * scaled. So correct it appropriately. + */ + tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + + /* Activate the retrans timer so that SYNACK can be retransmitted. + * The request socket is not added to the SYN table of the parent + * because it's been added to the accept queue directly. + */ + inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, + TCP_TIMEOUT_INIT, TCP_RTO_MAX); + + /* Add the child socket directly into the accept queue */ + inet_csk_reqsk_queue_add(sk, req, child); + + /* Now finish processing the fastopen child socket. */ + inet_csk(child)->icsk_af_ops->rebuild_header(child); + tcp_init_congestion_control(child); + tcp_mtup_init(child); + tcp_init_metrics(child); + tcp_init_buffer_space(child); + + /* Queue the data carried in the SYN packet. We need to first + * bump skb's refcnt because the caller will attempt to free it. + * + * XXX (TFO) - we honor a zero-payload TFO request for now, + * (any reason not to?) but no need to queue the skb since + * there is no data. How about SYN+FIN?
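Stepping back from the accept path for a moment: the userspace API this machinery serves is unchanged by the refactor. A hedged C sketch of how it is exercised (error handling elided; qlen is the cap behind the fastopenq->max_qlen check just below):

#define _GNU_SOURCE		/* for MSG_FASTOPEN */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static void tfo_usage_sketch(int srv, int cli,
			     const struct sockaddr_in *daddr,
			     const void *buf, size_t len)
{
	int qlen = 16;	/* cap on pending Fast Open children */

	/* Server: opt in before listen(). */
	setsockopt(srv, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
	listen(srv, SOMAXCONN);

	/* Client: MSG_FASTOPEN folds connect()+send() into one call,
	 * carrying data (and any previously learned cookie) in the SYN.
	 */
	sendto(cli, buf, len, MSG_FASTOPEN,
	       (const struct sockaddr *)daddr, sizeof(*daddr));
}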
+ */ + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) { + skb = skb_get(skb); + skb_dst_drop(skb); + __skb_pull(skb, tcp_hdr(skb)->doff * 4); + skb_set_owner_r(skb, child); + __skb_queue_tail(&child->sk_receive_queue, skb); + tp->syn_data_acked = 1; + } + tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + sk->sk_data_ready(sk); + bh_unlock_sock(child); + sock_put(child); + WARN_ON(req->sk == NULL); + return true; +} +EXPORT_SYMBOL(tcp_fastopen_create_child); + +static bool tcp_fastopen_queue_check(struct sock *sk) +{ + struct fastopen_queue *fastopenq; + + /* Make sure the listener has enabled fastopen, and we don't + * exceed the max # of pending TFO requests allowed before trying + * to validate the cookie in order to avoid burning CPU cycles + * unnecessarily. + * + * XXX (TFO) - The implication of checking the max_qlen before + * processing a cookie request is that clients can't differentiate + * between qlen overflow causing Fast Open to be disabled + * temporarily vs a server not supporting Fast Open at all. + */ + fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; + if (fastopenq == NULL || fastopenq->max_qlen == 0) + return false; + + if (fastopenq->qlen >= fastopenq->max_qlen) { + struct request_sock *req1; + spin_lock(&fastopenq->lock); + req1 = fastopenq->rskq_rst_head; + if ((req1 == NULL) || time_after(req1->expires, jiffies)) { + spin_unlock(&fastopenq->lock); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); + return false; + } + fastopenq->rskq_rst_head = req1->dl_next; + fastopenq->qlen--; + spin_unlock(&fastopenq->lock); + reqsk_free(req1); + } + return true; +} + +/* Returns true if we should perform Fast Open on the SYN. The cookie (foc) + * may be updated and returned to the client in the SYN-ACK later. E.g., Fast Open + * cookie request (foc->len == 0). + */ +bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct dst_entry *dst) +{ + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; + bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + + if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + (syn_data || foc->len >= 0) && + tcp_fastopen_queue_check(sk))) { + foc->len = -1; + return false; + } + + if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) + goto fastopen; + + if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) && + foc->len == TCP_FASTOPEN_COOKIE_SIZE && + foc->len == valid_foc.len && + !memcmp(foc->val, valid_foc.val, foc->len)) { + /* Cookie is valid. Create a (full) child socket to accept + * the data in SYN before returning a SYN-ACK to ack the + * data. If we fail to create the socket, fall back and + * ack the ISN only but include the same cookie. + * + * Note: Data-less SYN with valid cookie is allowed to send + * data in SYN_RECV state. + */ +fastopen: + if (tcp_fastopen_create_child(sk, skb, dst, req)) { + foc->len = -1; + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVE); + return true; + } + } + + NET_INC_STATS_BH(sock_net(sk), foc->len ?
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL : + LINUX_MIB_TCPFASTOPENCOOKIEREQD); + *foc = valid_foc; + return false; } +EXPORT_SYMBOL(tcp_try_fastopen); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8b9e7bad77c0..1c4908280d92 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -109,12 +109,12 @@ static void hstcp_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4a194acfd923..031361311a8b 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -227,12 +227,12 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index a15a799bf768..d8f8f05a4951 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -87,8 +87,7 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct hybla *ca = inet_csk_ca(sk); @@ -101,11 +100,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, ca->minrtt_us = tp->srtt_us; } - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (!ca->hybla_en) { - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); return; } diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 863d105e3015..5999b3972e64 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -255,8 +255,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state) /* * Increase window in response to successful acknowledgment. 
*/ -static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); @@ -265,7 +264,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, update_params(sk); /* RFC2861 only increase cwnd if fully utilized */ - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; /* In slow start */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3a26b3b23f16..40661fc1e233 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1167,7 +1167,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, } pkt_len = new_len; } - err = tcp_fragment(sk, skb, pkt_len, mss); + err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; } @@ -2241,7 +2241,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) break; mss = skb_shinfo(skb)->gso_size; - err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss); + err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, + mss, GFP_ATOMIC); if (err < 0) break; cnt = packets; @@ -2937,10 +2938,11 @@ static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); } -static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { const struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight); + + icsk->icsk_ca_ops->cong_avoid(sk, ack, acked); tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } @@ -3363,7 +3365,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; @@ -3396,7 +3397,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= FLAG_SND_UNA_ADVANCED; prior_fackets = tp->fackets_out; - prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3451,7 +3451,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* Advance cwnd if state allows */ if (tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, acked, prior_in_flight); + tcp_cong_avoid(sk, ack, acked); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); @@ -4702,28 +4702,6 @@ static int tcp_prune_queue(struct sock *sk) return -1; } -/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. - * As additional protections, we do not touch cwnd in retransmission phases, - * and if application hit its sndbuf limit recently. - */ -void tcp_cwnd_application_limited(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && - sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - /* Limited by application or receiver window. 
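This deletion is a move, not a removal: the application-limited bookkeeping resurfaces in tcp_output.c (see the tail of this section), and the cwnd-limited question it used to answer heuristically becomes measurement-based. A hedged sketch of the slimmed-down check the congestion modules above now call (field names follow the series; the body is illustrative):

static inline bool tcp_is_cwnd_limited_sketch(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* In slow start, keep growing only while ACKed data shows the
	 * window was close to fully used.
	 */
	if (tp->snd_cwnd <= tp->snd_ssthresh)
		return tp->snd_cwnd < 2 * tp->max_packets_out;

	return tp->is_cwnd_limited;
}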
*/ - u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); - u32 win_used = max(tp->snd_cwnd_used, init_win); - if (win_used < tp->snd_cwnd) { - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; - } - tp->snd_cwnd_used = 0; - } - tp->snd_cwnd_stamp = tcp_time_stamp; -} - static bool tcp_should_expand_sndbuf(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 438f3b95143d..77cccda1ad0c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -336,8 +336,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) const int code = icmp_hdr(icmp_skb)->code; struct sock *sk; struct sk_buff *skb; - struct request_sock *req; - __u32 seq; + struct request_sock *fastopen; + __u32 seq, snd_una; __u32 remaining; int err; struct net *net = dev_net(icmp_skb->dev); @@ -378,12 +378,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) icsk = inet_csk(sk); tp = tcp_sk(sk); - req = tp->fastopen_rsk; seq = ntohl(th->seq); + /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ + fastopen = tp->fastopen_rsk; + snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && - !between(seq, tp->snd_una, tp->snd_nxt) && - (req == NULL || seq != tcp_rsk(req)->snt_isn)) { - /* For a Fast Open socket, allow seq to be snt_isn. */ + !between(seq, snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -426,11 +426,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) break; if (seq != tp->snd_una || !icsk->icsk_retransmits || - !icsk->icsk_backoff) + !icsk->icsk_backoff || fastopen) break; - /* XXX (TFO) - revisit the following logic for TFO */ - if (sock_owned_by_user(sk)) break; @@ -462,14 +460,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; } - /* XXX (TFO) - if it's a TFO socket and has been accepted, rather - * than following the TCP_SYN_RECV case and closing the socket, - * we ignore the ICMP error and keep trying like a fully established - * socket. Is this the right thing to do? - */ - if (req && req->sk == NULL) - goto out; - switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: @@ -502,10 +492,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; case TCP_SYN_SENT: - case TCP_SYN_RECV: /* Cannot happen. - It can f.e. if SYNs crossed, - or Fast Open. - */ + case TCP_SYN_RECV: + /* Only in fast or simultaneous open. If a fast open socket + * is already accepted it is treated as a connected one below.
+ */ + if (fastopen && fastopen->sk == NULL) + break; + if (!sock_owned_by_user(sk)) { sk->sk_err = err; @@ -822,7 +815,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - u16 queue_mapping) + u16 queue_mapping, + struct tcp_fastopen_cookie *foc) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -833,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, NULL); + skb = tcp_make_synack(sk, dst, req, foc); if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -852,7 +846,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, 0); + int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -1260,187 +1254,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { }; #endif -static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct tcp_fastopen_cookie *valid_foc) -{ - bool skip_cookie = false; - struct fastopen_queue *fastopenq; - - if (likely(!fastopen_cookie_present(foc))) { - /* See include/net/tcp.h for the meaning of these knobs */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || - ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) - skip_cookie = true; /* no cookie to validate */ - else - return false; - } - fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; - /* A FO option is present; bump the counter. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); - - /* Make sure the listener has enabled fastopen, and we don't - * exceed the max # of pending TFO requests allowed before trying - * to validating the cookie in order to avoid burning CPU cycles - * unnecessarily. - * - * XXX (TFO) - The implication of checking the max_qlen before - * processing a cookie request is that clients can't differentiate - * between qlen overflow causing Fast Open to be disabled - * temporarily vs a server not supporting Fast Open at all. 
- */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || - fastopenq == NULL || fastopenq->max_qlen == 0) - return false; - - if (fastopenq->qlen >= fastopenq->max_qlen) { - struct request_sock *req1; - spin_lock(&fastopenq->lock); - req1 = fastopenq->rskq_rst_head; - if ((req1 == NULL) || time_after(req1->expires, jiffies)) { - spin_unlock(&fastopenq->lock); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); - /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/ - foc->len = -1; - return false; - } - fastopenq->rskq_rst_head = req1->dl_next; - fastopenq->qlen--; - spin_unlock(&fastopenq->lock); - reqsk_free(req1); - } - if (skip_cookie) { - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - return true; - } - - if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { - if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || - memcmp(&foc->val[0], &valid_foc->val[0], - TCP_FASTOPEN_COOKIE_SIZE) != 0) - return false; - valid_foc->len = -1; - } - /* Acknowledge the data received from the peer. */ - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - return true; - } else if (foc->len == 0) { /* Client requesting a cookie */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENCOOKIEREQD); - } else { - /* Client sent a cookie with wrong size. Treat it - * the same as invalid and return a valid one. - */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - } - return false; -} - -static int tcp_v4_conn_req_fastopen(struct sock *sk, - struct sk_buff *skb, - struct sk_buff *skb_synack, - struct request_sock *req) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; - const struct inet_request_sock *ireq = inet_rsk(req); - struct sock *child; - int err; - - req->num_retrans = 0; - req->num_timeout = 0; - req->sk = NULL; - - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - kfree_skb(skb_synack); - return -1; - } - err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, - ireq->ir_rmt_addr, ireq->opt); - err = net_xmit_eval(err); - if (!err) - tcp_rsk(req)->snt_synack = tcp_time_stamp; - /* XXX (TFO) - is it ok to ignore error and continue? */ - - spin_lock(&queue->fastopenq->lock); - queue->fastopenq->qlen++; - spin_unlock(&queue->fastopenq->lock); - - /* Initialize the child socket. Have to fix some values to take - * into account the child is a Fast Open socket and is created - * only out of the bits carried in the SYN packet. - */ - tp = tcp_sk(child); - - tp->fastopen_rsk = req; - /* Do a hold on the listner sk so that if the listener is being - * closed, the child that has been accepted can live on and still - * access listen_lock. - */ - sock_hold(sk); - tcp_rsk(req)->listener = sk; - - /* RFC1323: The window in SYN & SYN/ACK segments is never - * scaled. So correct it appropriately. - */ - tp->snd_wnd = ntohs(tcp_hdr(skb)->window); - - /* Activate the retrans timer so that SYNACK can be retransmitted. - * The request socket is not added to the SYN table of the parent - * because it's been added to the accept queue directly. 
- */ - inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, - TCP_TIMEOUT_INIT, TCP_RTO_MAX); - - /* Add the child socket directly into the accept queue */ - inet_csk_reqsk_queue_add(sk, req, child); - - /* Now finish processing the fastopen child socket. */ - inet_csk(child)->icsk_af_ops->rebuild_header(child); - tcp_init_congestion_control(child); - tcp_mtup_init(child); - tcp_init_metrics(child); - tcp_init_buffer_space(child); - - /* Queue the data carried in the SYN packet. We need to first - * bump skb's refcnt because the caller will attempt to free it. - * - * XXX (TFO) - we honor a zero-payload TFO request for now. - * (Any reason not to?) - */ - if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { - /* Don't queue the skb if there is no payload in SYN. - * XXX (TFO) - How about SYN+FIN? - */ - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - } else { - skb = skb_get(skb); - skb_dst_drop(skb); - __skb_pull(skb, tcp_hdr(skb)->doff * 4); - skb_set_owner_r(skb, child); - __skb_queue_tail(&child->sk_receive_queue, skb); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - tp->syn_data_acked = 1; - } - sk->sk_data_ready(sk); - bh_unlock_sock(child); - sock_put(child); - WARN_ON(req->sk == NULL); - return 0; -} - int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tmp_opt; @@ -1451,12 +1264,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; - bool want_cookie = false; + bool want_cookie = false, fastopen; struct flowi4 fl4; struct tcp_fastopen_cookie foc = { .len = -1 }; - struct tcp_fastopen_cookie valid_foc = { .len = -1 }; - struct sk_buff *skb_synack; - int do_fastopen; + int err; /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -1507,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ir_rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(skb); + ireq->ir_mark = inet_request_mark(sk, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; @@ -1555,52 +1367,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } - tcp_rsk(req)->snt_isn = isn; - - if (dst == NULL) { - dst = inet_csk_route_req(sk, &fl4, req); - if (dst == NULL) - goto drop_and_free; - } - do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc); - - /* We don't call tcp_v4_send_synack() directly because we need - * to make sure a child socket can be created successfully before - * sending back synack! - * - * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack() - * (or better yet, call tcp_send_synack() in the child context - * directly, but will have to fix bunch of other code first) - * after syn_recv_sock() except one will need to first fix the - * latter to remove its dependency on the current implementation - * of tcp_v4_send_synack()->tcp_select_initial_window(). - */ - skb_synack = tcp_make_synack(sk, dst, req, - fastopen_cookie_present(&valid_foc) ? 
&valid_foc : NULL); - - if (skb_synack) { - __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr); - skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); - } else + if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) goto drop_and_free; - if (likely(!do_fastopen)) { - int err; - err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, - ireq->ir_rmt_addr, ireq->opt); - err = net_xmit_eval(err); + tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->snt_synack = tcp_time_stamp; + tcp_openreq_init_rwin(req, sk, dst); + fastopen = !want_cookie && + tcp_try_fastopen(sk, skb, req, &foc, dst); + err = tcp_v4_send_synack(sk, dst, req, + skb_get_queue_mapping(skb), &foc); + if (!fastopen) { if (err || want_cookie) goto drop_and_free; tcp_rsk(req)->snt_synack = tcp_time_stamp; tcp_rsk(req)->listener = NULL; - /* Add the request_sock to the SYN table */ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - if (fastopen_cookie_present(&foc) && foc.len != 0) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) - goto drop_and_free; + } return 0; @@ -1744,28 +1528,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v4_check(skb->len, iph->saddr, - iph->daddr, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - } - - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, IPPROTO_TCP, 0); - - if (skb->len <= 76) { - return __skb_checksum_complete(skb); - } - return 0; -} - - /* The socket must have it's spinlock held when we get * here. * @@ -1960,7 +1722,8 @@ int tcp_v4_rcv(struct sk_buff *skb) * Packet length and doff are validated by header prediction, * provided case of th->doff==0 is eliminated. * So, we defer the checks. */ - if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) + + if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) goto csum_error; th = tcp_hdr(skb); diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index c9aecae31327..1e70fa8fa793 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk) * Will only call newReno CA when away from inference. * From TCP-LP's paper, this will be handled in additive increasement. 
*/ -static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct lp *lp = inet_csk_ca(sk); if (!(lp->flag & LP_WITHIN_INF)) - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); } /** diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index dcaf72f10216..4fe041805989 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1159,10 +1159,7 @@ static void __net_exit tcp_net_metrics_exit(struct net *net) tm = next; } } - if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash)) - vfree(net->ipv4.tcp_metrics_hash); - else - kfree(net->ipv4.tcp_metrics_hash); + kvfree(net->ipv4.tcp_metrics_hash); } static __net_initdata struct pernet_operations tcp_net_metrics_ops = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 05c1b155251d..e68e0d4af6c9 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +void tcp_openreq_init_rwin(struct request_sock *req, + struct sock *sk, struct dst_entry *dst) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_sock *tp = tcp_sk(sk); + __u8 rcv_wscale; + int mss = dst_metric_advmss(dst); + + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) + mss = tp->rx_opt.user_mss; + + /* Set this up on the first call only */ + req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); + + /* limit the window selection if the user enforce a smaller rx buffer */ + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) + req->window_clamp = tcp_full_space(sk); + + /* tcp_full_space because it is guaranteed to be the first packet */ + tcp_select_initial_window(tcp_full_space(sk), + mss - (ireq->tstamp_ok ? 
TCPOLEN_TSTAMP_ALIGNED : 0), + &req->rcv_wnd, + &req->window_clamp, + ireq->wscale_ok, + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); + ireq->rcv_wscale = rcv_wscale; +} +EXPORT_SYMBOL(tcp_openreq_init_rwin); + static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, struct request_sock *req) { diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b92b81718ca4..4e86c59ec7f7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -57,10 +57,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; @@ -97,9 +99,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->check = newcheck; if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = - csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); + th->check = gso_make_checksum(skb, ~th->check); seq += mss; if (copy_destructor) { @@ -133,8 +133,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); + th->check = gso_make_checksum(skb, ~th->check); out: return segs; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2d340bd2cd3d..d92bce0ea24e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk, if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } - if (foc != NULL) { + if (foc != NULL && foc->len >= 0) { u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { @@ -878,15 +878,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, BUG_ON(!skb || !tcp_skb_pcount(skb)); if (clone_it) { - const struct sk_buff *fclone = skb + 1; - skb_mstamp_get(&skb->skb_mstamp); - if (unlikely(skb->fclone == SKB_FCLONE_ORIG && - fclone->fclone == SKB_FCLONE_CLONE)) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); - if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); else @@ -1081,7 +1074,7 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de * Remember, these are still headerless SKBs at this point. */ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, - unsigned int mss_now) + unsigned int mss_now, gfp_t gfp) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; @@ -1096,11 +1089,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_unclone(skb, GFP_ATOMIC)) + if (skb_unclone(skb, gfp)) return -ENOMEM; /* Get a new skb... force flag on. */ - buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC); + buff = sk_stream_alloc_skb(sk, nsize, gfp); if (buff == NULL) return -ENOMEM; /* We'll just try again later. */ @@ -1387,12 +1380,43 @@ unsigned int tcp_current_mss(struct sock *sk) return mss_now; } -/* Congestion window validation. (RFC2861) */ -static void tcp_cwnd_validate(struct sock *sk) +/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. + * As additional protections, we do not touch cwnd in retransmission phases, + * and if application hit its sndbuf limit recently. 
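
Note the tcp_fragment() signature change earlier in this hunk: it no longer hard-codes GFP_ATOMIC, so each caller now states its own allocation context (tso_fragment() forwards its gfp the same way). Every caller converted in this patch still passes GFP_ATOMIC, e.g. the loss-probe path further down:

        if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
                /* split off all but the final MSS; we run in BH context */
                if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
                                          GFP_ATOMIC)))
                        goto rearm_timer;
                skb = tcp_write_queue_tail(sk);
        }
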
+ */ +static void tcp_cwnd_application_limited(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && + sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + /* Limited by application or receiver window. */ + u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); + u32 win_used = max(tp->snd_cwnd_used, init_win); + if (win_used < tp->snd_cwnd) { + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; + } + tp->snd_cwnd_used = 0; + } + tp->snd_cwnd_stamp = tcp_time_stamp; +} + +static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) { struct tcp_sock *tp = tcp_sk(sk); - if (tp->packets_out >= tp->snd_cwnd) { + /* Track the maximum number of outstanding packets in each + * window, and remember whether we were cwnd-limited then. + */ + if (!before(tp->snd_una, tp->max_packets_seq) || + tp->packets_out > tp->max_packets_out) { + tp->max_packets_out = tp->packets_out; + tp->max_packets_seq = tp->snd_nxt; + tp->is_cwnd_limited = is_cwnd_limited; + } + + if (tcp_is_cwnd_limited(sk)) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; tp->snd_cwnd_stamp = tcp_time_stamp; @@ -1601,7 +1625,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* All of a TSO frame must be composed of paged data. */ if (skb->len != skb->data_len) - return tcp_fragment(sk, skb, len, mss_now); + return tcp_fragment(sk, skb, len, mss_now, gfp); buff = sk_stream_alloc_skb(sk, 0, gfp); if (unlikely(buff == NULL)) @@ -1644,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, * * This algorithm is from John Heffner. */ -static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) +static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, + bool *is_cwnd_limited) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1708,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) if (!tp->tso_deferred) tp->tso_deferred = 1 | (jiffies << 1); + if (cong_win < send_win && cong_win < skb->len) + *is_cwnd_limited = true; + return true; send_now: @@ -1868,6 +1896,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unsigned int tso_segs, sent_pkts; int cwnd_quota; int result; + bool is_cwnd_limited = false; sent_pkts = 0; @@ -1892,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) { + is_cwnd_limited = true; if (push_one == 2) /* Force out a loss probe pkt. */ cwnd_quota = 1; @@ -1908,7 +1938,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, nonagle : TCP_NAGLE_PUSH)))) break; } else { - if (!push_one && tcp_tso_should_defer(sk, skb)) + if (!push_one && + tcp_tso_should_defer(sk, skb, &is_cwnd_limited)) break; } @@ -1973,7 +2004,7 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk); - tcp_cwnd_validate(sk); + tcp_cwnd_validate(sk, is_cwnd_limited); return false; } return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); @@ -2037,6 +2068,25 @@ bool tcp_schedule_loss_probe(struct sock *sk) return true; } +/* Thanks to skb fast clones, we can detect if a prior transmit of + * a packet is still in a qdisc or driver queue. + * In this case, there is very little point doing a retransmit ! + * Note: This is called from BH context only. 
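
tcp_cwnd_validate() now takes an is_cwnd_limited flag and records, once per window, the largest packets_out together with whether the flow was actually limited by cwnd at that point; tcp_tso_should_defer() reports the same condition through its new out-parameter. That recorded state is what lets the congestion-avoidance hooks later in this section drop their in_flight argument: tcp_is_cwnd_limited(sk) can answer from the socket alone. Its body is not part of this diff; a rough sketch of the idea, with illustrative names only:

        /* sketch only: decide "cwnd limited" from the recorded state */
        static bool was_cwnd_limited(const struct tcp_sock *tp)
        {
                /* in slow start, call a half-used window limited */
                if (tp->snd_cwnd <= tp->snd_ssthresh)
                        return tp->snd_cwnd < 2 * tp->max_packets_out;
                /* beyond slow start, trust the flag sampled at xmit time */
                return tp->is_cwnd_limited;
        }
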
+ */ +static bool skb_still_in_host_queue(const struct sock *sk, + const struct sk_buff *skb) +{ + const struct sk_buff *fclone = skb + 1; + + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) { + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + return true; + } + return false; +} + /* When probe timeout (PTO) fires, send a new segment if one exists, else * retransmit the last segment. */ @@ -2062,12 +2112,16 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb)) goto rearm_timer; + if (skb_still_in_host_queue(sk, skb)) + goto rearm_timer; + pcount = tcp_skb_pcount(skb); if (WARN_ON(!pcount)) goto rearm_timer; if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { - if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) + if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss, + GFP_ATOMIC))) goto rearm_timer; skb = tcp_write_queue_tail(sk); } @@ -2075,9 +2129,7 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; - /* Probe with zero data doesn't trigger fast recovery. */ - if (skb->len > 0) - err = __tcp_retransmit_skb(sk, skb); + err = __tcp_retransmit_skb(sk, skb); /* Record snd_nxt for loss detection. */ if (likely(!err)) @@ -2383,6 +2435,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) return -EAGAIN; + if (skb_still_in_host_queue(sk, skb)) + return -EBUSY; + if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) BUG(); @@ -2405,7 +2460,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -EAGAIN; if (skb->len > cur_mss) { - if (tcp_fragment(sk, skb, cur_mss, cur_mss)) + if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC)) return -ENOMEM; /* We'll try again later. */ } else { int oldpcount = tcp_skb_pcount(skb); @@ -2476,7 +2531,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * see tcp_input.c tcp_sacktag_write_queue(). */ TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; - } else { + } else if (err != -EBUSY) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } return err; @@ -2754,27 +2809,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) mss = tp->rx_opt.user_mss; - if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ - __u8 rcv_wscale; - /* Set this up on the first call only */ - req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); - - /* limit the window selection if the user enforce a smaller rx buffer */ - if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && - (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) - req->window_clamp = tcp_full_space(sk); - - /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(tcp_full_space(sk), - mss - (ireq->tstamp_ok ? 
TCPOLEN_TSTAMP_ALIGNED : 0), - &req->rcv_wnd, - &req->window_clamp, - ireq->wscale_ok, - &rcv_wscale, - dst_metric(dst, RTAX_INITRWND)); - ireq->rcv_wscale = rcv_wscale; - } - memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) @@ -3207,7 +3241,7 @@ int tcp_write_wakeup(struct sock *sk) skb->len > mss) { seg_size = min(seg_size, mss); TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; - if (tcp_fragment(sk, skb, seg_size, mss)) + if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC)) return -1; } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(sk, skb, mss); diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 0ac50836da4d..8250949b8853 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,12 +15,11 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 48539fff6357..9a5e05f27f4f 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -163,14 +163,13 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) return min(tp->snd_ssthresh, tp->snd_cwnd-1); } -static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) { - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); return; } @@ -195,7 +194,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); } else { u32 rtt, diff; u64 target_cwnd; diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 1b8e28fcd7e1..27b9825753d1 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -114,19 +114,18 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) tcp_veno_init(sk); } -static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); if (!veno->doing_veno_now) { - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); return; } /* limited by applications */ - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; /* We do the Veno calculations only if we got enough rtt samples */ @@ -134,7 +133,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, /* We don't have enough rtt samples to do the Veno * calculation, so we'll behave like Reno. 
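
The skb_still_in_host_queue() helper introduced above resurrects the fclone test that tcp_transmit_skb() used to perform inline (the hunk that removed the LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES counting there): if the skb's fast clone is still in SKB_FCLONE_CLONE state, a previous transmit of this very packet is still sitting in a qdisc or driver queue, and retransmitting it now is pointless. The retransmit paths treat this as a deferral rather than a failure; the two excerpts, from __tcp_retransmit_skb() and tcp_retransmit_skb() respectively:

        /* __tcp_retransmit_skb(): clone still queued below us */
        if (skb_still_in_host_queue(sk, skb))
                return -EBUSY;

        /* tcp_retransmit_skb(): a deferral is not counted as a failure */
        else if (err != -EBUSY)
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
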
*/ - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); } else { u64 target_cwnd; u32 rtt; diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 5ede0e727945..599b79b8eac0 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -69,13 +69,12 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); } -static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4468e1adc094..185ed3e59802 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -246,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, do { if (low <= snum && snum <= high && !test_bit(snum >> udptable->log, bitmap) && - !inet_is_reserved_local_port(snum)) + !inet_is_local_reserved_port(net, snum)) goto found; snum += rand; } while (snum != first); @@ -727,13 +727,12 @@ EXPORT_SYMBOL(udp_flush_pending_frames); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) { struct udphdr *uh = udp_hdr(skb); - struct sk_buff *frags = skb_shinfo(skb)->frag_list; int offset = skb_transport_offset(skb); int len = skb->len - offset; int hlen = len; __wsum csum = 0; - if (!frags) { + if (!skb_has_frag_list(skb)) { /* * Only one fragment on the socket. */ @@ -742,15 +741,17 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); } else { + struct sk_buff *frags; + /* * HW-checksum won't work as there are two or more * fragments on the socket so that all csums of sk_buffs * should be together */ - do { + skb_walk_frags(skb, frags) { csum = csum_add(csum, frags->csum); hlen -= frags->len; - } while ((frags = frags->next)); + } csum = skb_checksum(skb, offset, hlen, csum); skb->ip_summed = CHECKSUM_NONE; @@ -762,6 +763,43 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) } EXPORT_SYMBOL_GPL(udp4_hwcsum); +/* Function to set UDP checksum for an IPv4 UDP packet. This is intended + * for the simple case like when setting the checksum for a UDP tunnel. 
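
In the udp4_hwcsum() hunk above, the open-coded walk of the fragment chain becomes the standard skb_walk_frags() iterator (which loops over skb_shinfo(skb)->frag_list); the summing itself is unchanged:

        struct sk_buff *frags;

        /* fold in the checksum of every fragment on frag_list */
        skb_walk_frags(skb, frags) {
                csum = csum_add(csum, frags->csum);
                hlen -= frags->len;
        }
        csum = skb_checksum(skb, offset, hlen, csum);

The udp_set_csum() helper whose comment opens above then gives tunnel code a single transmit-side entry point for this checksum logic.
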
+ */ +void udp_set_csum(bool nocheck, struct sk_buff *skb, + __be32 saddr, __be32 daddr, int len) +{ + struct udphdr *uh = udp_hdr(skb); + + if (nocheck) + uh->check = 0; + else if (skb_is_gso(skb)) + uh->check = ~udp_v4_check(len, saddr, daddr, 0); + else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~udp_v4_check(len, saddr, daddr, 0); + } else { + __wsum csum; + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + uh->check = 0; + csum = skb_checksum(skb, 0, len, 0); + uh->check = udp_v4_check(len, saddr, daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} +EXPORT_SYMBOL(udp_set_csum); + static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) { struct sock *sk = skb->sk; @@ -785,7 +823,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; @@ -1495,6 +1533,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { int ret; + /* Verify checksum before giving to encap */ + if (udp_lib_checksum_complete(skb)) + goto csum_error; + ret = encap_rcv(sk, skb); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), @@ -1672,7 +1714,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) { - const struct iphdr *iph; int err; UDP_SKB_CB(skb)->partial_cov = 0; @@ -1684,22 +1725,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, return err; } - iph = ip_hdr(skb); - if (uh->check == 0) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - if (!skb_csum_unnecessary(skb)) - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, proto, 0); - /* Probably, we should checksum udp header (it should be in cache - * in any case) and data in tiny packets (< rx copybreak). - */ - - return 0; + return skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); } /* @@ -1886,7 +1913,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; - INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr) + INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); rcu_read_lock(); @@ -1979,7 +2006,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, int (*push_pending_frames)(struct sock *)) { struct udp_sock *up = udp_sk(sk); - int val; + int val, valbool; int err = 0; int is_udplite = IS_UDPLITE(sk); @@ -1989,6 +2016,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; + valbool = val ? 
1 : 0; + switch (optname) { case UDP_CORK: if (val != 0) { @@ -2018,6 +2047,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, } break; + case UDP_NO_CHECK6_TX: + up->no_check6_tx = valbool; + break; + + case UDP_NO_CHECK6_RX: + up->no_check6_rx = valbool; + break; + /* * UDP-Lite's partial checksum coverage (RFC 3828). */ @@ -2100,6 +2137,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = up->encap_type; break; + case UDP_NO_CHECK6_TX: + val = up->no_check6_tx; + break; + + case UDP_NO_CHECK6_RX: + val = up->no_check6_rx; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: @@ -2484,7 +2529,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; netdev_features_t enc_features; - int outer_hlen; + int udp_offset, outer_hlen; + unsigned int oldlen; + bool need_csum; + + oldlen = (u16)~skb->len; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; @@ -2496,6 +2545,10 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb->mac_len = skb_inner_network_offset(skb); skb->protocol = htons(ETH_P_TEB); + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); + if (need_csum) + skb->encap_hdr_csum = 1; + /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); @@ -2506,10 +2559,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, } outer_hlen = skb_tnl_header_len(skb); + udp_offset = outer_hlen - tnl_hlen; skb = segs; do { struct udphdr *uh; - int udp_offset = outer_hlen - tnl_hlen; + int len; skb_reset_inner_headers(skb); skb->encapsulation = 1; @@ -2520,31 +2574,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_set_transport_header(skb, udp_offset); + len = skb->len - udp_offset; uh = udp_hdr(skb); - uh->len = htons(skb->len - udp_offset); - - /* csum segment if tunnel sets skb with csum. 
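
UDP_NO_CHECK6_TX and UDP_NO_CHECK6_RX are new socket options wired up in the set/getsockopt hunks above: they let an application opt in to zero UDP checksums over IPv6 per RFC 6936, on the transmit and receive side respectively, complementing the sk_no_check_tx split visible in the udp_send_skb() hunk. A minimal user-space sketch, assuming the constants are exported through this kernel's uapi <linux/udp.h>:

        #include <sys/socket.h>
        #include <netinet/in.h>
        #include <linux/udp.h>  /* UDP_NO_CHECK6_TX, UDP_NO_CHECK6_RX */

        int one = 1;
        int fd = socket(AF_INET6, SOCK_DGRAM, 0);

        /* send datagrams with check == 0 (tunnel-style usage) */
        setsockopt(fd, IPPROTO_UDP, UDP_NO_CHECK6_TX, &one, sizeof(one));
        /* accept incoming datagrams that carry a zero checksum */
        setsockopt(fd, IPPROTO_UDP, UDP_NO_CHECK6_RX, &one, sizeof(one));
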
*/ - if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) { - struct iphdr *iph = ip_hdr(skb); + uh->len = htons(len); - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - udp_offset, - IPPROTO_UDP, 0); - uh->check = csum_fold(skb_checksum(skb, udp_offset, - skb->len - udp_offset, 0)); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; + if (need_csum) { + __be32 delta = htonl(oldlen + len); - } else if (protocol == htons(ETH_P_IPV6)) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u32 len = skb->len - udp_offset; + uh->check = ~csum_fold((__force __wsum) + ((__force u32)uh->check + + (__force u32)delta)); + uh->check = gso_make_checksum(skb, ~uh->check); - uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - len, IPPROTO_UDP, 0); - uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; } skb->protocol = protocol; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 88b4023ecfcf..546d2d439dda 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, __wsum csum; if (skb->encapsulation && - skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { + (skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { segs = skb_udp_tunnel_segment(skb, features); goto out; } @@ -71,8 +72,10 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_IPIP | - SKB_GSO_GRE | SKB_GSO_MPLS) || + SKB_GSO_GRE | SKB_GSO_GRE_CSUM | + SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; @@ -197,6 +200,7 @@ unflush: } skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ + skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); pp = uo_priv->offload->callbacks.gro_receive(head, skb); out_unlock: diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 2c46acd4cc36..3b3efbda48e1 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -70,7 +70,6 @@ static struct inet_protosw udplite4_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplite_prot, .ops = &inet_dgram_ops, - .no_check = 0, /* must checksum (RFC 3828) */ .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 05f2b484954f..91771a7c802f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - ip_select_ident(skb, dst->child, NULL); top_iph->ttl = ip4_dst_hoplimit(dst->child); top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; + ip_select_ident(skb, NULL); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 186a8ecf92fa..d5f6bd9a210a 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -25,7 +25,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) goto out; - if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) goto out; mtu = dst_mtu(skb_dst(skb)); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6c7fa0853fc7..5667b3003af9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -275,19 +275,14 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) { int i; - if (snmp_mib_init((void __percpu **)idev->stats.ipv6, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + idev->stats.ipv6 = alloc_percpu(struct ipstats_mib); + if (!idev->stats.ipv6) goto err_ip; for_each_possible_cpu(i) { struct ipstats_mib *addrconf_stats; - addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); + addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i); u64_stats_init(&addrconf_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); - u64_stats_init(&addrconf_stats->syncp); -#endif } @@ -305,7 +300,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) err_icmpmsg: kfree(idev->stats.icmpv6dev); err_icmp: - snmp_mib_free((void __percpu **)idev->stats.ipv6); + free_percpu(idev->stats.ipv6); err_ip: return -ENOMEM; } @@ -2504,8 +2499,8 @@ static int inet6_addr_add(struct net *net, int ifindex, return PTR_ERR(ifp); } -static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx, - unsigned int plen) +static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, + const struct in6_addr *pfx, unsigned int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -2528,7 +2523,12 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p in6_ifa_hold(ifp); read_unlock_bh(&idev->lock); + if (!(ifp->flags & IFA_F_TEMPORARY) && + (ifa_flags & IFA_F_MANAGETEMPADDR)) + manage_tempaddrs(idev, ifp, 0, 0, false, + jiffies); ipv6_del_addr(ifp); + addrconf_verify_rtnl(); return 0; } } @@ -2568,7 +2568,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr, ireq.ifr6_prefixlen); rtnl_unlock(); return err; @@ -2813,18 +2813,6 @@ static void addrconf_gre_config(struct net_device *dev) } #endif -static inline int -ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) -{ - struct in6_addr lladdr; - - if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) { - addrconf_add_linklocal(idev, &lladdr); - return 0; - } - return -1; -} - static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { @@ -3743,6 +3731,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx, *peer_pfx; + u32 ifa_flags; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3754,7 +3743,13 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) if (pfx 
== NULL) return -EINVAL; - return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); + ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; + + /* We ignore other flags so far. */ + ifa_flags &= IFA_F_MANAGETEMPADDR; + + return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx, + ifm->ifa_prefixlen); } static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, @@ -4363,7 +4358,7 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, memset(&stats[items], 0, pad); } -static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib, +static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, int items, int bytes, size_t syncpoff) { int i; @@ -4383,7 +4378,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, { switch (attrtype) { case IFLA_INET6_STATS: - __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6, + __snmp6_fill_stats64(stats, idev->stats.ipv6, IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 4c11cbcf8308..e6960457f625 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -123,7 +123,7 @@ static void snmp6_free_dev(struct inet6_dev *idev) { kfree(idev->stats.icmpv6msgdev); kfree(idev->stats.icmpv6dev); - snmp_mib_free((void __percpu **)idev->stats.ipv6); + free_percpu(idev->stats.ipv6); } /* Nobody refers to this device, we may destroy it. */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d935889f1008..7cb4392690dd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -106,7 +106,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; - char answer_no_check; int try_loading_module = 0; int err; @@ -162,7 +161,6 @@ lookup_protocol: sock->ops = answer->ops; answer_prot = answer->prot; - answer_no_check = answer->no_check; answer_flags = answer->flags; rcu_read_unlock(); @@ -176,7 +174,6 @@ lookup_protocol: sock_init_data(sock, sk); err = 0; - sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; @@ -715,33 +712,25 @@ static int __net_init ipv6_init_mibs(struct net *net) { int i; - if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); + if (!net->mib.udp_stats_in6) return -ENOMEM; - if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); + if (!net->mib.udplite_stats_in6) goto err_udplite_mib; - if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); + if (!net->mib.ipv6_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet6_stats; - af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i); u64_stats_init(&af_inet6_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); - u64_stats_init(&af_inet6_stats->syncp); -#endif } - if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, - sizeof(struct icmpv6_mib), - 
__alignof__(struct icmpv6_mib)) < 0) + net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); + if (!net->mib.icmpv6_statistics) goto err_icmp_mib; net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib), GFP_KERNEL); @@ -750,22 +739,22 @@ static int __net_init ipv6_init_mibs(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + free_percpu(net->mib.icmpv6_statistics); err_icmp_mib: - snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); + free_percpu(net->mib.ipv6_statistics); err_ip_mib: - snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); + free_percpu(net->mib.udplite_stats_in6); err_udplite_mib: - snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); + free_percpu(net->mib.udp_stats_in6); return -ENOMEM; } static void ipv6_cleanup_mibs(struct net *net) { - snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); - snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); - snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); - snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + free_percpu(net->mib.udp_stats_in6); + free_percpu(net->mib.udplite_stats_in6); + free_percpu(net->mib.ipv6_statistics); + free_percpu(net->mib.icmpv6_statistics); kfree(net->mib.icmpv6msg_statistics); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 7b326529e6a2..f6c84a6eb238 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int len; int hlimit; int err = 0; + u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) @@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) fl6.daddr = hdr->saddr; if (saddr) fl6.saddr = *saddr; + fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -493,12 +496,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); msg.skb = skb; msg.offset = skb_network_offset(skb); @@ -556,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) int err = 0; int hlimit; u8 tclass; + u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; @@ -574,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.saddr = *saddr; fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) @@ -593,12 +594,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); idev = __in6_dev_get(skb->dev); @@ -702,22 +698,11 @@ static int icmpv6_rcv(struct sk_buff *skb) saddr 
= &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; - /* Perform checksum. */ - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, - skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, - IPPROTO_ICMPV6, 0)); - if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ICMPv6 checksum failed [%pI6c > %pI6c]\n", - saddr, daddr); - goto csum_error; - } + if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", + saddr, daddr); + goto csum_error; } if (!pskb_pull(skb, sizeof(*hdr))) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index d4ade34ab375..a245e5ddffbd 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, final_p = fl6_update_dst(fl6, np->opt, &final); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; - fl6->flowi6_mark = sk->sk_mark; + fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(fl6)); diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index ee7a97f510cb..9a4d7322fb22 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -75,25 +75,50 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) return err; } - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", - &ipv6_hdr(skb)->saddr, ntohs(uh->source), - &ipv6_hdr(skb)->daddr, ntohs(uh->dest)); - return 1; - } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; + /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) + * we accept a checksum of zero here. When we find the socket + * for the UDP packet we'll check if that socket allows zero checksum + * for IPv6 (set by socket option). + */ + return skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); +} +EXPORT_SYMBOL(udp6_csum_init); + +/* Function to set UDP checksum for an IPv6 UDP packet. This is intended + * for the simple case like when setting the checksum for a UDP tunnel. 
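
udp6_csum_init() above used to hard-reject a zero UDP checksum (with the RFC 2460 section 8.1 log message); it now defers through skb_checksum_init_zero_check() so the verdict can be made per socket once the lookup has happened, matching the UDP_NO_CHECK6_RX option added in udp.c. Roughly — the actual receive-path test lives outside this hunk, so this is only a sketch:

        /* sketch: after socket lookup in the UDPv6 receive path */
        if (uh->check == 0 && !udp_sk(sk)->no_check6_rx)
                goto csum_error;        /* zero checksum not allowed here */

udp6_set_csum(), whose comment opens above, is the IPv6 twin of udp_set_csum() from udp.c.
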
+ */ +void udp6_set_csum(bool nocheck, struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, int len) +{ + struct udphdr *uh = udp_hdr(skb); + + if (nocheck) + uh->check = 0; + else if (skb_is_gso(skb)) + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - return 0; + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + } else { + __wsum csum; + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + uh->check = 0; + csum = skb_checksum(skb, 0, len, 0); + uh->check = udp_v6_check(len, saddr, daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } } -EXPORT_SYMBOL(udp6_csum_init); +EXPORT_SYMBOL(udp6_set_csum); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 87891f5f57b5..cb4459bd1d29 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -71,8 +71,7 @@ static DEFINE_RWLOCK(fib6_walker_lock); #define FWS_INIT FWS_L #endif -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, - struct rt6_info *rt); +static void fib6_prune_clones(struct net *net, struct fib6_node *fn); static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); static int fib6_walk(struct fib6_walker_t *w); @@ -941,7 +940,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) - fib6_prune_clones(info->nl_net, pn, rt); + fib6_prune_clones(info->nl_net, pn); } out: @@ -1375,7 +1374,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) pn = pn->parent; } #endif - fib6_prune_clones(info->nl_net, pn, rt); + fib6_prune_clones(info->nl_net, pn); } /* @@ -1601,10 +1600,9 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg) return 0; } -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, - struct rt6_info *rt) +static void fib6_prune_clones(struct net *net, struct fib6_node *fn) { - fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); + fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL); } /* diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 0961b5ef866d..4052694c6f2c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -26,7 +26,6 @@ #include <net/sock.h> #include <net/ipv6.h> -#include <net/addrconf.h> #include <net/rawv6.h> #include <net/transp_v6.h> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9d921462b57f..3873181ed856 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -72,6 +72,7 @@ struct ip6gre_net { }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly; static int ip6gre_tunnel_init(struct net_device *dev); static void ip6gre_tunnel_setup(struct net_device *dev); static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); @@ -353,10 +354,10 @@ failed_free: static void ip6gre_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); - struct ip6gre_net *ign = net_generic(net, 
ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); - ip6gre_tunnel_unlink(ign, netdev_priv(dev)); + ip6gre_tunnel_unlink(ign, t); dev_put(dev); } @@ -467,17 +468,7 @@ static int ip6gre_rcv(struct sk_buff *skb) goto drop; if (flags&GRE_CSUM) { - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - if (!csum) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - } + csum = skb_checksum_simple_validate(skb); offset += 4; } if (flags&GRE_KEY) { @@ -611,8 +602,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { - struct net *net = dev_net(dev); struct ip6_tnl *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ unsigned int max_headroom = 0; /* The extra header space needed */ @@ -979,7 +970,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); - struct rt6_info *rt = rt6_lookup(dev_net(dev), + struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, strict); @@ -1063,13 +1054,12 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, int err = 0; struct ip6_tnl_parm2 p; struct __ip6_tnl_parm p1; - struct ip6_tnl *t; - struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); switch (cmd) { case SIOCGETTUNNEL: - t = NULL; if (dev == ign->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; @@ -1077,9 +1067,9 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, } ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); + if (t == NULL) + t = netdev_priv(dev); } - if (t == NULL) - t = netdev_priv(dev); memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) @@ -1242,7 +1232,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->iflink = 0; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } @@ -1297,11 +1286,17 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, - struct list_head *head) +static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) { + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct net_device *dev, *aux; int prio; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &ip6gre_link_ops || + dev->rtnl_link_ops == &ip6gre_tap_ops) + unregister_netdevice_queue(dev, head); + for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { @@ -1310,7 +1305,12 @@ static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, t = rtnl_dereference(ign->tunnels[prio][h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. 
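
The ip6gre changes in this region consistently derive the namespace from the tunnel itself (t->net) instead of dev_net(dev), and the setup hunk above stops marking ordinary tunnels NETIF_F_NETNS_LOCAL, so an ip6gre/ip6gretap device can now be moved into another netns; only the per-netns fallback device stays pinned (the init hunk below re-adds NETIF_F_NETNS_LOCAL for fb_tunnel_dev alone). Teardown therefore gathers devices from two sources, condensed from the hunk that continues below:

        /* 1) link-ops match catches devices registered in this netns */
        for_each_netdev_safe(net, dev, aux)
                if (dev->rtnl_link_ops == &ip6gre_link_ops ||
                    dev->rtnl_link_ops == &ip6gre_tap_ops)
                        unregister_netdevice_queue(dev, head);

        /* 2) the hash walk catches tunnels whose device moved away */
        if (!net_eq(dev_net(t->dev), net))
                unregister_netdevice_queue(t->dev, head);
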
+ */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, + head); t = rtnl_dereference(t->next); } } @@ -1329,6 +1329,11 @@ static int __net_init ip6gre_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(ign->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; @@ -1349,12 +1354,10 @@ err_alloc_dev: static void __net_exit ip6gre_exit_net(struct net *net) { - struct ip6gre_net *ign; LIST_HEAD(list); - ign = net_generic(net, ip6gre_net_id); rtnl_lock(); - ip6gre_destroy_tunnels(ign, &list); + ip6gre_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } @@ -1531,15 +1534,14 @@ out: static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip6_tnl *t, *nt; - struct net *net = dev_net(dev); + struct ip6_tnl *t, *nt = netdev_priv(dev); + struct net *net = nt->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct __ip6_tnl_parm p; if (dev == ign->fb_tunnel_dev) return -EINVAL; - nt = netdev_priv(dev); ip6gre_netlink_parms(data, &p); t = ip6gre_tunnel_locate(net, &p, 0); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b2f091566f88..65eda2a8af48 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -97,9 +97,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fbf11562b54c..cb9df0eb4023 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -219,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->mark = sk->sk_mark; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { + if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, @@ -347,11 +347,11 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) if (skb->len <= mtu) return false; - /* ipv6 conntrack defrag sets max_frag_size + local_df */ + /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; - if (skb->local_df) + if (skb->ignore_df) return false; if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) @@ -537,6 +537,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static u32 ip6_idents_hashrnd __read_mostly; + u32 hash, id; + + net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); + id = ip_idents_reserve(hash, 1); + fhdr->identification = htonl(id); +} + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; @@ -559,7 +571,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a 
local socket. */ - if (unlikely(!skb->local_df && skb->len > mtu) || + if (unlikely(!skb->ignore_df && skb->len > mtu) || (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { if (skb->sk && dst_allfrag(skb_dst(skb))) @@ -1234,7 +1246,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - if (ip6_sk_local_df(sk)) + if (ip6_sk_ignore_df(sk)) maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; else maxnonfragsize = mtu; @@ -1544,7 +1556,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - skb->local_df = ip6_sk_local_df(sk); + skb->ignore_df = ip6_sk_ignore_df(sk); *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f6a66bb4114d..afa082458360 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -61,6 +61,7 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); #ifdef IP6_TNL_DEBUG diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 6cc9f9371cc5..9aaa6bb229e4 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -795,15 +795,12 @@ static const struct net_device_ops vti6_netdev_ops = { **/ static void vti6_dev_setup(struct net_device *dev) { - struct ip6_tnl *t; - dev->netdev_ops = &vti6_netdev_ops; dev->destructor = vti6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); dev->mtu = ETH_DATA_LEN; - t = netdev_priv(dev); dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 84c7f33d0cf8..387d8b8fc18d 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -90,17 +90,9 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, if (nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (!nat) { - /* NAT module was loaded late. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 767ab8da8218..0d5279fd852a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -451,7 +451,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } sub_frag_mem_limit(&fq->q, head->truesize); - head->local_df = 1; + head->ignore_df = 1; head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 9c3297a768fd..d189fcb437fe 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -47,15 +47,9 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, if (ct == NULL || nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (nat == NULL) { - /* Conntrack module was loaded late, can't add extension. 
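
The two IPv6 NAT hooks (ip6table_nat above and the nftables chain whose hunk continues below) duplicated the same "NAT extension added late" fallback; both now call the shared nf_ct_nat_ext_add() helper. Its definition is not part of this diff, but from the code it replaces it presumably has this shape:

        /* presumed shape of the shared helper (not shown in this diff) */
        static inline struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
        {
                struct nf_conn_nat *nat = nfct_nat(ct);

                if (nat)
                        return nat;
                /* a confirmed conntrack can no longer grow extensions */
                if (nf_ct_is_confirmed(ct))
                        return NULL;
                return nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
        }
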
*/ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) - return NF_ACCEPT; - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 56596ce390a1..5ec867e4a8b7 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -8,32 +8,6 @@ #include <net/addrconf.h> #include <net/secure_seq.h> -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static atomic_t ipv6_fragmentation_id; - struct in6_addr addr; - int ident; - -#if IS_ENABLED(CONFIG_IPV6) - struct inet_peer *peer; - struct net *net; - - net = dev_net(rt->dst.dev); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - if (peer) { - fhdr->identification = htonl(inet_getid(peer, 0)); - inet_putpeer(peer); - return; - } -#endif - ident = atomic_inc_return(&ipv6_fragmentation_id); - - addr = rt->rt6i_dst.addr; - addr.s6_addr32[0] ^= (__force __be32)ident; - fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32)); -} -EXPORT_SYMBOL(ipv6_select_ident); - int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); @@ -104,6 +78,7 @@ int __ip6_local_out(struct sk_buff *skb) if (len > IPV6_MAXPLEN) len = 0; ipv6_hdr(skb)->payload_len = htons(len); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index bda74291c3e0..5b7a1ed2aba9 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -51,7 +51,6 @@ static struct inet_protosw pingv6_protosw = { .protocol = IPPROTO_ICMPV6, .prot = &pingv6_prot, .ops = &inet6_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; @@ -168,12 +167,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.wcheck = 0; pfh.family = AF_INET6; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 091d066a57b3..3317440ea341 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -186,7 +186,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib) /* can be called either with percpu mib (pcpumib != NULL), * or shared one (smib != NULL) */ -static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib, +static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { @@ -201,7 +201,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib, } } -static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib, +static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { int i; @@ -215,14 +215,14 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; - snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics, + snmp6_seq_show_item64(seq, net->mib.ipv6_statistics, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); - snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, + snmp6_seq_show_item(seq, 
net->mib.icmpv6_statistics, NULL, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs); - snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6, + snmp6_seq_show_item(seq, net->mib.udp_stats_in6, NULL, snmp6_udp6_list); - snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6, + snmp6_seq_show_item(seq, net->mib.udplite_stats_in6, NULL, snmp6_udplite6_list); return 0; } @@ -245,7 +245,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v) struct inet6_dev *idev = (struct inet6_dev *)seq->private; seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); - snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6, + snmp6_seq_show_item64(seq, idev->stats.ipv6, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs, snmp6_icmp6_list); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1f29996e368a..b2dc60b0c764 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -873,14 +873,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, err = PTR_ERR(dst); goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; @@ -1328,7 +1322,6 @@ static struct inet_protosw rawv6_protosw = { .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6ebdb7b6744c..f23fbd28a501 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1176,7 +1176,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark; + fl6.flowi6_mark = mark ? 
mark : IP6_REPLY_MARK(net, skb->mark); fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1455,7 +1455,7 @@ static int ip6_dst_gc(struct dst_ops *ops) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e5a453ca302e..4f408176dc64 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -560,12 +560,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; @@ -1828,4 +1828,5 @@ xfrm_tunnel_failed: module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("sit"); MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bb53a5e73c1a..a822b880689b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); + ireq->ir_mark = inet_request_mark(sk, skb); + req->expires = 0UL; req->num_retrans = 0; ireq->ecn_ok = ecn_ok; @@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) final_p = fl6_update_dst(&fl6, np->opt, &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 7f405a168822..058f3eca2e53 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv6.sysctl.fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e289830ed6e3..229239ad96b1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -340,7 +340,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sock *sk; int err; struct tcp_sock *tp; - __u32 seq; + struct request_sock *fastopen; + __u32 seq, snd_una; struct net *net = dev_net(skb->dev); sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, @@ -371,8 +372,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp = tcp_sk(sk); seq = ntohl(th->seq); + /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ + fastopen = tp->fastopen_rsk; + snd_una = fastopen ? 
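The IP6_REPLY_MARK() uses above, together with the new fwmark_reflect sysctl registered below them, let kernel-generated replies (the PMTU update here, the TCP responses later in this diff) carry the mark of the packet that triggered them, so policy routing sees both directions of a flow. The policy itself is tiny:

#include <stdint.h>
#include <stdio.h>

/* Sketch of IP6_REPLY_MARK(): reflect the incoming packet's mark, but
 * only when the per-netns fwmark_reflect sysctl is enabled. */
static uint32_t reply_mark(int sysctl_fwmark_reflect, uint32_t skb_mark)
{
	return sysctl_fwmark_reflect ? skb_mark : 0;
}

int main(void)
{
	printf("off: %u on: %u\n", reply_mark(0, 42), reply_mark(1, 42));
	return 0;
}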
tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && - !between(seq, tp->snd_una, tp->snd_nxt)) { + !between(seq, snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -436,8 +440,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; case TCP_SYN_SENT: - case TCP_SYN_RECV: /* Cannot happen. - It can, it SYNs are crossed. --ANK */ + case TCP_SYN_RECV: + /* Only in fast or simultaneous open. If a fast open socket is + * is already accepted it is treated as a connected one below. + */ + if (fastopen && fastopen->sk == NULL) + break; + if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -463,7 +472,8 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct flowi6 *fl6, struct request_sock *req, - u16 queue_mapping) + u16 queue_mapping, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -474,7 +484,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, NULL); + skb = tcp_make_synack(sk, dst, req, foc); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, @@ -498,7 +508,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) struct flowi6 fl6; int res; - res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); @@ -802,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, fl6.flowi6_oif = inet6_iif(skb); else fl6.flowi6_oif = oif; + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -917,7 +928,12 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open. + */ + tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? + tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + tcp_rsk(req)->rcv_nxt, req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); @@ -969,8 +985,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; + struct tcp_fastopen_cookie foc = { .len = -1 }; + bool want_cookie = false, fastopen; struct flowi6 fl6; - bool want_cookie = false; + int err; if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1001,7 +1019,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? 
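With a pending Fast Open child, data has already been sent with sequence numbers above the listener's snd_una, so the ICMP error handler above validates the quoted sequence against the request's initial sequence number instead. The wrap-safe window test is the kernel's between() macro, sketched here in userspace:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "seq1 lies in [seq2, seq3]" test, as in the kernel's between(). */
static bool between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

int main(void)
{
	uint32_t snt_isn = 0xfffffff0u;	/* ISN chosen for a TFO child */
	uint32_t snd_nxt = 0x00000010u;	/* window has wrapped past 2^32 */

	/* An ICMP quoting a segment inside the window is accepted. */
	printf("%d\n", between(0xfffffff8u, snt_isn, snd_nxt)); /* 1 */
	printf("%d\n", between(0x00000020u, snt_isn, snd_nxt)); /* 0 */
	return 0;
}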
NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1016,6 +1034,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, skb, sock_net(sk)); ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_mark = inet_request_mark(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && @@ -1074,19 +1093,27 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v6_init_sequence(skb); } have_isn: - tcp_rsk(req)->snt_isn = isn; if (security_inet_conn_request(sk, skb, req)) goto drop_and_release; - if (tcp_v6_send_synack(sk, dst, &fl6, req, - skb_get_queue_mapping(skb)) || - want_cookie) + if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL) goto drop_and_free; + tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_rsk(req)->listener = NULL; - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + tcp_openreq_init_rwin(req, sk, dst); + fastopen = !want_cookie && + tcp_try_fastopen(sk, skb, req, &foc, dst); + err = tcp_v6_send_synack(sk, dst, &fl6, req, + skb_get_queue_mapping(skb), &foc); + if (!fastopen) { + if (err || want_cookie) + goto drop_and_free; + + tcp_rsk(req)->listener = NULL; + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + } return 0; drop_and_release: @@ -1294,25 +1321,6 @@ out: return NULL; } -static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - } - - skb->csum = ~csum_unfold(tcp_v6_check(skb->len, - &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, 0)); - - if (skb->len <= 76) - return __skb_checksum_complete(skb); - return 0; -} - /* The socket must have it's spinlock held when we get * here. * @@ -1486,7 +1494,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; - if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) + if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) goto csum_error; th = tcp_hdr(skb); @@ -1779,6 +1787,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); + struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; @@ -1821,7 +1830,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, - tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh + sp->sk_state == TCP_LISTEN ? + (fastopenq ? fastopenq->max_qlen : 0) : + (tcp_in_initial_slowstart(tp) ? 
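The open-coded tcp_v6_checksum_init() removed in this file is subsumed by the generic skb_checksum_init() path, which folds in the standard IPv6 pseudo-header. A self-contained userspace computation of that pseudo-header checksum (RFC 2460 layout; the 16-bit one's-complement arithmetic is the same the kernel's csum_ipv6_magic() implements, though the kernel works on folded partial sums rather than raw bytes):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* One's-complement sum over big-endian 16-bit words. */
static uint32_t csum_add(uint32_t sum, const uint8_t *p, size_t len)
{
	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;
	return sum;
}

static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* RFC 2460 pseudo-header: saddr, daddr, upper-layer length, next header. */
static uint16_t ipv6_l4_csum(const uint8_t saddr[16], const uint8_t daddr[16],
			     uint8_t proto, const uint8_t *payload, uint32_t len)
{
	uint8_t ph[40] = {0};
	uint32_t be_len = htonl(len);

	memcpy(ph, saddr, 16);
	memcpy(ph + 16, daddr, 16);
	memcpy(ph + 32, &be_len, 4);
	ph[39] = proto;

	return csum_fold(csum_add(csum_add(0, ph, sizeof(ph)), payload, len));
}

int main(void)
{
	uint8_t src[16] = { 0x20, 0x01, 0x0d, 0xb8, [15] = 1 };
	uint8_t dst[16] = { 0x20, 0x01, 0x0d, 0xb8, [15] = 2 };
	uint8_t seg[] = "hello";

	printf("csum=%04x\n", ipv6_l4_csum(src, dst, 6 /* TCP */, seg, 5));
	return 0;
}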
-1 : tp->snd_ssthresh) ); } @@ -1981,7 +1992,6 @@ static struct inet_protosw tcpv6_protosw = { .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e586d92260e..95c834799288 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -634,6 +634,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { int ret; + /* Verify checksum before giving to encap */ + if (udp_lib_checksum_complete(skb)) + goto csum_error; + ret = encap_rcv(sk, skb); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), @@ -701,17 +705,16 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, int dif) { struct hlist_nulls_node *node; - struct sock *s = sk; unsigned short num = ntohs(loc_port); - sk_nulls_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); + sk_nulls_for_each_from(sk, node) { + struct inet_sock *inet = inet_sk(sk); - if (!net_eq(sock_net(s), net)) + if (!net_eq(sock_net(sk), net)) continue; - if (udp_sk(s)->udp_port_hash == num && - s->sk_family == PF_INET6) { + if (udp_sk(sk)->udp_port_hash == num && + sk->sk_family == PF_INET6) { if (inet->inet_dport) { if (inet->inet_dport != rmt_port) continue; @@ -720,16 +723,16 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; - if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) continue; } - if (!inet6_mc_check(s, loc_addr, rmt_addr)) + if (!inet6_mc_check(sk, loc_addr, rmt_addr)) continue; - return s; + return sk; } } return NULL; @@ -760,6 +763,17 @@ static void flush_stack(struct sock **stack, unsigned int count, if (unlikely(skb1)) kfree_skb(skb1); } + +static void udp6_csum_zero_error(struct sk_buff *skb) +{ + /* RFC 2460 section 8.1 says that we SHOULD log + * this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); +} + /* * Note: called only from the BH handler context, * so we don't need to lock the hashes. @@ -779,7 +793,12 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); while (sk) { - stack[count++] = sk; + /* If zero checksum and no_check is not on for + * the socket then skip it. 
+ */ + if (uh->check || udp_sk(sk)->no_check6_rx) + stack[count++] = sk; + sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, uh->source, saddr, dif); if (unlikely(count == ARRAY_SIZE(stack))) { @@ -867,6 +886,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; + if (!uh->check && !udp_sk(sk)->no_check6_rx) { + sock_put(sk); + udp6_csum_zero_error(skb); + goto csum_error; + } + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); @@ -879,6 +904,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; } + if (!uh->check) { + udp6_csum_zero_error(skb); + goto csum_error; + } + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; @@ -1006,7 +1036,10 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (is_udplite) csum = udplite_csum_outgoing(sk, skb); - else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ + else if (up->no_check6_tx) { /* UDP csum disabled */ + skb->ip_summed = CHECKSUM_NONE; + goto send; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, up->len); goto send; @@ -1232,14 +1265,8 @@ do_udp_sendmsg: goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; @@ -1479,7 +1506,6 @@ static struct inet_protosw udpv6_protosw = { .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b261ee8b83fc..0ae3d98f83e0 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -63,7 +63,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_MPLS) || @@ -76,7 +78,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, goto out; } - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + if (skb->encapsulation && skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features); else { /* Do software UFO. 
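The receive-side rule added in the UDP hunks above: a zero UDP checksum, which RFC 2460 section 8.1 forbids over IPv6, is now tolerated only when the socket opted in via the new no_check6_rx flag (the RFC 6935/6936 relaxation for tunnels); otherwise the datagram is logged and dropped. In isolation:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum verdict { ACCEPT, DROP_CSUM };

/* Policy from the hunks above: a zero IPv6 UDP checksum is acceptable
 * only if the socket explicitly allows it (no_check6_rx). */
static enum verdict udp6_rx_check(uint16_t uh_check, bool no_check6_rx)
{
	if (uh_check == 0 && !no_check6_rx)
		return DROP_CSUM;	/* caller logs and bumps csum errors */
	return ACCEPT;
}

int main(void)
{
	printf("%d %d %d\n",
	       udp6_rx_check(0, false),	     /* 1: dropped */
	       udp6_rx_check(0, true),	     /* 0: tunnel opted in */
	       udp6_rx_check(0xbeef, false)); /* 0: normal datagram */
	return 0;
}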
Complete and fill in the UDP checksum as HW cannot diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index dfcc4be46898..9cf097e206e9 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplitev6_prot, .ops = &inet6_dgram_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b930d080c66f..433672d07d0b 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (!skb->local_df && skb->len > mtu) { + if (!skb->ignore_df && skb->len > mtu) { skb->dev = dst->dev; if (xfrm6_local_dontfrag(skb)) @@ -114,7 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; - skb->local_df = 1; + skb->ignore_df = 1; return x->outer_mode->output2(x, skb); } @@ -153,7 +153,7 @@ static int __xfrm6_output(struct sk_buff *skb) if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; - } else if (!skb->local_df && skb->len > mtu && skb->sk) { + } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { xfrm_local_error(skb, mtu); return -EMSGSIZE; } diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 41e4e93cb3aa..91729b807c7d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1353,7 +1353,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol, sk_refcnt_debug_inc(sk); sock_init_data(sock, sk); - sk->sk_no_check = 1; /* Checksum off by default */ + sk->sk_no_check_tx = 1; /* Checksum off by default */ sock->ops = &ipx_dgram_ops; rc = 0; out: diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index c1f03185c5e1..67e7ad3d46b1 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -236,7 +236,8 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, } /* Apply checksum. Not allowed on 802.3 links. 
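Across this diff the single sk_no_check field is split into transmit and receive halves; protocols such as IPX here only ever meant "don't generate a checksum" and move to sk_no_check_tx, while the IPv6 UDP changes above use the rx half. IPX marks a disabled checksum on the wire as 0xFFFF, which the transmit decision below sketches:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define IPX_NO_CSUM 0xFFFFu	/* on-wire marker for "checksum disabled" */

/* Sketch of the ipxrtr_route_packet() decision: 802.3 framing never
 * carries an IPX checksum, and sockets may disable it via the tx flag. */
static uint16_t ipx_tx_csum(bool sk_no_check_tx, bool is_8023_link,
			    uint16_t computed)
{
	if (sk_no_check_tx || is_8023_link)
		return IPX_NO_CSUM;
	return computed;
}

int main(void)
{
	printf("%04x %04x\n", ipx_tx_csum(true, false, 0x1234),
	       ipx_tx_csum(false, false, 0x1234));
	return 0;
}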
*/ - if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023)) + if (sk->sk_no_check_tx || + intrfc->if_dlink_type == htons(IPX_FRAME_8023)) ipx->ipx_checksum = htons(0xFFFF); else ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr)); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 8c9d7302c846..7a95fa4a3de1 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -682,6 +682,18 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) return NULL; } +static void __iucv_auto_name(struct iucv_sock *iucv) +{ + char name[12]; + + sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name)); + while (__iucv_get_sock_by_name(name)) { + sprintf(name, "%08x", + atomic_inc_return(&iucv_sk_list.autobind_name)); + } + memcpy(iucv->src_name, name, 8); +} + /* Bind an unbound socket */ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) @@ -724,8 +736,12 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if (!memcmp(dev->perm_addr, uid, 8)) { - memcpy(iucv->src_name, sa->siucv_name, 8); memcpy(iucv->src_user_id, sa->siucv_user_id, 8); + /* Check for unitialized siucv_name */ + if (strncmp(sa->siucv_name, " ", 8) == 0) + __iucv_auto_name(iucv); + else + memcpy(iucv->src_name, sa->siucv_name, 8); sk->sk_bound_dev_if = dev->ifindex; iucv->hs_dev = dev; dev_hold(dev); @@ -763,7 +779,6 @@ done: static int iucv_sock_autobind(struct sock *sk) { struct iucv_sock *iucv = iucv_sk(sk); - char name[12]; int err = 0; if (unlikely(!pr_iucv)) @@ -772,17 +787,9 @@ static int iucv_sock_autobind(struct sock *sk) memcpy(iucv->src_user_id, iucv_userid, 8); write_lock_bh(&iucv_sk_list.lock); - - sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name)); - while (__iucv_get_sock_by_name(name)) { - sprintf(name, "%08x", - atomic_inc_return(&iucv_sk_list.autobind_name)); - } - + __iucv_auto_name(iucv); write_unlock_bh(&iucv_sk_list.lock); - memcpy(&iucv->src_name, name, 8); - if (!iucv->msglimit) iucv->msglimit = IUCV_QUEUELEN_DEFAULT; @@ -1936,11 +1943,10 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) sk_acceptq_is_full(sk) || !nsk) { /* error on server socket - connection refused */ - if (nsk) - sk_free(nsk); afiucv_swap_src_dest(skb); trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN; err = dev_queue_xmit(skb); + iucv_sock_kill(nsk); bh_unlock_sock(sk); goto out; } diff --git a/net/key/af_key.c b/net/key/af_key.c index f3c83073afc4..ba2a2f95911c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1476,9 +1476,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg else err = xfrm_state_update(x); - xfrm_audit_state_add(x, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_state_add(x, err ? 0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -1532,9 +1530,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_ c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: - xfrm_audit_state_delete(x, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_state_delete(x, err ? 
0 : 1, true); xfrm_state_put(x); return err; @@ -1726,17 +1722,13 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m struct net *net = sock_net(sk); unsigned int proto; struct km_event c; - struct xfrm_audit audit_info; int err, err2; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = 0; - err = xfrm_state_flush(net, proto, &audit_info); + err = xfrm_state_flush(net, proto, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ @@ -2288,9 +2280,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); - xfrm_audit_policy_add(xp, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) goto out; @@ -2372,9 +2362,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa if (xp == NULL) return -ENOENT; - xfrm_audit_policy_delete(xp, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; @@ -2553,7 +2541,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, sel.sport_mask = htons(0xffff); /* set destination address info of selector */ - sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1], + sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); @@ -2622,9 +2610,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ return -ENOENT; if (delete) { - xfrm_audit_policy_delete(xp, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_policy_delete(xp, err ? 
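Besides collapsing the loginuid/sessionid/secid triple into a single bool across the xfrm_audit_* calls, these af_key.c hunks fix a statement in pfkey_migrate() that ended in a comma rather than a semicolon. C's comma operator made the two statements parse as one expression statement evaluated left to right, so the code happened to behave correctly; a small demonstration of why:

#include <stdio.h>

int main(void)
{
	int sa, out = 0;
	int hdrs[2] = { 7, 8 };

	/* The pre-fix form: one expression statement joined by the comma
	 * operator. Left-to-right evaluation assigns 'sa' before it is
	 * used, so the stray comma was harmless in practice. */
	sa = hdrs[1], out = sa * 2;

	printf("sa=%d out=%d\n", sa, out);	/* sa=8 out=16 */
	return 0;
}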
0 : 1, true); if (err) goto out; @@ -2733,13 +2719,9 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad { struct net *net = sock_net(sk); struct km_event c; - struct xfrm_audit audit_info; int err, err2; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = 0; - err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a4e37d7158dc..bea259043205 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -495,52 +495,6 @@ out: spin_unlock_bh(&session->reorder_q.lock); } -static inline int l2tp_verify_udp_checksum(struct sock *sk, - struct sk_buff *skb) -{ - struct udphdr *uh = udp_hdr(skb); - u16 ulen = ntohs(uh->len); - __wsum psum; - - if (sk->sk_no_check || skb_csum_unnecessary(skb)) - return 0; - -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { - if (!uh->check) { - LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); - return 1; - } - if ((skb->ip_summed == CHECKSUM_COMPLETE) && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, ulen, - IPPROTO_UDP, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, IPPROTO_UDP, - 0)); - } else -#endif - { - struct inet_sock *inet; - if (!uh->check) - return 0; - inet = inet_sk(sk); - psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, - ulen, IPPROTO_UDP, 0); - - if ((skb->ip_summed == CHECKSUM_COMPLETE) && - !csum_fold(csum_add(psum, skb->csum))) - return 0; - skb->csum = psum; - } - - return __skb_checksum_complete(skb); -} - static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr) { u32 nws; @@ -895,8 +849,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, u16 version; int length; - if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) - goto discard_bad_csum; + /* UDP has verifed checksum */ /* UDP always verifies the packet length. 
*/ __skb_pull(skb, sizeof(struct udphdr)); @@ -979,14 +932,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, return 0; -discard_bad_csum: - LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); - UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - atomic_long_inc(&tunnel->stats.rx_errors); - kfree_skb(skb); - - return 0; - error: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); @@ -1128,7 +1073,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, } /* Queue the packet to IP for output */ - skb->local_df = 1; + skb->ignore_df = 1; #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) error = inet6_csk_xmit(tunnel->sock, skb, NULL); @@ -1150,31 +1095,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, return 0; } -#if IS_ENABLED(CONFIG_IPV6) -static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, - int udp_len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct udphdr *uh = udp_hdr(skb); - - if (!skb_dst(skb) || !skb_dst(skb)->dev || - !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - __wsum csum = skb_checksum(skb, 0, udp_len, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len, - IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, - udp_len, IPPROTO_UDP, 0); - } -} -#endif - /* If caller requires the skb to have a ppp header, the header must be * inserted in the skb data before calling this function. */ @@ -1186,7 +1106,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len struct flowi *fl; struct udphdr *uh; struct inet_sock *inet; - __wsum csum; int headroom; int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? 
sizeof(struct udphdr) : 0; int udp_len; @@ -1235,33 +1154,17 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len uh->dest = inet->inet_dport; udp_len = uhlen + hdr_len + data_len; uh->len = htons(udp_len); - uh->check = 0; /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) - l2tp_xmit_ipv6_csum(sk, skb, udp_len); + udp6_set_csum(udp_get_no_check6_tx(sk), + skb, &inet6_sk(sk)->saddr, + &sk->sk_v6_daddr, udp_len); else #endif - if (sk->sk_no_check == UDP_CSUM_NOXMIT) - skb->ip_summed = CHECKSUM_NONE; - else if ((skb_dst(skb) && skb_dst(skb)->dev) && - (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { - skb->ip_summed = CHECKSUM_COMPLETE; - csum = skb_checksum(skb, 0, udp_len, 0); - uh->check = csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, 0); - } + udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr, + inet->inet_daddr, udp_len); break; case L2TP_ENCAPTYPE_IP: @@ -1490,6 +1393,11 @@ static int l2tp_tunnel_sock_create(struct net *net, sizeof(udp6_addr), 0); if (err < 0) goto out; + + if (cfg->udp6_zero_tx_checksums) + udp_set_no_check6_tx(sock->sk, true); + if (cfg->udp6_zero_rx_checksums) + udp_set_no_check6_rx(sock->sk, true); } else #endif { @@ -1518,7 +1426,7 @@ static int l2tp_tunnel_sock_create(struct net *net, } if (!cfg->use_udp_checksums) - sock->sk->sk_no_check = UDP_CSUM_NOXMIT; + sock->sk->sk_no_check_tx = 1; break; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 3f93ccd6ba97..68aa9ffd4ae4 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -162,7 +162,9 @@ struct l2tp_tunnel_cfg { #endif u16 local_udp_port; u16 peer_udp_port; - unsigned int use_udp_checksums:1; + unsigned int use_udp_checksums:1, + udp6_zero_tx_checksums:1, + udp6_zero_rx_checksums:1; }; struct l2tp_tunnel { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 3397fe6897c0..369a9822488c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -606,7 +606,6 @@ static struct inet_protosw l2tp_ip_protosw = { .protocol = IPPROTO_L2TP, .prot = &l2tp_ip_prot, .ops = &l2tp_ip_ops, - .no_check = 0, }; static struct net_protocol l2tp_ip_protocol __read_mostly = { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 7704ea9502fd..f3f98a156cee 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -605,14 +605,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; @@ -761,7 +755,6 @@ static struct inet_protosw l2tp_ip6_protosw = { .protocol = IPPROTO_L2TP, .prot = &l2tp_ip6_prot, .ops = &l2tp_ip6_ops, - .no_check = 0, }; static struct inet6_protocol l2tp_ip6_protocol __read_mostly = { diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bd7387adea9e..0ac907adb2f4 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -161,6 +161,13 @@ static int l2tp_nl_cmd_tunnel_create(struct 
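The open-coded checksum setup deleted from l2tp_xmit_skb() collapses into the new udp_set_csum()/udp6_set_csum() helpers. The decision tree they implement, greatly simplified and with ip_summed states mirrored as a plain enum for illustration:

#include <stdbool.h>
#include <stdio.h>

enum csum_mode { CSUM_NONE, CSUM_PARTIAL, CSUM_COMPLETE };

/* Rough shape of the udp_set_csum() decision: skip the checksum when the
 * socket disabled it, hand it to hardware when the route's device can
 * offload it, otherwise compute it in software. */
static enum csum_mode udp_tx_csum_mode(bool no_check_tx, bool dev_can_csum)
{
	if (no_check_tx)
		return CSUM_NONE;
	if (dev_can_csum)
		return CSUM_PARTIAL;	/* hw fills in udphdr->check */
	return CSUM_COMPLETE;		/* software checksum over payload */
}

int main(void)
{
	printf("%d %d %d\n", udp_tx_csum_mode(true, true),
	       udp_tx_csum_mode(false, true), udp_tx_csum_mode(false, false));
	return 0;
}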
sk_buff *skb, struct genl_info *info cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]); if (info->attrs[L2TP_ATTR_UDP_CSUM]) cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]); + +#if IS_ENABLED(CONFIG_IPV6) + if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]) + cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); + if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]) + cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); +#endif } if (info->attrs[L2TP_ATTR_DEBUG]) @@ -297,8 +304,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla case L2TP_ENCAPTYPE_UDP: if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) || - nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, - (sk->sk_no_check != UDP_CSUM_NOXMIT))) + nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) goto nla_put_failure; /* NOBREAK */ case L2TP_ENCAPTYPE_IP: diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 9d7d840aac6d..1e46ffa69167 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -25,7 +25,8 @@ mac80211-y := \ wme.o \ event.o \ chan.o \ - trace.o mlme.o + trace.o mlme.o \ + tdls.o mac80211-$(CONFIG_MAC80211_LEDS) += led.o mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 7c7df475a401..ec24378caaaf 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -23,12 +23,13 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic) { struct scatterlist assoc, pt, ct[2]; - struct { - struct aead_request req; - u8 priv[crypto_aead_reqsize(tfm)]; - } aead_req; - memset(&aead_req, 0, sizeof(aead_req)); + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *) aead_req_data; + + memset(aead_req, 0, sizeof(aead_req_data)); sg_init_one(&pt, data, data_len); sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); @@ -36,23 +37,23 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_set_buf(&ct[0], data, data_len); sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); - aead_request_set_tfm(&aead_req.req, tfm); - aead_request_set_assoc(&aead_req.req, &assoc, assoc.length); - aead_request_set_crypt(&aead_req.req, &pt, ct, data_len, b_0); + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, &pt, ct, data_len, b_0); - crypto_aead_encrypt(&aead_req.req); + crypto_aead_encrypt(aead_req); } int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic) { struct scatterlist assoc, pt, ct[2]; - struct { - struct aead_request req; - u8 priv[crypto_aead_reqsize(tfm)]; - } aead_req; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *) aead_req_data; - memset(&aead_req, 0, sizeof(aead_req)); + memset(aead_req, 0, sizeof(aead_req_data)); sg_init_one(&pt, data, data_len); sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); @@ -60,12 +61,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_set_buf(&ct[0], data, data_len); sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); - aead_request_set_tfm(&aead_req.req, tfm); - 
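The aes_ccm.c hunks replace a locally defined struct whose last member is sized by the runtime value crypto_aead_reqsize() (a variable-length array in a struct, a GNU-only construct other compilers reject, with no guaranteed layout) with an explicitly aligned char buffer cast to the request type. The pattern in isolation; struct request and req_priv_size() are hypothetical stand-ins:

#include <stdio.h>
#include <string.h>

struct request { long opaque[2]; };

/* Runtime size, like crypto_aead_reqsize(tfm). */
static size_t req_priv_size(void) { return 24; }

int main(void)
{
	/* An aligned raw buffer sized at runtime, reinterpreted as the
	 * request type: the same pattern the mac80211 hunks adopt. */
	char buf[sizeof(struct request) + req_priv_size()]
		__attribute__((aligned(__alignof__(struct request))));
	struct request *req = (void *)buf;

	memset(req, 0, sizeof(buf));
	printf("buf=%zu bytes, aligned to %zu\n",
	       sizeof(buf), (size_t)__alignof__(struct request));
	return 0;
}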
aead_request_set_assoc(&aead_req.req, &assoc, assoc.length); - aead_request_set_crypt(&aead_req.req, ct, &pt, + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, ct, &pt, data_len + IEEE80211_CCMP_MIC_LEN, b_0); - return crypto_aead_decrypt(&aead_req.req); + return crypto_aead_decrypt(aead_req); } struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index aaa59d719592..d7513a503be1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -109,6 +109,15 @@ static int ieee80211_change_iface(struct wiphy *wiphy, static int ieee80211_start_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + mutex_lock(&sdata->local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + mutex_unlock(&sdata->local->chanctx_mtx); + if (ret < 0) + return ret; + return ieee80211_do_open(wdev, true); } @@ -463,8 +472,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + struct rate_control_ref *ref = local->rate_ctrl; struct timespec uptime; u64 packets = 0; + u32 thr = 0; int i, ac; sinfo->generation = sdata->local->sta_generation; @@ -578,6 +589,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); + + /* check if the driver has a SW RC implementation */ + if (ref && ref->ops->get_expected_throughput) + thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); + else + thr = drv_get_expected_throughput(local, &sta->sta); + + if (thr != 0) { + sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT; + sinfo->expected_throughput = thr; + } } static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { @@ -768,7 +790,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy, } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *mac, struct station_info *sinfo) + int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -798,7 +820,7 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -972,13 +994,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->needed_rx_chains = sdata->local->rx_chains; mutex_lock(&local->mtx); - sdata->radar_required = params->radar_required; err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); + if (!err) + ieee80211_vif_copy_chanctx_to_vlans(sdata, false); mutex_unlock(&local->mtx); if (err) return err; - ieee80211_vif_copy_chanctx_to_vlans(sdata, false); /* * Apply control port protocol, this allows us to @@ -1075,6 +1097,31 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, return 0; } +bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local) +{ + struct 
ieee80211_sub_if_data *sdata; + + lockdep_assert_held(&local->mtx); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + + if (!sdata->vif.csa_active) + continue; + + if (!sdata->csa_block_tx) + continue; + + rcu_read_unlock(); + return true; + } + rcu_read_unlock(); + + return false; +} + static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1092,7 +1139,14 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata); /* abort any running channel switch */ + mutex_lock(&local->mtx); sdata->vif.csa_active = false; + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); + kfree(sdata->u.ap.next_beacon); sdata->u.ap.next_beacon = NULL; @@ -1131,8 +1185,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); skb_queue_purge(&sdata->u.ap.ps.bc_buf); - ieee80211_vif_copy_chanctx_to_vlans(sdata, true); mutex_lock(&local->mtx); + ieee80211_vif_copy_chanctx_to_vlans(sdata, true); ieee80211_vif_release_channel(sdata); mutex_unlock(&local->mtx); @@ -1416,7 +1470,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, } static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_parameters *params) + const u8 *mac, + struct station_parameters *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; @@ -1450,6 +1505,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + } else { + sta->sta.tdls = true; } err = sta_apply_parameters(local, sta, params); @@ -1483,7 +1540,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac) + const u8 *mac) { struct ieee80211_sub_if_data *sdata; @@ -1497,7 +1554,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, u8 *mac, + struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1566,7 +1623,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sta->sdata->u.vlan.sta) { - rcu_assign_pointer(sta->sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); prev_4addr = true; } @@ -1622,7 +1679,7 @@ out_err: #ifdef CONFIG_MAC80211_MESH static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst, u8 *next_hop) + const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -1650,7 +1707,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst) + const u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1661,9 +1718,8 @@ static int 
ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int ieee80211_change_mpath(struct wiphy *wiphy, - struct net_device *dev, - u8 *dst, u8 *next_hop) +static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, + const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -1755,8 +1811,8 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *dst, u8 *next_hop, - struct mpath_info *pinfo) + int idx, u8 *dst, u8 *next_hop, + struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -2930,7 +2986,6 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, /* whatever, but channel contexts should not complain about that one */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = local->rx_chains; - sdata->radar_required = true; err = ieee80211_vif_use_channel(sdata, chandef, IEEE80211_CHANCTX_SHARED); @@ -3011,26 +3066,11 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_finish); -static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, + u32 *changed) { - struct ieee80211_local *local = sdata->local; - int err, changed = 0; - - sdata_assert_lock(sdata); - - mutex_lock(&local->mtx); - sdata->radar_required = sdata->csa_radar_required; - err = ieee80211_vif_change_channel(sdata, &changed); - mutex_unlock(&local->mtx); - if (WARN_ON(err < 0)) - return; - - if (!local->use_chanctx) { - local->_oper_chandef = sdata->csa_chandef; - ieee80211_hw_config(local, 0); - } + int err; - sdata->vif.csa_active = false; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon); @@ -3038,35 +3078,74 @@ static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) sdata->u.ap.next_beacon = NULL; if (err < 0) - return; - changed |= err; + return err; + *changed |= err; break; case NL80211_IFTYPE_ADHOC: err = ieee80211_ibss_finish_csa(sdata); if (err < 0) - return; - changed |= err; + return err; + *changed |= err; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: err = ieee80211_mesh_finish_csa(sdata); if (err < 0) - return; - changed |= err; + return err; + *changed |= err; break; #endif default: WARN_ON(1); - return; + return -EINVAL; } + return 0; +} + +static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u32 changed = 0; + int err; + + sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); + + sdata->radar_required = sdata->csa_radar_required; + err = ieee80211_vif_change_channel(sdata, &changed); + if (err < 0) + return err; + + if (!local->use_chanctx) { + local->_oper_chandef = sdata->csa_chandef; + ieee80211_hw_config(local, 0); + } + + sdata->vif.csa_active = false; + + err = ieee80211_set_after_csa_beacon(sdata, &changed); + if (err) + return err; + ieee80211_bss_info_change_notify(sdata, changed); + cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); - ieee80211_wake_queues_by_reason(&sdata->local->hw, + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); + return 0; +} + +static void 
ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +{ + if (__ieee80211_csa_finalize(sdata)) { + sdata_info(sdata, "failed to finalize CSA, disconnecting\n"); + cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, + GFP_KERNEL); + } } void ieee80211_csa_finalize_work(struct work_struct *work) @@ -3074,8 +3153,11 @@ void ieee80211_csa_finalize_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, csa_finalize_work); + struct ieee80211_local *local = sdata->local; sdata_lock(sdata); + mutex_lock(&local->mtx); + /* AP might have been stopped while waiting for the lock. */ if (!sdata->vif.csa_active) goto unlock; @@ -3086,6 +3168,7 @@ void ieee80211_csa_finalize_work(struct work_struct *work) ieee80211_csa_finalize(sdata); unlock: + mutex_unlock(&local->mtx); sdata_unlock(sdata); } @@ -3121,9 +3204,25 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, if (params->count <= 1) break; - sdata->csa_counter_offset_beacon = - params->counter_offset_beacon; - sdata->csa_counter_offset_presp = params->counter_offset_presp; + if ((params->n_counter_offsets_beacon > + IEEE80211_MAX_CSA_COUNTERS_NUM) || + (params->n_counter_offsets_presp > + IEEE80211_MAX_CSA_COUNTERS_NUM)) + return -EINVAL; + + /* make sure we don't have garbage in other counters */ + memset(sdata->csa_counter_offset_beacon, 0, + sizeof(sdata->csa_counter_offset_beacon)); + memset(sdata->csa_counter_offset_presp, 0, + sizeof(sdata->csa_counter_offset_presp)); + + memcpy(sdata->csa_counter_offset_beacon, + params->counter_offsets_beacon, + params->n_counter_offsets_beacon * sizeof(u16)); + memcpy(sdata->csa_counter_offset_presp, + params->counter_offsets_presp, + params->n_counter_offsets_presp * sizeof(u16)); + err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa); if (err < 0) { kfree(sdata->u.ap.next_beacon); @@ -3212,16 +3311,18 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, return 0; } -int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_csa_settings *params) +static int +__ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; int err, num_chanctx, changed = 0; sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; @@ -3233,23 +3334,24 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, &sdata->vif.bss_conf.chandef)) return -EINVAL; - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) { - rcu_read_unlock(); + mutex_lock(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) { + mutex_unlock(&local->chanctx_mtx); return -EBUSY; } /* don't handle for multi-VIF cases */ - chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); - if (chanctx->refcount > 1) { - rcu_read_unlock(); + chanctx = container_of(conf, struct ieee80211_chanctx, conf); + if (ieee80211_chanctx_refcount(local, chanctx) > 1) { + mutex_unlock(&local->chanctx_mtx); return -EBUSY; } num_chanctx = 0; list_for_each_entry_rcu(chanctx, &local->chanctx_list, list) num_chanctx++; - 
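The single CSA counter offset becomes fixed-size per-beacon and per-probe-response arrays in the hunk above, which validates the element counts against IEEE80211_MAX_CSA_COUNTERS_NUM, clears the arrays so no stale entries survive, and copies only what userspace supplied. The validate-clear-copy idiom, with a stand-in bound:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_CSA_COUNTERS 2	/* stand-in for IEEE80211_MAX_CSA_COUNTERS_NUM */

/* Reject oversized input, clear the destination so stale offsets cannot
 * leak, then copy the caller's entries. */
static int set_counter_offsets(uint16_t dst[MAX_CSA_COUNTERS],
			       const uint16_t *src, size_t n)
{
	if (n > MAX_CSA_COUNTERS)
		return -EINVAL;

	memset(dst, 0, MAX_CSA_COUNTERS * sizeof(*dst));
	memcpy(dst, src, n * sizeof(*src));
	return 0;
}

int main(void)
{
	uint16_t offs[MAX_CSA_COUNTERS];
	uint16_t in[1] = { 42 };

	printf("%d -> [%u, %u]\n", set_counter_offsets(offs, in, 1),
	       offs[0], offs[1]);
	return 0;
}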
rcu_read_unlock(); + mutex_unlock(&local->chanctx_mtx); if (num_chanctx > 1) return -EBUSY; @@ -3263,15 +3365,16 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, return err; sdata->csa_radar_required = params->radar_required; - - if (params->block_tx) - ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_chandef = params->chandef; + sdata->csa_block_tx = params->block_tx; + sdata->csa_current_counter = params->count; sdata->vif.csa_active = true; + if (sdata->csa_block_tx) + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + if (changed) { ieee80211_bss_info_change_notify(sdata, changed); drv_channel_switch_beacon(sdata, ¶ms->chandef); @@ -3283,6 +3386,20 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, return 0; } +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int err; + + mutex_lock(&local->mtx); + err = __ieee80211_channel_switch(wiphy, dev, params); + mutex_unlock(&local->mtx); + + return err; +} + static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie) @@ -3295,6 +3412,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, bool need_offchan = false; u32 flags; int ret; + u8 *data; if (params->dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; @@ -3388,7 +3506,20 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, } skb_reserve(skb, local->hw.extra_tx_headroom); - memcpy(skb_put(skb, params->len), params->buf, params->len); + data = skb_put(skb, params->len); + memcpy(data, params->buf, params->len); + + /* Update CSA counters */ + if (sdata->vif.csa_active && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_ADHOC) && + params->n_csa_offsets) { + int i; + u8 c = sdata->csa_current_counter; + + for (i = 0; i < params->n_csa_offsets; i++) + data[params->csa_offsets[i]] = c; + } IEEE80211_SKB_CB(skb)->flags = flags; @@ -3497,320 +3628,6 @@ static int ieee80211_set_rekey_data(struct wiphy *wiphy, return 0; } -static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb) -{ - u8 *pos = (void *)skb_put(skb, 7); - - *pos++ = WLAN_EID_EXT_CAPABILITY; - *pos++ = 5; /* len */ - *pos++ = 0x0; - *pos++ = 0x0; - *pos++ = 0x0; - *pos++ = 0x0; - *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; -} - -static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - u16 capab; - - capab = 0; - if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ) - return capab; - - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; - - return capab; -} - -static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, - u8 *peer, u8 *bssid) -{ - struct ieee80211_tdls_lnkie *lnkid; - - lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie)); - - lnkid->ie_type = WLAN_EID_LINK_ID; - lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; - - memcpy(lnkid->bssid, bssid, ETH_ALEN); - memcpy(lnkid->init_sta, src_addr, ETH_ALEN); - 
memcpy(lnkid->resp_sta, peer, ETH_ALEN); -} - -static int -ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, struct sk_buff *skb) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_tdls_data *tf; - - tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); - - memcpy(tf->da, peer, ETH_ALEN); - memcpy(tf->sa, sdata->vif.addr, ETH_ALEN); - tf->ether_type = cpu_to_be16(ETH_P_TDLS); - tf->payload_type = WLAN_TDLS_SNAP_RFTYPE; - - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_SETUP_REQUEST; - - skb_put(skb, sizeof(tf->u.setup_req)); - tf->u.setup_req.dialog_token = dialog_token; - tf->u.setup_req.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); - ieee80211_tdls_add_ext_capab(skb); - break; - case WLAN_TDLS_SETUP_RESPONSE: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_SETUP_RESPONSE; - - skb_put(skb, sizeof(tf->u.setup_resp)); - tf->u.setup_resp.status_code = cpu_to_le16(status_code); - tf->u.setup_resp.dialog_token = dialog_token; - tf->u.setup_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); - ieee80211_tdls_add_ext_capab(skb); - break; - case WLAN_TDLS_SETUP_CONFIRM: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_SETUP_CONFIRM; - - skb_put(skb, sizeof(tf->u.setup_cfm)); - tf->u.setup_cfm.status_code = cpu_to_le16(status_code); - tf->u.setup_cfm.dialog_token = dialog_token; - break; - case WLAN_TDLS_TEARDOWN: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_TEARDOWN; - - skb_put(skb, sizeof(tf->u.teardown)); - tf->u.teardown.reason_code = cpu_to_le16(status_code); - break; - case WLAN_TDLS_DISCOVERY_REQUEST: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST; - - skb_put(skb, sizeof(tf->u.discover_req)); - tf->u.discover_req.dialog_token = dialog_token; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int -ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, struct sk_buff *skb) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_mgmt *mgmt; - - mgmt = (void *)skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, peer, ETH_ALEN); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); - - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_ACTION); - - switch (action_code) { - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); - mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; - mgmt->u.action.u.tdls_discover_resp.action_code = - WLAN_PUB_ACTION_TDLS_DISCOVER_RES; - mgmt->u.action.u.tdls_discover_resp.dialog_token = - dialog_token; - mgmt->u.action.u.tdls_discover_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); - 
ieee80211_tdls_add_ext_capab(skb); - break; - default: - return -EINVAL; - } - - return 0; -} - -static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, u32 peer_capability, - const u8 *extra_ies, size_t extra_ies_len) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - struct sk_buff *skb = NULL; - bool send_direct; - int ret; - - if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) - return -ENOTSUPP; - - /* make sure we are in managed mode, and associated */ - if (sdata->vif.type != NL80211_IFTYPE_STATION || - !sdata->u.mgd.associated) - return -EINVAL; - - tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n", - action_code, peer); - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - max(sizeof(struct ieee80211_mgmt), - sizeof(struct ieee80211_tdls_data)) + - 50 + /* supported rates */ - 7 + /* ext capab */ - extra_ies_len + - sizeof(struct ieee80211_tdls_lnkie)); - if (!skb) - return -ENOMEM; - - skb_reserve(skb, local->hw.extra_tx_headroom); - - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - case WLAN_TDLS_SETUP_RESPONSE: - case WLAN_TDLS_SETUP_CONFIRM: - case WLAN_TDLS_TEARDOWN: - case WLAN_TDLS_DISCOVERY_REQUEST: - ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer, - action_code, dialog_token, - status_code, skb); - send_direct = false; - break; - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code, - dialog_token, status_code, - skb); - send_direct = true; - break; - default: - ret = -ENOTSUPP; - break; - } - - if (ret < 0) - goto fail; - - if (extra_ies_len) - memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len); - - /* the TDLS link IE is always added last */ - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - case WLAN_TDLS_SETUP_CONFIRM: - case WLAN_TDLS_TEARDOWN: - case WLAN_TDLS_DISCOVERY_REQUEST: - /* we are the initiator */ - ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer, - sdata->u.mgd.bssid); - break; - case WLAN_TDLS_SETUP_RESPONSE: - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - /* we are the responder */ - ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr, - sdata->u.mgd.bssid); - break; - default: - ret = -ENOTSUPP; - goto fail; - } - - if (send_direct) { - ieee80211_tx_skb(sdata, skb); - return 0; - } - - /* - * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise - * we should default to AC_VI. 
-	 */
-	switch (action_code) {
-	case WLAN_TDLS_SETUP_REQUEST:
-	case WLAN_TDLS_SETUP_RESPONSE:
-		skb_set_queue_mapping(skb, IEEE80211_AC_BK);
-		skb->priority = 2;
-		break;
-	default:
-		skb_set_queue_mapping(skb, IEEE80211_AC_VI);
-		skb->priority = 5;
-		break;
-	}
-
-	/* disable bottom halves when entering the Tx path */
-	local_bh_disable();
-	ret = ieee80211_subif_start_xmit(skb, dev);
-	local_bh_enable();
-
-	return ret;
-
-fail:
-	dev_kfree_skb(skb);
-	return ret;
-}
-
-static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *peer, enum nl80211_tdls_operation oper)
-{
-	struct sta_info *sta;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
-		return -ENOTSUPP;
-
-	if (sdata->vif.type != NL80211_IFTYPE_STATION)
-		return -EINVAL;
-
-	tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
-
-	switch (oper) {
-	case NL80211_TDLS_ENABLE_LINK:
-		rcu_read_lock();
-		sta = sta_info_get(sdata, peer);
-		if (!sta) {
-			rcu_read_unlock();
-			return -ENOLINK;
-		}
-
-		set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
-		rcu_read_unlock();
-		break;
-	case NL80211_TDLS_DISABLE_LINK:
-		return sta_info_destroy_addr(sdata, peer);
-	case NL80211_TDLS_TEARDOWN:
-	case NL80211_TDLS_SETUP:
-	case NL80211_TDLS_DISCOVERY_REQ:
-		/* We don't support in-driver setup/teardown/discovery */
-		return -ENOTSUPP;
-	default:
-		return -ENOTSUPP;
-	}
-
-	return 0;
-}
-
 static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
				   const u8 *peer, u64 *cookie)
 {
@@ -3949,6 +3766,21 @@ static int ieee80211_set_qos_map(struct wiphy *wiphy,
 	return 0;
 }
 
+static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy,
+				      struct net_device *dev,
+				      struct cfg80211_chan_def *chandef)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	int ret;
+	u32 changed = 0;
+
+	ret = ieee80211_vif_change_bandwidth(sdata, chandef, &changed);
+	if (ret == 0)
+		ieee80211_bss_info_change_notify(sdata, changed);
+
+	return ret;
+}
+
 const struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -4029,4 +3861,5 @@ const struct cfg80211_ops mac80211_config_ops = {
 	.start_radar_detection = ieee80211_start_radar_detection,
 	.channel_switch = ieee80211_channel_switch,
 	.set_qos_map = ieee80211_set_qos_map,
+	.set_ap_chanwidth = ieee80211_set_ap_chanwidth,
 };
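[Editor's note: the removals above take the ieee80211_tdls_mgmt()/ieee80211_tdls_oper() handlers out of cfg.c; the ieee80211_i.h hunk later in this diff adds matching prototypes, so the implementation is being relocated rather than dropped. As a reading aid, here is a minimal standalone sketch — the helper name is hypothetical — of the 802.11z access-category rule the removed transmit path applies:]

	/* Per 802.11z, TDLS setup request/response frames go out on AC_BK;
	 * all other TDLS frames default to AC_VI.
	 */
	static void tdls_set_ac(struct sk_buff *skb, u8 action_code)
	{
		switch (action_code) {
		case WLAN_TDLS_SETUP_REQUEST:
		case WLAN_TDLS_SETUP_RESPONSE:
			skb_set_queue_mapping(skb, IEEE80211_AC_BK);
			skb->priority = 2;	/* UP 2 maps to AC_BK */
			break;
		default:
			skb_set_queue_mapping(skb, IEEE80211_AC_VI);
			skb->priority = 5;	/* UP 5 maps to AC_VI */
			break;
		}
	}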
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 75b5dd2c9267..a310e33972de 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -9,6 +9,170 @@
 #include "ieee80211_i.h"
 #include "driver-ops.h"
 
+static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local,
+					  struct ieee80211_chanctx *ctx)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int num = 0;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(sdata, &ctx->assigned_vifs, assigned_chanctx_list)
+		num++;
+
+	return num;
+}
+
+static int ieee80211_chanctx_num_reserved(struct ieee80211_local *local,
+					  struct ieee80211_chanctx *ctx)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int num = 0;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(sdata, &ctx->reserved_vifs, reserved_chanctx_list)
+		num++;
+
+	return num;
+}
+
+int ieee80211_chanctx_refcount(struct ieee80211_local *local,
+			       struct ieee80211_chanctx *ctx)
+{
+	return ieee80211_chanctx_num_assigned(local, ctx) +
+	       ieee80211_chanctx_num_reserved(local, ctx);
+}
+
+static int ieee80211_num_chanctx(struct ieee80211_local *local)
+{
+	struct ieee80211_chanctx *ctx;
+	int num = 0;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(ctx, &local->chanctx_list, list)
+		num++;
+
+	return num;
+}
+
+static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local)
+{
+	lockdep_assert_held(&local->chanctx_mtx);
+	return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local);
+}
+
+static const struct cfg80211_chan_def *
+ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local,
+				   struct ieee80211_chanctx *ctx,
+				   const struct cfg80211_chan_def *compat)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(sdata, &ctx->reserved_vifs,
+			    reserved_chanctx_list) {
+		if (!compat)
+			compat = &sdata->reserved_chandef;
+
+		compat = cfg80211_chandef_compatible(&sdata->reserved_chandef,
+						     compat);
+		if (!compat)
+			break;
+	}
+
+	return compat;
+}
+
+static const struct cfg80211_chan_def *
+ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local,
+				       struct ieee80211_chanctx *ctx,
+				       const struct cfg80211_chan_def *compat)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	list_for_each_entry(sdata, &ctx->assigned_vifs,
+			    assigned_chanctx_list) {
+		if (sdata->reserved_chanctx != NULL)
+			continue;
+
+		if (!compat)
+			compat = &sdata->vif.bss_conf.chandef;
+
+		compat = cfg80211_chandef_compatible(
+				&sdata->vif.bss_conf.chandef, compat);
+		if (!compat)
+			break;
+	}
+
+	return compat;
+}
+
+static const struct cfg80211_chan_def *
+ieee80211_chanctx_combined_chandef(struct ieee80211_local *local,
+				   struct ieee80211_chanctx *ctx,
+				   const struct cfg80211_chan_def *compat)
+{
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat);
+	if (!compat)
+		return NULL;
+
+	compat = ieee80211_chanctx_non_reserved_chandef(local, ctx, compat);
+	if (!compat)
+		return NULL;
+
+	return compat;
+}
+
+static bool
+ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local,
+				      struct ieee80211_chanctx *ctx,
+				      const struct cfg80211_chan_def *def)
+{
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	if (ieee80211_chanctx_combined_chandef(local, ctx, def))
+		return true;
+
+	if (!list_empty(&ctx->reserved_vifs) &&
+	    ieee80211_chanctx_reserved_chandef(local, ctx, def))
+		return true;
+
+	return false;
+}
+
+static struct ieee80211_chanctx *
+ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
+				   const struct cfg80211_chan_def *chandef,
+				   enum ieee80211_chanctx_mode mode)
+{
+	struct ieee80211_chanctx *ctx;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	if (mode == IEEE80211_CHANCTX_EXCLUSIVE)
+		return NULL;
+
+	list_for_each_entry(ctx, &local->chanctx_list, list) {
+		if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
+			continue;
+
+		if (!ieee80211_chanctx_can_reserve_chandef(local, ctx,
+							   chandef))
+			continue;
+
+		return ctx;
+	}
+
+	return NULL;
+}
+
 static enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta)
 {
 	switch (sta->bandwidth) {
@@ -190,6 +354,11 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
 		if (!compat)
 			continue;
 
+		compat = ieee80211_chanctx_reserved_chandef(local, ctx,
+							    compat);
+		if (!compat)
+			continue;
+
 		ieee80211_change_chanctx(local, ctx, compat);
 
 		return ctx;
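[Editor's note: the helpers added above replace the old integer ctx->refcount with walks over the new assigned_vifs and reserved_vifs lists, so a context's lifetime follows directly from list membership. A sketch of the resulting idiom — maybe_free_ctx() is a hypothetical wrapper, the two callees are the real functions from this file:]

	static void maybe_free_ctx(struct ieee80211_local *local,
				   struct ieee80211_chanctx *ctx)
	{
		lockdep_assert_held(&local->chanctx_mtx);

		/* freeable once no vif is assigned to it or reserving it */
		if (ieee80211_chanctx_refcount(local, ctx) == 0)
			ieee80211_free_chanctx(local, ctx);
	}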
@@ -217,62 +386,91 @@ static bool ieee80211_is_radar_required(struct ieee80211_local *local)
 }
 
 static struct ieee80211_chanctx *
-ieee80211_new_chanctx(struct ieee80211_local *local,
-		      const struct cfg80211_chan_def *chandef,
-		      enum ieee80211_chanctx_mode mode)
+ieee80211_alloc_chanctx(struct ieee80211_local *local,
+			const struct cfg80211_chan_def *chandef,
+			enum ieee80211_chanctx_mode mode)
 {
 	struct ieee80211_chanctx *ctx;
-	u32 changed;
-	int err;
 
 	lockdep_assert_held(&local->chanctx_mtx);
 
 	ctx = kzalloc(sizeof(*ctx) + local->hw.chanctx_data_size, GFP_KERNEL);
 	if (!ctx)
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 
+	INIT_LIST_HEAD(&ctx->assigned_vifs);
+	INIT_LIST_HEAD(&ctx->reserved_vifs);
 	ctx->conf.def = *chandef;
 	ctx->conf.rx_chains_static = 1;
 	ctx->conf.rx_chains_dynamic = 1;
 	ctx->mode = mode;
 	ctx->conf.radar_enabled = ieee80211_is_radar_required(local);
 	ieee80211_recalc_chanctx_min_def(local, ctx);
+
+	return ctx;
+}
+
+static int ieee80211_add_chanctx(struct ieee80211_local *local,
+				 struct ieee80211_chanctx *ctx)
+{
+	u32 changed;
+	int err;
+
+	lockdep_assert_held(&local->mtx);
+	lockdep_assert_held(&local->chanctx_mtx);
+
 	if (!local->use_chanctx)
 		local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
 
-	/* we hold the mutex to prevent idle from changing */
-	lockdep_assert_held(&local->mtx);
 	/* turn idle off *before* setting channel -- some drivers need that */
 	changed = ieee80211_idle_off(local);
 	if (changed)
 		ieee80211_hw_config(local, changed);
 
 	if (!local->use_chanctx) {
-		local->_oper_chandef = *chandef;
+		local->_oper_chandef = ctx->conf.def;
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 	} else {
 		err = drv_add_chanctx(local, ctx);
 		if (err) {
-			kfree(ctx);
 			ieee80211_recalc_idle(local);
-			return ERR_PTR(err);
+			return err;
 		}
 	}
 
-	/* and keep the mutex held until the new chanctx is on the list */
-	list_add_rcu(&ctx->list, &local->chanctx_list);
+	return 0;
+}
 
+static struct ieee80211_chanctx *
+ieee80211_new_chanctx(struct ieee80211_local *local,
+		      const struct cfg80211_chan_def *chandef,
+		      enum ieee80211_chanctx_mode mode)
+{
+	struct ieee80211_chanctx *ctx;
+	int err;
+
+	lockdep_assert_held(&local->mtx);
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	ctx = ieee80211_alloc_chanctx(local, chandef, mode);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	err = ieee80211_add_chanctx(local, ctx);
+	if (err) {
+		kfree(ctx);
+		return ERR_PTR(err);
+	}
+
+	list_add_rcu(&ctx->list, &local->chanctx_list);
 	return ctx;
 }
 
-static void ieee80211_free_chanctx(struct ieee80211_local *local,
-				   struct ieee80211_chanctx *ctx)
+static void ieee80211_del_chanctx(struct ieee80211_local *local,
+				  struct ieee80211_chanctx *ctx)
 {
-	bool check_single_channel = false;
 	lockdep_assert_held(&local->chanctx_mtx);
 
-	WARN_ON_ONCE(ctx->refcount != 0);
-
 	if (!local->use_chanctx) {
 		struct cfg80211_chan_def *chandef = &local->_oper_chandef;
 		chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
@@ -282,8 +480,9 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local,
 		/* NOTE: Disabling radar is only valid here for
 		 * single channel context. To be sure, check it ...
 		 */
-		if (local->hw.conf.radar_enabled)
-			check_single_channel = true;
+		WARN_ON(local->hw.conf.radar_enabled &&
+			!list_empty(&local->chanctx_list));
+
 		local->hw.conf.radar_enabled = false;
 
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
@@ -291,39 +490,19 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local,
 		drv_remove_chanctx(local, ctx);
 	}
 
-	list_del_rcu(&ctx->list);
-	kfree_rcu(ctx, rcu_head);
-
-	/* throw a warning if this wasn't the only channel context.
-	 */
-	WARN_ON(check_single_channel && !list_empty(&local->chanctx_list));
-
 	ieee80211_recalc_idle(local);
 }
 
-static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
-					struct ieee80211_chanctx *ctx)
+static void ieee80211_free_chanctx(struct ieee80211_local *local,
+				   struct ieee80211_chanctx *ctx)
 {
-	struct ieee80211_local *local = sdata->local;
-	int ret;
-
 	lockdep_assert_held(&local->chanctx_mtx);
 
-	ret = drv_assign_vif_chanctx(local, sdata, ctx);
-	if (ret)
-		return ret;
+	WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0);
 
-	rcu_assign_pointer(sdata->vif.chanctx_conf, &ctx->conf);
-	ctx->refcount++;
-
-	ieee80211_recalc_txpower(sdata);
-	ieee80211_recalc_chanctx_min_def(local, ctx);
-	sdata->vif.bss_conf.idle = false;
-
-	if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
-	    sdata->vif.type != NL80211_IFTYPE_MONITOR)
-		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
-
-	return 0;
+	list_del_rcu(&ctx->list);
+	ieee80211_del_chanctx(local, ctx);
+	kfree_rcu(ctx, rcu_head);
 }
 
 static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
@@ -384,30 +563,58 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
 	drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR);
 }
 
-static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
-					   struct ieee80211_chanctx *ctx)
+static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
+					struct ieee80211_chanctx *new_ctx)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx_conf *conf;
+	struct ieee80211_chanctx *curr_ctx = NULL;
+	int ret = 0;
 
-	lockdep_assert_held(&local->chanctx_mtx);
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
 
-	ctx->refcount--;
-	rcu_assign_pointer(sdata->vif.chanctx_conf, NULL);
+	if (conf) {
+		curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
 
-	sdata->vif.bss_conf.idle = true;
+		drv_unassign_vif_chanctx(local, sdata, curr_ctx);
+		conf = NULL;
+		list_del(&sdata->assigned_chanctx_list);
+	}
 
-	if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
-	    sdata->vif.type != NL80211_IFTYPE_MONITOR)
-		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
+	if (new_ctx) {
+		ret = drv_assign_vif_chanctx(local, sdata, new_ctx);
+		if (ret)
+			goto out;
 
-	drv_unassign_vif_chanctx(local, sdata, ctx);
+		conf = &new_ctx->conf;
+		list_add(&sdata->assigned_chanctx_list,
+			 &new_ctx->assigned_vifs);
+	}
+
+out:
+	rcu_assign_pointer(sdata->vif.chanctx_conf, conf);
+
+	sdata->vif.bss_conf.idle = !conf;
+
+	if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) {
+		ieee80211_recalc_chanctx_chantype(local, curr_ctx);
+		ieee80211_recalc_smps_chanctx(local, curr_ctx);
+		ieee80211_recalc_radar_chanctx(local, curr_ctx);
+		ieee80211_recalc_chanctx_min_def(local, curr_ctx);
+	}
 
-	if (ctx->refcount > 0) {
-		ieee80211_recalc_chanctx_chantype(sdata->local, ctx);
-		ieee80211_recalc_smps_chanctx(local, ctx);
-		ieee80211_recalc_radar_chanctx(local, ctx);
-		ieee80211_recalc_chanctx_min_def(local, ctx);
+	if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) {
+		ieee80211_recalc_txpower(sdata);
+		ieee80211_recalc_chanctx_min_def(local, new_ctx);
 	}
+
+	if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+	    sdata->vif.type != NL80211_IFTYPE_MONITOR)
+		ieee80211_bss_info_change_notify(sdata,
+						 BSS_CHANGED_IDLE);
+
+	return ret;
 }
 
 static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
@@ -425,8 +632,11 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
 
 	ctx = container_of(conf, struct ieee80211_chanctx, conf);
 
-	ieee80211_unassign_vif_chanctx(sdata, ctx);
-	if (ctx->refcount == 0)
+	if (sdata->reserved_chanctx)
+		ieee80211_vif_unreserve_chanctx(sdata);
+
+	ieee80211_assign_vif_chanctx(sdata, NULL);
+	if (ieee80211_chanctx_refcount(local, ctx) == 0)
 		ieee80211_free_chanctx(local, ctx);
 }
 
@@ -526,6 +736,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_chanctx *ctx;
+	u8 radar_detect_width = 0;
 	int ret;
 
 	lockdep_assert_held(&local->mtx);
@@ -533,6 +744,22 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
 	WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev));
 
 	mutex_lock(&local->chanctx_mtx);
+
+	ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
+					    chandef,
+					    sdata->wdev.iftype);
+	if (ret < 0)
+		goto out;
+	if (ret > 0)
+		radar_detect_width = BIT(chandef->width);
+
+	sdata->radar_required = ret;
+
+	ret = ieee80211_check_combinations(sdata, chandef, mode,
+					   radar_detect_width);
+	if (ret < 0)
+		goto out;
+
 	__ieee80211_vif_release_channel(sdata);
 
 	ctx = ieee80211_find_chanctx(local, chandef, mode);
@@ -548,7 +775,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
 	ret = ieee80211_assign_vif_chanctx(sdata, ctx);
 	if (ret) {
 		/* if assign fails refcount stays the same */
-		if (ctx->refcount == 0)
+		if (ieee80211_chanctx_refcount(local, ctx) == 0)
 			ieee80211_free_chanctx(local, ctx);
 		goto out;
 	}
@@ -560,15 +787,47 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
 	return ret;
 }
 
+static int __ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
+					  struct ieee80211_chanctx *ctx,
+					  u32 *changed)
+{
+	struct ieee80211_local *local = sdata->local;
+	const struct cfg80211_chan_def *chandef = &sdata->csa_chandef;
+	u32 chanctx_changed = 0;
+
+	if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
+				     IEEE80211_CHAN_DISABLED))
+		return -EINVAL;
+
+	if (ieee80211_chanctx_refcount(local, ctx) != 1)
+		return -EINVAL;
+
+	if (sdata->vif.bss_conf.chandef.width != chandef->width) {
+		chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH;
+		*changed |= BSS_CHANGED_BANDWIDTH;
+	}
+
+	sdata->vif.bss_conf.chandef = *chandef;
+	ctx->conf.def = *chandef;
+
+	chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL;
+	drv_change_chanctx(local, ctx, chanctx_changed);
+
+	ieee80211_recalc_chanctx_chantype(local, ctx);
+	ieee80211_recalc_smps_chanctx(local, ctx);
+	ieee80211_recalc_radar_chanctx(local, ctx);
+	ieee80211_recalc_chanctx_min_def(local, ctx);
+
+	return 0;
+}
+
 int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
 				 u32 *changed)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_chanctx_conf *conf;
 	struct ieee80211_chanctx *ctx;
-	const struct cfg80211_chan_def *chandef = &sdata->csa_chandef;
 	int ret;
-	u32 chanctx_changed = 0;
 
 	lockdep_assert_held(&local->mtx);
 
@@ -576,11 +835,94 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
 	if (WARN_ON(!sdata->vif.csa_active))
 		return -EINVAL;
 
-	if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
-				     IEEE80211_CHAN_DISABLED))
+	mutex_lock(&local->chanctx_mtx);
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	if (!conf) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ctx = container_of(conf, struct ieee80211_chanctx, conf);
+
+	ret = __ieee80211_vif_change_channel(sdata, ctx, changed);
+ out:
+	mutex_unlock(&local->chanctx_mtx);
+	return ret;
+}
+
+static void
+__ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
+				      bool clear)
+{
+	struct ieee80211_local *local __maybe_unused = sdata->local;
+	struct ieee80211_sub_if_data *vlan;
+	struct ieee80211_chanctx_conf *conf;
+
+	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
+		return;
+
+	lockdep_assert_held(&local->mtx);
+
+	/* Check that conf exists, even when clearing this function
+	 * must be called with the AP's channel context still there
+	 * as it would otherwise cause VLANs to have an invalid
+	 * channel context pointer for a while, possibly pointing
+	 * to a channel context that has already been freed.
+	 */
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	WARN_ON(!conf);
+
+	if (clear)
+		conf = NULL;
+
+	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+		rcu_assign_pointer(vlan->vif.chanctx_conf, conf);
+}
+
+void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
+					 bool clear)
+{
+	struct ieee80211_local *local = sdata->local;
+
+	mutex_lock(&local->chanctx_mtx);
+
+	__ieee80211_vif_copy_chanctx_to_vlans(sdata, clear);
+
+	mutex_unlock(&local->chanctx_mtx);
+}
+
+int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_chanctx *ctx = sdata->reserved_chanctx;
+
+	lockdep_assert_held(&sdata->local->chanctx_mtx);
+
+	if (WARN_ON(!ctx))
 		return -EINVAL;
 
+	list_del(&sdata->reserved_chanctx_list);
+	sdata->reserved_chanctx = NULL;
+
+	if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0)
+		ieee80211_free_chanctx(sdata->local, ctx);
+
+	return 0;
+}
+
+int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
+				  const struct cfg80211_chan_def *chandef,
+				  enum ieee80211_chanctx_mode mode,
+				  bool radar_required)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx_conf *conf;
+	struct ieee80211_chanctx *new_ctx, *curr_ctx;
+	int ret = 0;
+
 	mutex_lock(&local->chanctx_mtx);
+
 	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
 					 lockdep_is_held(&local->chanctx_mtx));
 	if (!conf) {
@@ -588,30 +930,108 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
 		goto out;
 	}
 
-	ctx = container_of(conf, struct ieee80211_chanctx, conf);
-	if (ctx->refcount != 1) {
+	curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
+
+	new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode);
+	if (!new_ctx) {
+		if (ieee80211_chanctx_refcount(local, curr_ctx) == 1 &&
+		    (local->hw.flags & IEEE80211_HW_CHANGE_RUNNING_CHANCTX)) {
+			/* if we're the only users of the chanctx and
+			 * the driver supports changing a running
+			 * context, reserve our current context
+			 */
+			new_ctx = curr_ctx;
+		} else if (ieee80211_can_create_new_chanctx(local)) {
+			/* create a new context and reserve it */
+			new_ctx = ieee80211_new_chanctx(local, chandef, mode);
+			if (IS_ERR(new_ctx)) {
+				ret = PTR_ERR(new_ctx);
+				goto out;
+			}
+		} else {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	list_add(&sdata->reserved_chanctx_list, &new_ctx->reserved_vifs);
+	sdata->reserved_chanctx = new_ctx;
+	sdata->reserved_chandef = *chandef;
+	sdata->reserved_radar_required = radar_required;
+out:
+	mutex_unlock(&local->chanctx_mtx);
+	return ret;
+}
+
+int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata,
+				       u32 *changed)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx *ctx;
+	struct ieee80211_chanctx *old_ctx;
+	struct ieee80211_chanctx_conf *conf;
+	int ret;
+	u32 tmp_changed = *changed;
+
+	/* TODO: need to recheck if the chandef is usable etc.? */
+
+	lockdep_assert_held(&local->mtx);
+
+	mutex_lock(&local->chanctx_mtx);
+
+	ctx = sdata->reserved_chanctx;
+	if (WARN_ON(!ctx)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	if (sdata->vif.bss_conf.chandef.width != chandef->width) {
-		chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH;
-		*changed |= BSS_CHANGED_BANDWIDTH;
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	if (!conf) {
+		ret = -EINVAL;
+		goto out;
 	}
 
-	sdata->vif.bss_conf.chandef = *chandef;
-	ctx->conf.def = *chandef;
+	old_ctx = container_of(conf, struct ieee80211_chanctx, conf);
 
-	chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL;
-	drv_change_chanctx(local, ctx, chanctx_changed);
+	if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
+		tmp_changed |= BSS_CHANGED_BANDWIDTH;
+
+	sdata->vif.bss_conf.chandef = sdata->reserved_chandef;
+
+	/* unref our reservation */
+	sdata->reserved_chanctx = NULL;
+	sdata->radar_required = sdata->reserved_radar_required;
+	list_del(&sdata->reserved_chanctx_list);
+
+	if (old_ctx == ctx) {
+		/* This is our own context, just change it */
+		ret = __ieee80211_vif_change_channel(sdata, old_ctx,
+						     &tmp_changed);
+		if (ret)
+			goto out;
+	} else {
+		ret = ieee80211_assign_vif_chanctx(sdata, ctx);
+		if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
+			ieee80211_free_chanctx(local, old_ctx);
+		if (ret) {
+			/* if assign fails refcount stays the same */
+			if (ieee80211_chanctx_refcount(local, ctx) == 0)
+				ieee80211_free_chanctx(local, ctx);
+			goto out;
+		}
+
+		if (sdata->vif.type == NL80211_IFTYPE_AP)
+			__ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+	}
+
+	*changed = tmp_changed;
 
 	ieee80211_recalc_chanctx_chantype(local, ctx);
 	ieee80211_recalc_smps_chanctx(local, ctx);
 	ieee80211_recalc_radar_chanctx(local, ctx);
 	ieee80211_recalc_chanctx_min_def(local, ctx);
-
-	ret = 0;
- out:
+out:
 	mutex_unlock(&local->chanctx_mtx);
 	return ret;
 }
@@ -695,40 +1115,6 @@ void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata)
 	mutex_unlock(&local->chanctx_mtx);
 }
 
-void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
-					 bool clear)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_sub_if_data *vlan;
-	struct ieee80211_chanctx_conf *conf;
-
-	ASSERT_RTNL();
-
-	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
-		return;
-
-	mutex_lock(&local->chanctx_mtx);
-
-	/*
-	 * Check that conf exists, even when clearing this function
-	 * must be called with the AP's channel context still there
-	 * as it would otherwise cause VLANs to have an invalid
-	 * channel context pointer for a while, possibly pointing
-	 * to a channel context that has already been freed.
-	 */
-	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
-					 lockdep_is_held(&local->chanctx_mtx));
-	WARN_ON(!conf);
-
-	if (clear)
-		conf = NULL;
-
-	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
-		rcu_assign_pointer(vlan->vif.chanctx_conf, conf);
-
-	mutex_unlock(&local->chanctx_mtx);
-}
-
 void ieee80211_iter_chan_contexts_atomic(
 	struct ieee80211_hw *hw,
 	void (*iter)(struct ieee80211_hw *hw,
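[Editor's note: chan.c now exposes a two-phase reservation API: a vif first parks a reservation with ieee80211_vif_reserve_chanctx() (reusing a compatible context, its own running context, or a freshly created one) and later commits with ieee80211_vif_use_reserved_context(). A condensed sketch of the intended calling sequence, with error handling and the CSA machinery elided; the caller is hypothetical, the two calls use the signatures added above:]

	static int switch_vif_to_chandef(struct ieee80211_sub_if_data *sdata,
					 const struct cfg80211_chan_def *chandef)
	{
		u32 changed = 0;
		int err;

		err = ieee80211_vif_reserve_chanctx(sdata, chandef,
						    IEEE80211_CHANCTX_SHARED,
						    false);
		if (err)
			return err;

		/* ... announce the switch, count down the CSA ... */

		return ieee80211_vif_use_reserved_context(sdata, &changed);
	}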
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index fa16e54980a1..0e963bc1ceac 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -128,7 +128,7 @@ static ssize_t sta_tx_latency_stat_write(struct file *file,
 	if (!strcmp(buf, TX_LATENCY_DISABLED)) {
 		if (!tx_latency)
 			goto unlock;
-		rcu_assign_pointer(local->tx_latency, NULL);
+		RCU_INIT_POINTER(local->tx_latency, NULL);
 		synchronize_rcu();
 		kfree(tx_latency);
 		goto unlock;
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 214ed4ecd739..60c35afee29d 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -1,6 +1,8 @@
 #ifndef __MAC80211_DEBUGFS_H
 #define __MAC80211_DEBUGFS_H
 
+#include "ieee80211_i.h"
+
 #ifdef CONFIG_MAC80211_DEBUGFS
 void debugfs_hw_add(struct ieee80211_local *local);
 int __printf(4, 5) mac80211_format_buffer(char __user *userbuf, size_t count,
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 40a648938985..e205ebabfa50 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -34,8 +34,7 @@ static ssize_t ieee80211_if_read(
 	ssize_t ret = -EINVAL;
 
 	read_lock(&dev_base_lock);
-	if (sdata->dev->reg_state == NETREG_REGISTERED)
-		ret = (*format)(sdata, buf, sizeof(buf));
+	ret = (*format)(sdata, buf, sizeof(buf));
 	read_unlock(&dev_base_lock);
 
 	if (ret >= 0)
@@ -62,8 +61,7 @@ static ssize_t ieee80211_if_write(
 
 	ret = -ENODEV;
 	rtnl_lock();
-	if (sdata->dev->reg_state == NETREG_REGISTERED)
-		ret = (*write)(sdata, buf, count);
+	ret = (*write)(sdata, buf, count);
 	rtnl_unlock();
 
 	return ret;
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index 79025e79f4d6..9f5501a9a795 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -3,6 +3,8 @@
 #ifndef __IEEE80211_DEBUGFS_NETDEV_H
 #define __IEEE80211_DEBUGFS_NETDEV_H
 
+#include "ieee80211_i.h"
+
 #ifdef CONFIG_MAC80211_DEBUGFS
 void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata);
 void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
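[Editor's note: this diff repeatedly converts rcu_assign_pointer(p, NULL) to RCU_INIT_POINTER(p, NULL) (debugfs.c above; ibss.c, iface.c and mesh.c below). Storing NULL publishes no new structure to readers, so the write barrier implied by rcu_assign_pointer() buys nothing; RCU_INIT_POINTER() is the barrier-free form. The teardown pattern, taken from the debugfs.c hunk:]

	RCU_INIT_POINTER(local->tx_latency, NULL);
	synchronize_rcu();	/* wait for current readers to finish */
	kfree(tx_latency);	/* now nobody can still see the object */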
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index fc689f5d971e..bd782dcffcc7 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -5,11 +5,11 @@
 #include "ieee80211_i.h"
 #include "trace.h"
 
-static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
+static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
 {
-	WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
-	     "%s: Failed check-sdata-in-driver check, flags: 0x%x\n",
-	     sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
+	return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
+		     "%s: Failed check-sdata-in-driver check, flags: 0x%x\n",
+		     sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
 }
 
 static inline struct ieee80211_sub_if_data *
@@ -168,7 +168,8 @@ static inline int drv_change_interface(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_change_interface(local, sdata, type, p2p);
 	ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
@@ -181,7 +182,8 @@ static inline void drv_remove_interface(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_remove_interface(local, sdata);
 	local->ops->remove_interface(&local->hw, &sdata->vif);
@@ -219,7 +221,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
 		    sdata->vif.type == NL80211_IFTYPE_MONITOR))
 		return;
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_bss_info_changed(local, sdata, info, changed);
 	if (local->ops->bss_info_changed)
@@ -278,7 +281,8 @@ static inline int drv_set_key(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_set_key(local, cmd, sdata, sta, key);
 	ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key);
@@ -298,7 +302,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local,
 		ista = &sta->sta;
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_update_tkip_key(local, sdata, conf, ista, iv32);
 	if (local->ops->update_tkip_key)
@@ -315,7 +320,8 @@ static inline int drv_hw_scan(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_hw_scan(local, sdata);
 	ret = local->ops->hw_scan(&local->hw, &sdata->vif, req);
@@ -328,7 +334,8 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_cancel_hw_scan(local, sdata);
 	local->ops->cancel_hw_scan(&local->hw, &sdata->vif);
@@ -345,7 +352,8 @@ drv_sched_scan_start(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sched_scan_start(local, sdata);
 	ret = local->ops->sched_scan_start(&local->hw, &sdata->vif,
@@ -361,7 +369,8 @@ static inline int drv_sched_scan_stop(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sched_scan_stop(local, sdata);
 	ret = local->ops->sched_scan_stop(&local->hw, &sdata->vif);
@@ -462,7 +471,8 @@ static inline void drv_sta_notify(struct ieee80211_local *local,
 			  struct ieee80211_sta *sta)
 {
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_sta_notify(local, sdata, cmd, sta);
 	if (local->ops->sta_notify)
@@ -479,7 +489,8 @@ static inline int drv_sta_add(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sta_add(local, sdata, sta);
 	if (local->ops->sta_add)
@@ -497,7 +508,8 @@ static inline void drv_sta_remove(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_sta_remove(local, sdata, sta);
 	if (local->ops->sta_remove)
@@ -515,7 +527,8 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	if (local->ops->sta_add_debugfs)
 		local->ops->sta_add_debugfs(&local->hw, &sdata->vif,
@@ -545,7 +558,8 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta);
 	if (local->ops->sta_pre_rcu_remove)
@@ -566,7 +580,8 @@ int drv_sta_state(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state);
 	if (local->ops->sta_state) {
@@ -590,7 +605,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local,
 				     struct ieee80211_sta *sta, u32 changed)
 {
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
 		(sdata->vif.type != NL80211_IFTYPE_ADHOC &&
@@ -612,7 +628,8 @@ static inline int drv_conf_tx(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_conf_tx(local, sdata, ac, params);
 	if (local->ops->conf_tx)
@@ -629,7 +646,8 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return ret;
 
 	trace_drv_get_tsf(local, sdata);
 	if (local->ops->get_tsf)
@@ -644,7 +662,8 @@ static inline void drv_set_tsf(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_set_tsf(local, sdata, tsf);
 	if (local->ops->set_tsf)
@@ -657,7 +676,8 @@ static inline void drv_reset_tsf(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_reset_tsf(local, sdata);
 	if (local->ops->reset_tsf)
@@ -689,7 +709,8 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
 
@@ -726,13 +747,19 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local)
 }
 
 static inline void drv_flush(struct ieee80211_local *local,
+			     struct ieee80211_sub_if_data *sdata,
 			     u32 queues, bool drop)
 {
+	struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL;
+
 	might_sleep();
 
+	if (sdata && !check_sdata_in_driver(sdata))
+		return;
+
 	trace_drv_flush(local, queues, drop);
 	if (local->ops->flush)
-		local->ops->flush(&local->hw, queues, drop);
+		local->ops->flush(&local->hw, vif, queues, drop);
 	trace_drv_return_void(local);
 }
 
@@ -848,7 +875,8 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_set_bitrate_mask(local, sdata, mask);
 	if (local->ops->set_bitrate_mask)
@@ -863,7 +891,8 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local,
 				      struct ieee80211_sub_if_data *sdata,
 				      struct cfg80211_gtk_rekey_data *data)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_set_rekey_data(local, sdata, data);
 	if (local->ops->set_rekey_data)
@@ -931,7 +960,8 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 	WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
 
 	trace_drv_mgd_prepare_tx(local, sdata);
@@ -958,6 +988,9 @@ static inline int drv_add_chanctx(struct ieee80211_local *local,
 static inline void drv_remove_chanctx(struct ieee80211_local *local,
 				      struct ieee80211_chanctx *ctx)
 {
+	if (WARN_ON(!ctx->driver_present))
+		return;
+
 	trace_drv_remove_chanctx(local, ctx);
 	if (local->ops->remove_chanctx)
 		local->ops->remove_chanctx(&local->hw, &ctx->conf);
@@ -983,7 +1016,8 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local,
 {
 	int ret = 0;
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_assign_vif_chanctx(local, sdata, ctx);
 	if (local->ops->assign_vif_chanctx) {
@@ -1001,7 +1035,8 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
 					    struct ieee80211_sub_if_data *sdata,
 					    struct ieee80211_chanctx *ctx)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_unassign_vif_chanctx(local, sdata, ctx);
 	if (local->ops->unassign_vif_chanctx) {
@@ -1013,12 +1048,66 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
+static inline int
+drv_switch_vif_chanctx(struct ieee80211_local *local,
+		       struct ieee80211_vif_chanctx_switch *vifs,
+		       int n_vifs,
+		       enum ieee80211_chanctx_switch_mode mode)
+{
+	int ret = 0;
+	int i;
+
+	if (!local->ops->switch_vif_chanctx)
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < n_vifs; i++) {
+		struct ieee80211_chanctx *new_ctx =
+			container_of(vifs[i].new_ctx,
+				     struct ieee80211_chanctx,
+				     conf);
+		struct ieee80211_chanctx *old_ctx =
+			container_of(vifs[i].old_ctx,
+				     struct ieee80211_chanctx,
+				     conf);
+
+		WARN_ON_ONCE(!old_ctx->driver_present);
+		WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS &&
+			      new_ctx->driver_present) ||
+			     (mode == CHANCTX_SWMODE_REASSIGN_VIF &&
+			      !new_ctx->driver_present));
+	}
+
+	trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode);
+	ret = local->ops->switch_vif_chanctx(&local->hw,
+					     vifs, n_vifs, mode);
+	trace_drv_return_int(local, ret);
+
+	if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) {
+		for (i = 0; i < n_vifs; i++) {
+			struct ieee80211_chanctx *new_ctx =
+				container_of(vifs[i].new_ctx,
+					     struct ieee80211_chanctx,
+					     conf);
+			struct ieee80211_chanctx *old_ctx =
+				container_of(vifs[i].old_ctx,
+					     struct ieee80211_chanctx,
+					     conf);
+
+			new_ctx->driver_present = true;
+			old_ctx->driver_present = false;
+		}
+	}
+
+	return ret;
+}
+
 static inline int drv_start_ap(struct ieee80211_local *local,
 			       struct ieee80211_sub_if_data *sdata)
 {
 	int ret = 0;
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_start_ap(local, sdata, &sdata->vif.bss_conf);
 	if (local->ops->start_ap)
@@ -1030,7 +1119,8 @@ static inline int drv_start_ap(struct ieee80211_local *local,
 static inline void drv_stop_ap(struct ieee80211_local *local,
 			       struct ieee80211_sub_if_data *sdata)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_stop_ap(local, sdata);
 	if (local->ops->stop_ap)
@@ -1053,7 +1143,8 @@ drv_set_default_unicast_key(struct ieee80211_local *local,
 			    struct ieee80211_sub_if_data *sdata,
 			    int key_idx)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	WARN_ON_ONCE(key_idx < -1 || key_idx > 3);
 
@@ -1095,7 +1186,8 @@ static inline int drv_join_ibss(struct ieee80211_local *local,
 	int ret = 0;
 
 	might_sleep();
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf);
 	if (local->ops->join_ibss)
@@ -1108,7 +1200,8 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
 				  struct ieee80211_sub_if_data *sdata)
 {
 	might_sleep();
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_leave_ibss(local, sdata);
 	if (local->ops->leave_ibss)
@@ -1116,4 +1209,17 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
+static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
+					      struct ieee80211_sta *sta)
+{
+	u32 ret = 0;
+
+	trace_drv_get_expected_throughput(sta);
+	if (local->ops->get_expected_throughput)
+		ret = local->ops->get_expected_throughput(sta);
+	trace_drv_return_u32(local, ret);
+
+	return ret;
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
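[Editor's note: check_sdata_in_driver() now returns the negated WARN() result instead of only warning, and every drv_*() wrapper above bails out when the interface was never handed to the driver — returning -EIO where a status is expected, a default value for getters, or silently for void callbacks. The recurring guard, shown as a hypothetical wrapper:]

	static inline int drv_example_op(struct ieee80211_local *local,
					 struct ieee80211_sub_if_data *sdata)
	{
		if (!check_sdata_in_driver(sdata))
			return -EIO;	/* sdata not in driver: warn + fail */

		/* ... trace, call local->ops->..., trace the return ... */
		return 0;
	}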
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index c150b68436d7..15702ff64a4c 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -31,6 +31,18 @@ static void __check_htcap_disable(struct ieee80211_ht_cap *ht_capa,
 	}
 }
 
+static void __check_htcap_enable(struct ieee80211_ht_cap *ht_capa,
+				 struct ieee80211_ht_cap *ht_capa_mask,
+				 struct ieee80211_sta_ht_cap *ht_cap,
+				 u16 flag)
+{
+	__le16 le_flag = cpu_to_le16(flag);
+
+	if ((ht_capa_mask->cap_info & le_flag) &&
+	    (ht_capa->cap_info & le_flag))
+		ht_cap->cap |= flag;
+}
+
 void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_sta_ht_cap *ht_cap)
 {
@@ -59,7 +71,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 	smask = (u8 *)(&ht_capa_mask->mcs.rx_mask);
 
 	/* NOTE: If you add more over-rides here, update register_hw
-	 * ht_capa_mod_msk logic in main.c as well.
+	 * ht_capa_mod_mask logic in main.c as well.
 	 * And, if this method can ever change ht_cap.ht_supported, fix
 	 * the check in ieee80211_add_ht_ie.
 	 */
@@ -86,6 +98,14 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 	__check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
 			      IEEE80211_HT_CAP_MAX_AMSDU);
 
+	/* Allow user to disable LDPC */
+	__check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
+			      IEEE80211_HT_CAP_LDPC_CODING);
+
+	/* Allow user to enable 40 MHz intolerant bit. */
+	__check_htcap_enable(ht_capa, ht_capa_mask, ht_cap,
+			     IEEE80211_HT_CAP_40MHZ_INTOLERANT);
+
 	/* Allow user to decrease AMPDU factor */
 	if (ht_capa_mask->ampdu_params_info &
 	    IEEE80211_HT_AMPDU_PARM_FACTOR) {
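[Editor's note: __check_htcap_enable() is the mirror of the pre-existing __check_htcap_disable(): a capability bit is forced on only when the override mask selects the bit and the override value also sets it. A worked instance of that rule for the 40 MHz intolerant bit this hunk wires up:]

	/* mask bit = "user controls this", capa bit = "user wants it on" */
	__le16 flag = cpu_to_le16(IEEE80211_HT_CAP_40MHZ_INTOLERANT);

	if ((ht_capa_mask->cap_info & flag) && (ht_capa->cap_info & flag))
		ht_cap->cap |= IEEE80211_HT_CAP_40MHZ_INTOLERANT;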
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 06d28787945b..18ee0a256b1e 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -143,7 +143,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
 		*pos++ = csa_settings->block_tx ? 1 : 0;
 		*pos++ = ieee80211_frequency_to_channel(
 				csa_settings->chandef.chan->center_freq);
-		sdata->csa_counter_offset_beacon = (pos - presp->head);
+		sdata->csa_counter_offset_beacon[0] = (pos - presp->head);
 		*pos++ = csa_settings->count;
 	}
 
@@ -228,7 +228,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	struct beacon_data *presp;
 	enum nl80211_bss_scan_width scan_width;
 	bool have_higher_than_11mbit;
-	bool radar_required = false;
+	bool radar_required;
 	int err;
 
 	sdata_assert_lock(sdata);
@@ -253,7 +253,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 
 	presp = rcu_dereference_protected(ifibss->presp,
 					  lockdep_is_held(&sdata->wdev.mtx));
-	rcu_assign_pointer(ifibss->presp, NULL);
+	RCU_INIT_POINTER(ifibss->presp, NULL);
 	if (presp)
 		kfree_rcu(presp, rcu_head);
 
@@ -262,7 +262,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	/* make a copy of the chandef, it could be modified below. */
 	chandef = *req_chandef;
 	chan = chandef.chan;
-	if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
+	if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+				     NL80211_IFTYPE_ADHOC)) {
 		if (chandef.width == NL80211_CHAN_WIDTH_5 ||
 		    chandef.width == NL80211_CHAN_WIDTH_10 ||
 		    chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
@@ -274,7 +275,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 			chandef.width = NL80211_CHAN_WIDTH_20;
 			chandef.center_freq1 = chan->center_freq;
 			/* check again for downgraded chandef */
-			if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
+			if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+						     NL80211_IFTYPE_ADHOC)) {
 				sdata_info(sdata,
 					   "Failed to join IBSS, beacons forbidden\n");
 				return;
@@ -282,21 +284,20 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	}
 
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &chandef);
+					    &chandef, NL80211_IFTYPE_ADHOC);
 	if (err < 0) {
 		sdata_info(sdata,
 			   "Failed to join IBSS, invalid chandef\n");
 		return;
 	}
-	if (err > 0) {
-		if (!ifibss->userspace_handles_dfs) {
-			sdata_info(sdata,
-				   "Failed to join IBSS, DFS channel without control program\n");
-			return;
-		}
-		radar_required = true;
+	if (err > 0 && !ifibss->userspace_handles_dfs) {
+		sdata_info(sdata,
+			   "Failed to join IBSS, DFS channel without control program\n");
+		return;
 	}
 
+	radar_required = err;
+
 	mutex_lock(&local->mtx);
 	if (ieee80211_vif_use_channel(sdata, &chandef,
 				      ifibss->fixed_channel ?
@@ -775,7 +776,8 @@ static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata)
 	 * unavailable.
 	 */
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &ifibss->chandef);
+					    &ifibss->chandef,
+					    NL80211_IFTYPE_ADHOC);
 	if (err > 0)
 		cfg80211_radar_event(sdata->local->hw.wiphy, &ifibss->chandef,
 				     GFP_ATOMIC);
@@ -861,7 +863,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		goto disconnect;
 	}
 
-	if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef)) {
+	if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef,
+				     NL80211_IFTYPE_ADHOC)) {
 		sdata_info(sdata,
 			   "IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
 			   ifibss->bssid,
@@ -873,17 +876,17 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	}
 
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &params.chandef);
+					    &params.chandef,
+					    NL80211_IFTYPE_ADHOC);
 	if (err < 0)
 		goto disconnect;
-	if (err) {
+	if (err > 0 && !ifibss->userspace_handles_dfs) {
 		/* IBSS-DFS only allowed with a control program */
-		if (!ifibss->userspace_handles_dfs)
-			goto disconnect;
-
-		params.radar_required = true;
+		goto disconnect;
 	}
 
+	params.radar_required = err;
+
 	if (cfg80211_chandef_identical(&params.chandef,
 				       &sdata->vif.bss_conf.chandef)) {
 		ibss_dbg(sdata,
@@ -1636,7 +1639,33 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 	u32 changed = 0;
 	u32 rate_flags;
 	struct ieee80211_supported_band *sband;
+	enum ieee80211_chanctx_mode chanmode;
+	struct ieee80211_local *local = sdata->local;
+	int radar_detect_width = 0;
 	int i;
+	int ret;
+
+	ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
+					    &params->chandef,
+					    sdata->wdev.iftype);
+	if (ret < 0)
+		return ret;
+
+	if (ret > 0) {
+		if (!params->userspace_handles_dfs)
+			return -EINVAL;
+		radar_detect_width = BIT(params->chandef.width);
+	}
+
+	chanmode = (params->channel_fixed && !ret) ?
+		IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE;
+
+	mutex_lock(&local->chanctx_mtx);
+	ret = ieee80211_check_combinations(sdata, &params->chandef, chanmode,
+					   radar_detect_width);
+	mutex_unlock(&local->chanctx_mtx);
+	if (ret < 0)
+		return ret;
 
 	if (params->bssid) {
 		memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN);
@@ -1648,10 +1677,11 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 	sdata->u.ibss.control_port = params->control_port;
 	sdata->u.ibss.userspace_handles_dfs = params->userspace_handles_dfs;
 	sdata->u.ibss.basic_rates = params->basic_rates;
+	sdata->u.ibss.last_scan_completed = jiffies;
 
 	/* fix basic_rates if channel does not support these rates */
 	rate_flags = ieee80211_chandef_rate_flags(&params->chandef);
-	sband = sdata->local->hw.wiphy->bands[params->chandef.chan->band];
+	sband = local->hw.wiphy->bands[params->chandef.chan->band];
 	for (i = 0; i < sband->n_bitrates; i++) {
 		if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
 			sdata->u.ibss.basic_rates &= ~BIT(i);
@@ -1700,9 +1730,9 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	sdata->smps_mode = IEEE80211_SMPS_OFF;
-	sdata->needed_rx_chains = sdata->local->rx_chains;
+	sdata->needed_rx_chains = local->rx_chains;
 
-	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+	ieee80211_queue_work(&local->hw, &sdata->work);
 
 	return 0;
 }
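[Editor's note: cfg80211_chandef_dfs_required() now takes the interface type, and the ibss.c hunks above settle on one consistent reading of its tri-state result: negative = unusable chandef, 0 = no radar detection needed, positive = DFS required (only allowed when userspace handles it). The repeated idiom, condensed from the hunks above with their variable names:]

	err = cfg80211_chandef_dfs_required(wiphy, chandef,
					    NL80211_IFTYPE_ADHOC);
	if (err < 0)
		return err;		/* chandef not usable at all */
	if (err > 0 && !userspace_handles_dfs)
		return -EINVAL;		/* DFS needs a control program */
	radar_required = err;		/* bool: non-zero means DFS */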
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f169b6ee94ee..ac9836e0aab3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -260,7 +260,7 @@ struct ieee80211_if_ap {
 	/* to be used after channel switch. */
 	struct cfg80211_beacon_data *next_beacon;
 
-	struct list_head vlans;
+	struct list_head vlans; /* write-protected with RTNL and local->mtx */
 
 	struct ps_data ps;
 	atomic_t num_mcast_sta; /* number of stations receiving multicast */
@@ -276,7 +276,7 @@ struct ieee80211_if_wds {
 };
 
 struct ieee80211_if_vlan {
-	struct list_head list;
+	struct list_head list; /* write-protected with RTNL and local->mtx */
 
 	/* used for all tx if the VLAN is configured to 4-addr mode */
 	struct sta_info __rcu *sta;
@@ -692,8 +692,10 @@ struct ieee80211_chanctx {
 	struct list_head list;
 	struct rcu_head rcu_head;
 
+	struct list_head assigned_vifs;
+	struct list_head reserved_vifs;
+
 	enum ieee80211_chanctx_mode mode;
-	int refcount;
 	bool driver_present;
 
 	struct ieee80211_chanctx_conf conf;
@@ -752,11 +754,21 @@ struct ieee80211_sub_if_data {
 	struct mac80211_qos_map __rcu *qos_map;
 
 	struct work_struct csa_finalize_work;
-	int csa_counter_offset_beacon;
-	int csa_counter_offset_presp;
+	u16 csa_counter_offset_beacon[IEEE80211_MAX_CSA_COUNTERS_NUM];
+	u16 csa_counter_offset_presp[IEEE80211_MAX_CSA_COUNTERS_NUM];
 	bool csa_radar_required;
+	bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
 	struct cfg80211_chan_def csa_chandef;
 
+	struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
+	struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */
+
+	/* context reservation -- protected with chanctx_mtx */
+	struct ieee80211_chanctx *reserved_chanctx;
+	struct cfg80211_chan_def reserved_chandef;
+	bool reserved_radar_required;
+
+	u8 csa_current_counter;
+
 	/* used to reconfigure hardware SM PS */
 	struct work_struct recalc_smps;
 
@@ -1449,6 +1461,7 @@ __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 				       struct cfg80211_sched_scan_request *req);
 int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata);
+void ieee80211_sched_scan_end(struct ieee80211_local *local);
 void ieee80211_sched_scan_stopped_work(struct work_struct *work);
 
 /* off-channel helpers */
@@ -1463,6 +1476,7 @@ void ieee80211_sw_roc_work(struct work_struct *work);
 void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
 
 /* channel switch handling */
+bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local);
 void ieee80211_csa_finalize_work(struct work_struct *work);
 int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 			     struct cfg80211_csa_settings *params);
@@ -1772,6 +1786,16 @@ ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
 			  const struct cfg80211_chan_def *chandef,
 			  enum ieee80211_chanctx_mode mode);
 int __must_check
+ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
+			      const struct cfg80211_chan_def *chandef,
+			      enum ieee80211_chanctx_mode mode,
+			      bool radar_required);
+int __must_check
+ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata,
+				   u32 *changed);
+int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata);
+
+int __must_check
 ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
 			       const struct cfg80211_chan_def *chandef,
 			       u32 *changed);
@@ -1783,6 +1807,8 @@ void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata);
 void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata);
 void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
 					 bool clear);
+int ieee80211_chanctx_refcount(struct ieee80211_local *local,
+			       struct ieee80211_chanctx *ctx);
 
 void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
 				   struct ieee80211_chanctx *chanctx);
@@ -1806,6 +1832,20 @@ int ieee80211_cs_headroom(struct ieee80211_local *local,
 			  enum nl80211_iftype iftype);
 void ieee80211_recalc_dtim(struct ieee80211_local *local,
 			   struct ieee80211_sub_if_data *sdata);
+int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
+				 const struct cfg80211_chan_def *chandef,
+				 enum ieee80211_chanctx_mode chanmode,
+				 u8 radar_detect);
+int ieee80211_max_num_channels(struct ieee80211_local *local);
+
+/* TDLS */
+int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
+			const u8 *peer, u8 action_code, u8 dialog_token,
+			u16 status_code, u32 peer_capability,
+			const u8 *extra_ies, size_t extra_ies_len);
+int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
+			const u8 *peer, enum nl80211_tdls_operation oper);
+
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b8d331e7d883..388b863e821c 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -250,6 +250,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_sub_if_data *nsdata;
+	int ret;
 
 	ASSERT_RTNL();
 
@@ -300,7 +301,10 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	return 0;
+	mutex_lock(&local->chanctx_mtx);
+	ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
+	mutex_unlock(&local->chanctx_mtx);
+	return ret;
 }
 
 static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
@@ -395,6 +399,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
 	sdata->vif.type = NL80211_IFTYPE_MONITOR;
 	snprintf(sdata->name, IFNAMSIZ, "%s-monitor",
 		 wiphy_name(local->hw.wiphy));
+	sdata->wdev.iftype = NL80211_IFTYPE_MONITOR;
 
 	sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
 
@@ -423,7 +428,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
 	mutex_unlock(&local->mtx);
 	if (ret) {
 		mutex_lock(&local->iflist_mtx);
-		rcu_assign_pointer(local->monitor_sdata, NULL);
+		RCU_INIT_POINTER(local->monitor_sdata, NULL);
 		mutex_unlock(&local->iflist_mtx);
 		synchronize_net();
 		drv_remove_interface(local, sdata);
@@ -452,7 +457,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
 		return;
 	}
 
-	rcu_assign_pointer(local->monitor_sdata, NULL);
+	RCU_INIT_POINTER(local->monitor_sdata, NULL);
 	mutex_unlock(&local->iflist_mtx);
 
 	synchronize_net();
@@ -492,7 +497,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 		if (!sdata->bss)
 			return -ENOLINK;
 
+		mutex_lock(&local->mtx);
 		list_add(&sdata->u.vlan.list, &sdata->bss->vlans);
+		mutex_unlock(&local->mtx);
 
 		master = container_of(sdata->bss,
 				      struct ieee80211_sub_if_data, u.ap);
@@ -722,8 +729,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 	drv_stop(local);
 err_del_bss:
 	sdata->bss = NULL;
-	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+		mutex_lock(&local->mtx);
 		list_del(&sdata->u.vlan.list);
+		mutex_unlock(&local->mtx);
+	}
 	/* might already be clear but that doesn't matter */
 	clear_bit(SDATA_STATE_RUNNING, &sdata->state);
 	return res;
@@ -829,8 +839,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	cancel_work_sync(&sdata->recalc_smps);
 
 	sdata_lock(sdata);
+	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
 	sdata_unlock(sdata);
+
 	cancel_work_sync(&sdata->csa_finalize_work);
 
 	cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
@@ -875,8 +892,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP_VLAN:
+		mutex_lock(&local->mtx);
 		list_del(&sdata->u.vlan.list);
-		rcu_assign_pointer(sdata->vif.chanctx_conf, NULL);
+		mutex_unlock(&local->mtx);
+		RCU_INIT_POINTER(sdata->vif.chanctx_conf, NULL);
 		/* no need to tell driver */
 		break;
 	case NL80211_IFTYPE_MONITOR:
@@ -895,7 +914,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		break;
 	case NL80211_IFTYPE_P2P_DEVICE:
 		/* relies on synchronize_rcu() below */
-		rcu_assign_pointer(local->p2p_sdata, NULL);
+		RCU_INIT_POINTER(local->p2p_sdata, NULL);
 		/* fall through */
 	default:
 		cancel_work_sync(&sdata->work);
@@ -1267,6 +1286,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
 	sdata->control_port_no_encrypt = false;
 	sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
+	sdata->vif.bss_conf.idle = true;
 
 	sdata->noack_map = 0;
 
@@ -1280,6 +1300,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	INIT_WORK(&sdata->work, ieee80211_iface_work);
 	INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
 	INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
+	INIT_LIST_HEAD(&sdata->assigned_chanctx_list);
+	INIT_LIST_HEAD(&sdata->reserved_chanctx_list);
 
 	switch (type) {
 	case NL80211_IFTYPE_P2P_GO:
@@ -1758,7 +1780,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
 	}
 	mutex_unlock(&local->iflist_mtx);
 	unregister_netdevice_many(&unreg_list);
-	list_del(&unreg_list);
 
 	list_for_each_entry_safe(sdata, tmp, &wdev_list, list) {
 		list_del(&sdata->list);
@@ -1774,20 +1795,19 @@ static int netdev_notify(struct notifier_block *nb,
 	struct ieee80211_sub_if_data *sdata;
 
 	if (state != NETDEV_CHANGENAME)
-		return 0;
+		return NOTIFY_DONE;
 
 	if (!dev->ieee80211_ptr || !dev->ieee80211_ptr->wiphy)
-		return 0;
+		return NOTIFY_DONE;
 
 	if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid)
-		return 0;
+		return NOTIFY_DONE;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
 	memcpy(sdata->name, dev->name, IFNAMSIZ);
-
 	ieee80211_debugfs_rename_netdev(sdata);
-	return 0;
+
+	return NOTIFY_OK;
 }
 
 static struct notifier_block mac80211_netdev_notifier = {
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 6ff65a1ebaa9..16d97f044a20 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -325,7 +325,8 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 	struct ieee80211_key *key;
 	int i, j, err;
 
-	BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
+	if (WARN_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS))
+		return ERR_PTR(-EINVAL);
 
 	key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
 	if (!key)
@@ -481,8 +482,8 @@ int ieee80211_key_link(struct ieee80211_key *key,
 	int idx, ret;
 	bool pairwise;
 
-	BUG_ON(!sdata);
-	BUG_ON(!key);
+	if (WARN_ON(!sdata || !key))
+		return -EINVAL;
 
 	pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
 	idx = key->conf.keyidx;
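[Editor's note: key.c above — like mesh_pathtbl.c and mesh_sync.c further down — swaps BUG_ON() for WARN_ON() plus an error return, so a broken caller now produces one loud warning and a failed operation instead of a kernel panic. The transformation in miniature:]

	/* before: BUG_ON(!sdata || !key) halted the whole machine */
	if (WARN_ON(!sdata || !key))
		return -EINVAL;	/* warn once, fail the call instead */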
sdata_unlock(sdata); - return NOTIFY_DONE; + return NOTIFY_OK; } #endif @@ -371,7 +371,7 @@ static int ieee80211_ifa6_changed(struct notifier_block *nb, drv_ipv6_addr_change(local, sdata, idev); - return NOTIFY_DONE; + return NOTIFY_OK; } #endif @@ -446,7 +446,9 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { .cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_MAX_AMSDU | IEEE80211_HT_CAP_SGI_20 | - IEEE80211_HT_CAP_SGI_40), + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_LDPC_CODING | + IEEE80211_HT_CAP_40MHZ_INTOLERANT), .mcs = { .rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, @@ -954,6 +956,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP; + local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index f70e9cd10552..6495a3f0428d 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -366,20 +366,15 @@ int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) return 0; /* find RSN IE */ - data = ifmsh->ie; - while (data < ifmsh->ie + ifmsh->ie_len) { - if (*data == WLAN_EID_RSN) { - len = data[1] + 2; - break; - } - data++; - } + data = cfg80211_find_ie(WLAN_EID_RSN, ifmsh->ie, ifmsh->ie_len); + if (!data) + return 0; - if (len) { - if (skb_tailroom(skb) < len) - return -ENOMEM; - memcpy(skb_put(skb, len), data, len); - } + len = data[1] + 2; + + if (skb_tailroom(skb) < len) + return -ENOMEM; + memcpy(skb_put(skb, len), data, len); return 0; } @@ -684,7 +679,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) *pos++ = 0x0; *pos++ = ieee80211_frequency_to_channel( csa->settings.chandef.chan->center_freq); - sdata->csa_counter_offset_beacon = hdr_len + 6; + sdata->csa_counter_offset_beacon[0] = hdr_len + 6; *pos++ = csa->settings.count; *pos++ = WLAN_EID_CHAN_SWITCH_PARAM; *pos++ = 6; @@ -829,7 +824,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); bcn = rcu_dereference_protected(ifmsh->beacon, lockdep_is_held(&sdata->wdev.mtx)); - rcu_assign_pointer(ifmsh->beacon, NULL); + RCU_INIT_POINTER(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); /* flush STAs and mpaths on this iface */ @@ -903,14 +898,15 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - ¶ms.chandef); + ¶ms.chandef, + NL80211_IFTYPE_MESH_POINT); if (err < 0) return false; - if (err) { - params.radar_required = true; + if (err > 0) /* TODO: DFS not (yet) supported */ return false; - } + + params.radar_required = err; if (cfg80211_chandef_identical(¶ms.chandef, &sdata->vif.bss_conf.chandef)) { @@ -1068,7 +1064,7 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) /* Remove the CSA and MCSP elements from the beacon */ tmp_csa_settings = rcu_dereference(ifmsh->csa); - rcu_assign_pointer(ifmsh->csa, NULL); + RCU_INIT_POINTER(ifmsh->csa, NULL); if (tmp_csa_settings) kfree_rcu(tmp_csa_settings, rcu_head); ret = ieee80211_mesh_rebuild_beacon(sdata); @@ -1102,7 +1098,7 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, ret = ieee80211_mesh_rebuild_beacon(sdata); if (ret) { tmp_csa_settings = rcu_dereference(ifmsh->csa); - 
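The mesh_add_rsn_ie() rewrite above replaces a byte-at-a-time scan (which could match an element ID inside another element's payload) with cfg80211_find_ie(), which walks element headers. A simplified model of what that helper does, assuming well-formed <id, len, data[len]> elements:

	static const u8 *find_ie(u8 eid, const u8 *ies, int len)
	{
		while (len >= 2 && len >= ies[1] + 2) {
			if (ies[0] == eid)
				return ies;	/* points at the id octet */
			len -= ies[1] + 2;
			ies += ies[1] + 2;
		}
		return NULL;
	}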
rcu_assign_pointer(ifmsh->csa, NULL); + RCU_INIT_POINTER(ifmsh->csa, NULL); kfree_rcu(tmp_csa_settings, rcu_head); return ret; } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index f9514685d45a..94758b9c9ed4 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -37,7 +37,7 @@ static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae) return get_unaligned_le32(preq_elem + offset); } -static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae) +static inline u16 u16_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; @@ -544,9 +544,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || time_before(jiffies, ifmsh->last_sn_update)) { - target_sn = ++ifmsh->sn; + ++ifmsh->sn; ifmsh->last_sn_update = jiffies; } + target_sn = ifmsh->sn; } else if (is_broadcast_ether_addr(target_addr) && (target_flags & IEEE80211_PREQ_TO_FLAG)) { rcu_read_lock(); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 7d050ed6fe5a..cf032a8db9d7 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -287,8 +287,10 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, struct sk_buff_head failq; unsigned long flags; - BUG_ON(gate_mpath == from_mpath); - BUG_ON(!gate_mpath->next_hop); + if (WARN_ON(gate_mpath == from_mpath)) + return; + if (WARN_ON(!gate_mpath->next_hop)) + return; __skb_queue_head_init(&failq); diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 2bc5dc25d5ad..09625d6205c3 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -171,7 +171,7 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata, u8 cap; WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); - BUG_ON(!rcu_read_lock_held()); + WARN_ON(!rcu_read_lock_held()); cap = beacon->meshconf->meshconf_cap; spin_lock_bh(&ifmsh->sync_offset_lock); diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h index 3b848dad9587..0e4886f881f1 100644 --- a/net/mac80211/michael.h +++ b/net/mac80211/michael.h @@ -11,6 +11,7 @@ #define MICHAEL_H #include <linux/types.h> +#include <linux/ieee80211.h> #define MICHAEL_MIC_LEN 8 diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 27600a9808ba..3345401be1b3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -975,16 +975,23 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* XXX: shouldn't really modify cfg80211-owned data! */ ifmgd->associated->channel = sdata->csa_chandef.chan; + ieee80211_bss_info_change_notify(sdata, changed); + + mutex_lock(&local->mtx); + sdata->vif.csa_active = false; /* XXX: wait for a beacon first? 
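Another recurring theme in this series (key.c, mesh_pathtbl.c, mesh_sync.c above): BUG_ON() on a recoverable precondition is downgraded to WARN_ON() plus an error return, so a broken caller produces a stack trace instead of a dead machine. The shape of the resulting pattern, sketched with a hypothetical helper:

	#include <linux/bug.h>
	#include <linux/errno.h>
	#include <linux/list.h>

	struct entry {
		struct list_head list;
	};

	static int register_entry(struct list_head *table, struct entry *e)
	{
		if (WARN_ON(!table || !e))	/* trace once, then recover */
			return -EINVAL;

		list_add(&e->list, table);
		return 0;
	}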
*/ - ieee80211_wake_queues_by_reason(&local->hw, + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); - ieee80211_bss_info_change_notify(sdata, changed); - - out: - sdata->vif.csa_active = false; ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + + ieee80211_sta_reset_beacon_monitor(sdata); + ieee80211_sta_reset_conn_monitor(sdata); + +out: sdata_unlock(sdata); } @@ -1089,7 +1096,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf), struct ieee80211_chanctx, conf); - if (chanctx->refcount > 1) { + if (ieee80211_chanctx_refcount(local, chanctx) > 1) { sdata_info(sdata, "channel switch with multiple interfaces on the same channel, disconnecting\n"); ieee80211_queue_work(&local->hw, @@ -1100,12 +1107,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->chanctx_mtx); sdata->csa_chandef = csa_ie.chandef; + + mutex_lock(&local->mtx); sdata->vif.csa_active = true; + sdata->csa_block_tx = csa_ie.mode; - if (csa_ie.mode) + if (sdata->csa_block_tx) ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); if (local->ops->channel_switch) { /* use driver's channel switch callback */ @@ -1817,6 +1828,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags = 0; mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + + sdata->vif.csa_active = false; + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&local->mtx); sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; @@ -2045,6 +2062,7 @@ EXPORT_SYMBOL(ieee80211_ap_probereq_get); static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -2058,10 +2076,14 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + + mutex_lock(&local->mtx); sdata->vif.csa_active = false; - ieee80211_wake_queues_by_reason(&sdata->local->hw, + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); @@ -3546,6 +3568,9 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) if (local->quiescing) return; + if (sdata->vif.csa_active) + return; + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_connection_loss_work); @@ -3561,6 +3586,9 @@ static void ieee80211_sta_conn_mon_timer(unsigned long data) if (local->quiescing) return; + if (sdata->vif.csa_active) + return; + ieee80211_queue_work(&local->hw, &ifmgd->monitor_work); } @@ -3707,7 +3735,7 @@ int ieee80211_max_network_latency(struct notifier_block *nb, ieee80211_recalc_ps(local, latency_usec); mutex_unlock(&local->iflist_mtx); - return 0; + return NOTIFY_OK; } static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, diff --git 
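Every path that used to wake the CSA-stopped queues unconditionally now consults ieee80211_csa_needs_block_tx() under local->mtx first: with several interfaces mid-switch, one vif completing its CSA must not unblock transmit while another vif's quiet-mode countdown is still running. The helper plausibly reduces to a scan of this shape (a sketch under that assumption, not the patch's exact body):

	static bool csa_needs_block_tx(struct ieee80211_local *local)
	{
		struct ieee80211_sub_if_data *sdata;

		lockdep_assert_held(&local->mtx);

		list_for_each_entry(sdata, &local->interfaces, list)
			if (sdata->vif.csa_active && sdata->csa_block_tx)
				return true;

		return false;
	}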
a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 26fd94fa0aed..1c1469c36dca 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -657,6 +657,17 @@ minstrel_free(void *priv) kfree(priv); } +static u32 minstrel_get_expected_throughput(void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + int idx = mi->max_tp_rate[0]; + + /* convert pkts per second to kbps (1200 is the average pkt size used + * for computing cur_tp) + */ + return MINSTREL_TRUNC(mi->r[idx].cur_tp) * 1200 * 8 / 1024; +} + const struct rate_control_ops mac80211_minstrel = { .name = "minstrel", .tx_status = minstrel_tx_status, @@ -670,6 +681,7 @@ const struct rate_control_ops mac80211_minstrel = { .add_sta_debugfs = minstrel_add_sta_debugfs, .remove_sta_debugfs = minstrel_remove_sta_debugfs, #endif + .get_expected_throughput = minstrel_get_expected_throughput, }; int __init diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index bccaf854a309..85c1e74b7714 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -22,7 +22,7 @@ #define MCS_NBITS (AVG_PKT_SIZE << 3) /* Number of symbols for a packet with (bps) bits per symbol */ -#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) +#define MCS_NSYMS(bps) DIV_ROUND_UP(MCS_NBITS, (bps)) /* Transmission time (nanoseconds) for a packet containing (syms) symbols */ #define MCS_SYMBOL_TIME(sgi, syms) \ @@ -226,8 +226,9 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); nsecs += minstrel_mcs_groups[group].duration[rate]; - tp = 1000000 * ((prob * 1000) / nsecs); + /* prob is scaled - see MINSTREL_FRAC above */ + tp = 1000000 * ((prob * 1000) / nsecs); mr->cur_tp = MINSTREL_TRUNC(tp); } @@ -1031,6 +1032,22 @@ minstrel_ht_free(void *priv) mac80211_minstrel.free(priv); } +static u32 minstrel_ht_get_expected_throughput(void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + int i, j; + + if (!msp->is_ht) + return mac80211_minstrel.get_expected_throughput(priv_sta); + + i = mi->max_tp_rate / MCS_GROUP_RATES; + j = mi->max_tp_rate % MCS_GROUP_RATES; + + /* convert cur_tp from pkts per second to kbps */ + return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024; +} + static const struct rate_control_ops mac80211_minstrel_ht = { .name = "minstrel_ht", .tx_status = minstrel_ht_tx_status, @@ -1045,6 +1062,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = { .add_sta_debugfs = minstrel_ht_add_sta_debugfs, .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs, #endif + .get_expected_throughput = minstrel_ht_get_expected_throughput, }; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2b608b2b70ec..394e201cde6d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -54,24 +54,25 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, return skb; } -static inline int should_drop_frame(struct sk_buff *skb, int present_fcs_len) +static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_hdr *hdr; - - hdr = (void *)(skb->data); + struct ieee80211_hdr *hdr = (void *)skb->data; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC | RX_FLAG_AMPDU_IS_ZEROLEN)) - return 1; + return true; + if (unlikely(skb->len < 16 + present_fcs_len)) - return 1; + return true; + if
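Both get_expected_throughput() implementations above do the same unit conversion: cur_tp counts packets per second (MINSTREL_FRAC-scaled in the legacy case), and minstrel's fixed 1200-byte average packet size turns that into a bitrate. Isolated for clarity:

	/* pkt/s -> kbit/s at minstrel's 1200-byte average packet size,
	 * e.g. 300 pkt/s * 1200 B * 8 bit/B / 1024 = 2812 kbit/s */
	static u32 pkts_to_kbps(u32 pkts_per_sec)
	{
		return pkts_per_sec * 1200 * 8 / 1024;
	}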
(ieee80211_is_ctl(hdr->frame_control) && !ieee80211_is_pspoll(hdr->frame_control) && !ieee80211_is_back_req(hdr->frame_control)) - return 1; - return 0; + return true; + + return false; } static int @@ -3191,7 +3192,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, } /* - * This is the actual Rx frames handler. as it blongs to Rx path it must + * This is the actual Rx frames handler. as it belongs to Rx path it must * be called with rcu_read_lock protection. */ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 3ce7f2c8539a..f40661eb75b5 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -309,7 +309,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (local->scan_req != local->int_scan_req) cfg80211_scan_done(local->scan_req, aborted); local->scan_req = NULL; - rcu_assign_pointer(local->scan_sdata, NULL); + RCU_INIT_POINTER(local->scan_sdata, NULL); local->scanning = 0; local->scan_chandef.chan = NULL; @@ -559,7 +559,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(local); local->scan_req = NULL; - rcu_assign_pointer(local->scan_sdata, NULL); + RCU_INIT_POINTER(local->scan_sdata, NULL); } return rc; @@ -773,7 +773,7 @@ void ieee80211_scan_work(struct work_struct *work) int rc; local->scan_req = NULL; - rcu_assign_pointer(local->scan_sdata, NULL); + RCU_INIT_POINTER(local->scan_sdata, NULL); rc = __ieee80211_start_scan(sdata, req); if (rc) { @@ -1014,7 +1014,7 @@ out_free: if (ret) { /* Clean in case of failure after HW restart or upon resume. */ - rcu_assign_pointer(local->sched_scan_sdata, NULL); + RCU_INIT_POINTER(local->sched_scan_sdata, NULL); local->sched_scan_req = NULL; } @@ -1076,12 +1076,8 @@ void ieee80211_sched_scan_results(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_sched_scan_results); -void ieee80211_sched_scan_stopped_work(struct work_struct *work) +void ieee80211_sched_scan_end(struct ieee80211_local *local) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, - sched_scan_stopped_work); - mutex_lock(&local->mtx); if (!rcu_access_pointer(local->sched_scan_sdata)) { @@ -1089,7 +1085,7 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) return; } - rcu_assign_pointer(local->sched_scan_sdata, NULL); + RCU_INIT_POINTER(local->sched_scan_sdata, NULL); /* If sched scan was aborted by the driver. 
*/ local->sched_scan_req = NULL; @@ -1099,6 +1095,15 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) cfg80211_sched_scan_stopped(local->hw.wiphy); } +void ieee80211_sched_scan_stopped_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, + sched_scan_stopped_work); + + ieee80211_sched_scan_end(local); +} + void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 847d92f6bef6..a9b46d8ea22f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -240,6 +240,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); + kfree(rcu_dereference_raw(sta->sta.rates)); kfree(sta); } @@ -552,7 +553,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; - int err = 0; + int err; might_sleep(); @@ -570,7 +571,6 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) return 0; out_free: - BUG_ON(!err); sta_info_free(local, sta); return err; } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 60cb7a665976..ba29ebc86141 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -541,6 +541,23 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local, */ #define STA_LOST_PKT_THRESHOLD 50 +static void ieee80211_lost_packet(struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + /* This packet was aggregated but doesn't carry status info */ + if ((info->flags & IEEE80211_TX_CTL_AMPDU) && + !(info->flags & IEEE80211_TX_STAT_AMPDU)) + return; + + if (++sta->lost_packets < STA_LOST_PKT_THRESHOLD) + return; + + cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr, + sta->lost_packets, GFP_ATOMIC); + sta->lost_packets = 0; +} + void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; @@ -680,12 +697,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_STAT_ACK) { if (sta->lost_packets) sta->lost_packets = 0; - } else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) { - cfg80211_cqm_pktloss_notify(sta->sdata->dev, - sta->sta.addr, - sta->lost_packets, - GFP_ATOMIC); - sta->lost_packets = 0; + } else { + ieee80211_lost_packet(sta, skb); } } diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c new file mode 100644 index 000000000000..652813b2d3df --- /dev/null +++ b/net/mac80211/tdls.c @@ -0,0 +1,325 @@ +/* + * mac80211 TDLS handling code + * + * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2014, Intel Corporation + * + * This file is GPLv2 as found in COPYING. 
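The extracted ieee80211_lost_packet() helper above also changes what counts toward STA_LOST_PKT_THRESHOLD: a frame sent as part of an aggregate (IEEE80211_TX_CTL_AMPDU) whose real A-MPDU status never arrived (no IEEE80211_TX_STAT_AMPDU) carries no per-frame ACK information, so treating it as lost would inflate the CQM packet-loss signal. Restated as a predicate:

	static bool counts_as_loss(const struct ieee80211_tx_info *info)
	{
		/* aggregated frame without real A-MPDU status: don't count */
		if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
		    !(info->flags & IEEE80211_TX_STAT_AMPDU))
			return false;

		return true;
	}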
+ */ + +#include <linux/ieee80211.h> +#include "ieee80211_i.h" + +static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb) +{ + u8 *pos = (void *)skb_put(skb, 7); + + *pos++ = WLAN_EID_EXT_CAPABILITY; + *pos++ = 5; /* len */ + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; +} + +static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u16 capab; + + capab = 0; + if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ) + return capab; + + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; + + return capab; +} + +static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr, + const u8 *peer, const u8 *bssid) +{ + struct ieee80211_tdls_lnkie *lnkid; + + lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie)); + + lnkid->ie_type = WLAN_EID_LINK_ID; + lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; + + memcpy(lnkid->bssid, bssid, ETH_ALEN); + memcpy(lnkid->init_sta, src_addr, ETH_ALEN); + memcpy(lnkid->resp_sta, peer, ETH_ALEN); +} + +static int +ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_tdls_data *tf; + + tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); + + memcpy(tf->da, peer, ETH_ALEN); + memcpy(tf->sa, sdata->vif.addr, ETH_ALEN); + tf->ether_type = cpu_to_be16(ETH_P_TDLS); + tf->payload_type = WLAN_TDLS_SNAP_RFTYPE; + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_REQUEST; + + skb_put(skb, sizeof(tf->u.setup_req)); + tf->u.setup_req.dialog_token = dialog_token; + tf->u.setup_req.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(sdata, skb, false, band); + ieee80211_add_ext_srates_ie(sdata, skb, false, band); + ieee80211_tdls_add_ext_capab(skb); + break; + case WLAN_TDLS_SETUP_RESPONSE: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_RESPONSE; + + skb_put(skb, sizeof(tf->u.setup_resp)); + tf->u.setup_resp.status_code = cpu_to_le16(status_code); + tf->u.setup_resp.dialog_token = dialog_token; + tf->u.setup_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(sdata, skb, false, band); + ieee80211_add_ext_srates_ie(sdata, skb, false, band); + ieee80211_tdls_add_ext_capab(skb); + break; + case WLAN_TDLS_SETUP_CONFIRM: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_CONFIRM; + + skb_put(skb, sizeof(tf->u.setup_cfm)); + tf->u.setup_cfm.status_code = cpu_to_le16(status_code); + tf->u.setup_cfm.dialog_token = dialog_token; + break; + case WLAN_TDLS_TEARDOWN: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_TEARDOWN; + + skb_put(skb, sizeof(tf->u.teardown)); + tf->u.teardown.reason_code = cpu_to_le16(status_code); + break; + case WLAN_TDLS_DISCOVERY_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST; + + skb_put(skb, sizeof(tf->u.discover_req)); + tf->u.discover_req.dialog_token = 
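ieee80211_tdls_add_link_ie() above fills the Link Identifier element; its fixed 20-octet layout is what encodes who initiated the TDLS exchange:

	/* TDLS Link Identifier element (WLAN_EID_LINK_ID):
	 *
	 *  octets:    1       1        6         6           6
	 *          +-----+--------+--------+-----------+-----------+
	 *          | eid | len=18 | BSSID  | initiator | responder |
	 *          +-----+--------+--------+-----------+-----------+
	 */

As the ieee80211_tdls_mgmt() call sites further down show, setup request/confirm, teardown and discovery request put the local address in the initiator slot, while setup response and discovery response swap the two stations.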
dialog_token; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_mgmt *mgmt; + + mgmt = (void *)skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, peer, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + switch (action_code) { + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); + mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; + mgmt->u.action.u.tdls_discover_resp.action_code = + WLAN_PUB_ACTION_TDLS_DISCOVER_RES; + mgmt->u.action.u.tdls_discover_resp.dialog_token = + dialog_token; + mgmt->u.action.u.tdls_discover_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(sdata, skb, false, band); + ieee80211_add_ext_srates_ie(sdata, skb, false, band); + ieee80211_tdls_add_ext_capab(skb); + break; + default: + return -EINVAL; + } + + return 0; +} + +int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, u32 peer_capability, + const u8 *extra_ies, size_t extra_ies_len) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb = NULL; + bool send_direct; + int ret; + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + /* make sure we are in managed mode, and associated */ + if (sdata->vif.type != NL80211_IFTYPE_STATION || + !sdata->u.mgd.associated) + return -EINVAL; + + tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n", + action_code, peer); + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + max(sizeof(struct ieee80211_mgmt), + sizeof(struct ieee80211_tdls_data)) + + 50 + /* supported rates */ + 7 + /* ext capab */ + extra_ies_len + + sizeof(struct ieee80211_tdls_lnkie)); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, local->hw.extra_tx_headroom); + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer, + action_code, dialog_token, + status_code, skb); + send_direct = false; + break; + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code, + dialog_token, status_code, + skb); + send_direct = true; + break; + default: + ret = -ENOTSUPP; + break; + } + + if (ret < 0) + goto fail; + + if (extra_ies_len) + memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len); + + /* the TDLS link IE is always added last */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + /* we are the initiator */ + ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer, + sdata->u.mgd.bssid); + break; + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + /* we are the responder */ + ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr, + sdata->u.mgd.bssid); + break; + default: + 
ret = -ENOTSUPP; + goto fail; + } + + if (send_direct) { + ieee80211_tx_skb(sdata, skb); + return 0; + } + + /* + * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise + * we should default to AC_VI. + */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + skb_set_queue_mapping(skb, IEEE80211_AC_BK); + skb->priority = 2; + break; + default: + skb_set_queue_mapping(skb, IEEE80211_AC_VI); + skb->priority = 5; + break; + } + + /* disable bottom halves when entering the Tx path */ + local_bh_disable(); + ret = ieee80211_subif_start_xmit(skb, dev); + local_bh_enable(); + + return ret; + +fail: + dev_kfree_skb(skb); + return ret; +} + +int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, enum nl80211_tdls_operation oper) +{ + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EINVAL; + + tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); + + switch (oper) { + case NL80211_TDLS_ENABLE_LINK: + rcu_read_lock(); + sta = sta_info_get(sdata, peer); + if (!sta) { + rcu_read_unlock(); + return -ENOLINK; + } + + set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); + rcu_read_unlock(); + break; + case NL80211_TDLS_DISABLE_LINK: + return sta_info_destroy_addr(sdata, peer); + case NL80211_TDLS_TEARDOWN: + case NL80211_TDLS_SETUP: + case NL80211_TDLS_DISCOVERY_REQ: + /* We don't support in-driver setup/teardown/discovery */ + return -ENOTSUPP; + default: + return -ENOTSUPP; + } + + return 0; +} diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index cec5b60487a4..cfe1a0688b5c 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -184,6 +184,20 @@ TRACE_EVENT(drv_return_bool, "true" : "false") ); +TRACE_EVENT(drv_return_u32, + TP_PROTO(struct ieee80211_local *local, u32 ret), + TP_ARGS(local, ret), + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, ret) + ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + ), + TP_printk(LOCAL_PR_FMT " - %u", LOCAL_PR_ARG, __entry->ret) +); + TRACE_EVENT(drv_return_u64, TP_PROTO(struct ieee80211_local *local, u64 ret), TP_ARGS(local, ret), @@ -1375,6 +1389,91 @@ TRACE_EVENT(drv_change_chanctx, ) ); +#if !defined(__TRACE_VIF_ENTRY) +#define __TRACE_VIF_ENTRY +struct trace_vif_entry { + enum nl80211_iftype vif_type; + bool p2p; + char vif_name[IFNAMSIZ]; +} __packed; + +struct trace_chandef_entry { + u32 control_freq; + u32 chan_width; + u32 center_freq1; + u32 center_freq2; +} __packed; + +struct trace_switch_entry { + struct trace_vif_entry vif; + struct trace_chandef_entry old_chandef; + struct trace_chandef_entry new_chandef; +} __packed; + +#define SWITCH_ENTRY_ASSIGN(to, from) local_vifs[i].to = vifs[i].from +#endif + +TRACE_EVENT(drv_switch_vif_chanctx, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, enum ieee80211_chanctx_switch_mode mode), + TP_ARGS(local, vifs, n_vifs, mode), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, n_vifs) + __field(u32, mode) + __dynamic_array(u8, vifs, + sizeof(struct trace_switch_entry) * n_vifs) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->n_vifs = n_vifs; + __entry->mode = mode; + { + struct trace_switch_entry *local_vifs = + __get_dynamic_array(vifs); + int i; + + for (i = 0; i < n_vifs; i++) { + struct ieee80211_sub_if_data *sdata; + + sdata = container_of(vifs[i].vif, + struct 
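The queue selection at the end of ieee80211_tdls_mgmt() sets skb->priority alongside the AC because both travel with the frame; the chosen values are the matching 802.1D user priorities:

	/* 802.1D user priority -> 802.11 access category:
	 *   UP 1,2 -> AC_BK   (priority 2: TDLS setup request/response)
	 *   UP 0,3 -> AC_BE
	 *   UP 4,5 -> AC_VI   (priority 5: all other TDLS frames)
	 *   UP 6,7 -> AC_VO
	 */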
ieee80211_sub_if_data, + vif); + + SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type); + SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p); + strncpy(local_vifs[i].vif.vif_name, + sdata->name, + sizeof(local_vifs[i].vif.vif_name)); + SWITCH_ENTRY_ASSIGN(old_chandef.control_freq, + old_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(old_chandef.chan_width, + old_ctx->def.width); + SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1, + old_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2, + old_ctx->def.center_freq2); + SWITCH_ENTRY_ASSIGN(new_chandef.control_freq, + new_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(new_chandef.chan_width, + new_ctx->def.width); + SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1, + new_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2, + new_ctx->def.center_freq2); + } + } + ), + + TP_printk( + LOCAL_PR_FMT " n_vifs:%d mode:%d", + LOCAL_PR_ARG, __entry->n_vifs, __entry->mode + ) +); + DECLARE_EVENT_CLASS(local_sdata_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1499,6 +1598,24 @@ DEFINE_EVENT(local_sdata_evt, drv_leave_ibss, TP_ARGS(local, sdata) ); +TRACE_EVENT(drv_get_expected_throughput, + TP_PROTO(struct ieee80211_sta *sta), + + TP_ARGS(sta), + + TP_STRUCT__entry( + STA_ENTRY + ), + + TP_fast_assign( + STA_ASSIGN; + ), + + TP_printk( + STA_PR_FMT, STA_PR_ARG + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 19d36d4117e0..5214686d9fd1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2328,7 +2328,8 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, struct sk_buff *skb) + struct ps_data *ps, struct sk_buff *skb, + bool is_template) { u8 *pos, *tim; int aid0 = 0; @@ -2341,11 +2342,12 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, * checking byte-for-byte */ have_bits = !bitmap_empty((unsigned long *)ps->tim, IEEE80211_MAX_AID+1); - - if (ps->dtim_count == 0) - ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1; - else - ps->dtim_count--; + if (!is_template) { + if (ps->dtim_count == 0) + ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1; + else + ps->dtim_count--; + } tim = pos = (u8 *) skb_put(skb, 6); *pos++ = WLAN_EID_TIM; @@ -2391,7 +2393,8 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, } static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, struct sk_buff *skb) + struct ps_data *ps, struct sk_buff *skb, + bool is_template) { struct ieee80211_local *local = sdata->local; @@ -2403,24 +2406,24 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, * of the tim bitmap in mac80211 and the driver. 
*/ if (local->tim_in_locked_section) { - __ieee80211_beacon_add_tim(sdata, ps, skb); + __ieee80211_beacon_add_tim(sdata, ps, skb, is_template); } else { spin_lock_bh(&local->tim_lock); - __ieee80211_beacon_add_tim(sdata, ps, skb); + __ieee80211_beacon_add_tim(sdata, ps, skb, is_template); spin_unlock_bh(&local->tim_lock); } return 0; } -static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, - struct beacon_data *beacon) +static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon) { struct probe_resp *resp; - int counter_offset_beacon = sdata->csa_counter_offset_beacon; - int counter_offset_presp = sdata->csa_counter_offset_presp; u8 *beacon_data; size_t beacon_data_len; + int i; + u8 count = sdata->csa_current_counter; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: @@ -2438,40 +2441,57 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, default: return; } - if (WARN_ON(counter_offset_beacon >= beacon_data_len)) - return; - /* Warn if the driver did not check for/react to csa - * completeness. A beacon with CSA counter set to 0 should - * never occur, because a counter of 1 means switch just - * before the next beacon. - */ - if (WARN_ON(beacon_data[counter_offset_beacon] == 1)) - return; + for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; ++i) { + u16 counter_offset_beacon = + sdata->csa_counter_offset_beacon[i]; + u16 counter_offset_presp = sdata->csa_counter_offset_presp[i]; - beacon_data[counter_offset_beacon]--; + if (counter_offset_beacon) { + if (WARN_ON(counter_offset_beacon >= beacon_data_len)) + return; - if (sdata->vif.type == NL80211_IFTYPE_AP && counter_offset_presp) { - rcu_read_lock(); - resp = rcu_dereference(sdata->u.ap.probe_resp); + beacon_data[counter_offset_beacon] = count; + } + + if (sdata->vif.type == NL80211_IFTYPE_AP && + counter_offset_presp) { + rcu_read_lock(); + resp = rcu_dereference(sdata->u.ap.probe_resp); - /* if nl80211 accepted the offset, this should not happen. */ - if (WARN_ON(!resp)) { + /* If nl80211 accepted the offset, this should + * not happen. 
+ */ + if (WARN_ON(!resp)) { + rcu_read_unlock(); + return; + } + resp->data[counter_offset_presp] = count; rcu_read_unlock(); - return; } - resp->data[counter_offset_presp]--; - rcu_read_unlock(); } } +u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + sdata->csa_current_counter--; + + /* the counter should never reach 0 */ + WARN_ON(!sdata->csa_current_counter); + + return sdata->csa_current_counter; +} +EXPORT_SYMBOL(ieee80211_csa_update_counter); + bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct beacon_data *beacon = NULL; u8 *beacon_data; size_t beacon_data_len; - int counter_beacon = sdata->csa_counter_offset_beacon; + int counter_beacon = sdata->csa_counter_offset_beacon[0]; int ret = false; if (!ieee80211_sdata_running(sdata)) @@ -2521,9 +2541,11 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_is_complete); -struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - u16 *tim_offset, u16 *tim_length) +static struct sk_buff * +__ieee80211_beacon_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs, + bool is_template) { struct ieee80211_local *local = hw_to_local(hw); struct sk_buff *skb = NULL; @@ -2532,6 +2554,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, enum ieee80211_band band; struct ieee80211_tx_rate_control txrc; struct ieee80211_chanctx_conf *chanctx_conf; + int csa_off_base = 0; rcu_read_lock(); @@ -2541,18 +2564,20 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!ieee80211_sdata_running(sdata) || !chanctx_conf) goto out; - if (tim_offset) - *tim_offset = 0; - if (tim_length) - *tim_length = 0; + if (offs) + memset(offs, 0, sizeof(*offs)); if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_if_ap *ap = &sdata->u.ap; struct beacon_data *beacon = rcu_dereference(ap->beacon); if (beacon) { - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, beacon); + if (sdata->vif.csa_active) { + if (!is_template) + ieee80211_csa_update_counter(vif); + + ieee80211_set_csa(sdata, beacon); + } /* * headroom, head length, @@ -2569,12 +2594,16 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); + ieee80211_beacon_add_tim(sdata, &ap->ps, skb, + is_template); - if (tim_offset) - *tim_offset = beacon->head_len; - if (tim_length) - *tim_length = skb->len - beacon->head_len; + if (offs) { + offs->tim_offset = beacon->head_len; + offs->tim_length = skb->len - beacon->head_len; + + /* for AP the csa offsets are from tail */ + csa_off_base = skb->len; + } if (beacon->tail) memcpy(skb_put(skb, beacon->tail_len), @@ -2589,9 +2618,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!presp) goto out; - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, presp); + if (sdata->vif.csa_active) { + if (!is_template) + ieee80211_csa_update_counter(vif); + ieee80211_set_csa(sdata, presp); + } skb = dev_alloc_skb(local->tx_headroom + presp->head_len + local->hw.extra_beacon_tailroom); @@ -2611,8 +2643,17 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!bcn) goto out; - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, bcn); + if (sdata->vif.csa_active) { + if (!is_template) + /* 
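ieee80211_csa_update_counter() is exported so that beacon-offload drivers can run the countdown themselves rather than having the host decrement a counter in every beacon it no longer builds. A hedged sketch of a driver-side caller (hypothetical driver code; ieee80211_csa_is_complete(), ieee80211_csa_finish() and vif->csa_active are the real mac80211 API):

	static void drv_beacon_tx_done(struct ieee80211_vif *vif)
	{
		if (!vif->csa_active)
			return;

		if (ieee80211_csa_is_complete(vif))
			ieee80211_csa_finish(vif);	/* schedule the actual switch */
		else
			ieee80211_csa_update_counter(vif);
	}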
TODO: For mesh csa_counter is in TU, so + * decrementing it by one isn't correct, but + * for now we leave it consistent with overall + * mac80211's behavior. + */ + ieee80211_csa_update_counter(vif); + + ieee80211_set_csa(sdata, bcn); + } if (ifmsh->sync_ops) ifmsh->sync_ops->adjust_tbtt(sdata, bcn); @@ -2626,13 +2667,33 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, goto out; skb_reserve(skb, local->tx_headroom); memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len); - ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb); + ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template); + + if (offs) { + offs->tim_offset = bcn->head_len; + offs->tim_length = skb->len - bcn->head_len; + } + memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len); } else { WARN_ON(1); goto out; } + /* CSA offsets */ + if (offs) { + int i; + + for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) { + u16 csa_off = sdata->csa_counter_offset_beacon[i]; + + if (!csa_off) + continue; + + offs->csa_counter_offs[i] = csa_off_base + csa_off; + } + } + band = chanctx_conf->def.chan->band; info = IEEE80211_SKB_CB(skb); @@ -2663,6 +2724,32 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, out: rcu_read_unlock(); return skb; + +} + +struct sk_buff * +ieee80211_beacon_get_template(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs) +{ + return __ieee80211_beacon_get(hw, vif, offs, true); +} +EXPORT_SYMBOL(ieee80211_beacon_get_template); + +struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 *tim_offset, u16 *tim_length) +{ + struct ieee80211_mutable_offsets offs = {}; + struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false); + + if (tim_offset) + *tim_offset = offs.tim_offset; + + if (tim_length) + *tim_length = offs.tim_length; + + return bcn; } EXPORT_SYMBOL(ieee80211_beacon_get_tim); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3c365837e910..6886601afe1c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -554,7 +554,7 @@ void ieee80211_flush_queues(struct ieee80211_local *local, ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_FLUSH); - drv_flush(local, queues, false); + drv_flush(local, sdata, queues, false); ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_FLUSH); @@ -1457,6 +1457,44 @@ void ieee80211_stop_device(struct ieee80211_local *local) drv_stop(local); } +static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_chanctx *ctx; + + /* + * We get here if during resume the device can't be restarted properly. + * We might also get here if this happens during HW reset, which is a + * slightly different situation and we need to drop all connections in + * the latter case. + * + * Ask cfg80211 to turn off all interfaces, this will result in more + * warnings but at least we'll then get into a clean stopped state. 
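The template/non-template split of __ieee80211_beacon_get() gives offload drivers a beacon whose mutable fields are described by struct ieee80211_mutable_offsets instead of being pre-decremented by the host; the firmware can then patch the TIM and count down the CSA bytes in place. A hedged usage sketch (fw_set_beacon() is a hypothetical firmware call, everything else is the API added here):

	static void drv_update_beacon(struct ieee80211_hw *hw,
				      struct ieee80211_vif *vif)
	{
		struct ieee80211_mutable_offsets offs;
		struct sk_buff *bcn;

		bcn = ieee80211_beacon_get_template(hw, vif, &offs);
		if (!bcn)
			return;

		fw_set_beacon(bcn->data, bcn->len, offs.tim_offset,
			      offs.csa_counter_offs[0]);
		dev_kfree_skb(bcn);
	}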
+ */ + + local->resuming = false; + local->suspended = false; + local->started = false; + + /* scheduled scan clearly can't be running any more, but tell + * cfg80211 and clear local state + */ + ieee80211_sched_scan_end(local); + + list_for_each_entry(sdata, &local->interfaces, list) + sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; + + /* Mark channel contexts as not being in the driver any more to avoid + * removing them from the driver during the shutdown process... + */ + mutex_lock(&local->chanctx_mtx); + list_for_each_entry(ctx, &local->chanctx_list, list) + ctx->driver_present = false; + mutex_unlock(&local->chanctx_mtx); + + cfg80211_shutdown_all_interfaces(local->hw.wiphy); +} + static void ieee80211_assign_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { @@ -1520,9 +1558,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) */ res = drv_start(local); if (res) { - WARN(local->suspended, "Hardware became unavailable " - "upon resume. This could be a software issue " - "prior to suspend or a hardware issue.\n"); + if (local->suspended) + WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n"); + else + WARN(1, "Hardware became unavailable during restart.\n"); + ieee80211_handle_reconfig_failure(local); return res; } @@ -1546,7 +1586,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN_ON(local->resuming); res = drv_add_interface(local, sdata); if (WARN_ON(res)) { - rcu_assign_pointer(local->monitor_sdata, NULL); + RCU_INIT_POINTER(local->monitor_sdata, NULL); synchronize_net(); kfree(sdata); } @@ -1565,17 +1605,17 @@ int ieee80211_reconfig(struct ieee80211_local *local) list_for_each_entry(ctx, &local->chanctx_list, list) WARN_ON(drv_add_chanctx(local, ctx)); mutex_unlock(&local->chanctx_mtx); - } - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - ieee80211_assign_chanctx(local, sdata); - } + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + ieee80211_assign_chanctx(local, sdata); + } - sdata = rtnl_dereference(local->monitor_sdata); - if (sdata && ieee80211_sdata_running(sdata)) - ieee80211_assign_chanctx(local, sdata); + sdata = rtnl_dereference(local->monitor_sdata); + if (sdata && ieee80211_sdata_running(sdata)) + ieee80211_assign_chanctx(local, sdata); + } /* add STAs back */ mutex_lock(&local->sta_mtx); @@ -1671,13 +1711,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) } break; case NL80211_IFTYPE_WDS: - break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: - /* ignore virtual */ - break; case NL80211_IFTYPE_P2P_DEVICE: - changed = BSS_CHANGED_IDLE; + /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -2797,3 +2834,121 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, ps->dtim_count = dtim_count; } + +int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode chanmode, + u8 radar_detect) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *sdata_iter; + enum nl80211_iftype iftype = sdata->wdev.iftype; + int num[NUM_NL80211_IFTYPES]; + struct ieee80211_chanctx *ctx; + int num_different_channels = 0; + int total = 1; + + lockdep_assert_held(&local->chanctx_mtx); + + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + if (WARN_ON(chandef && chanmode == 
IEEE80211_CHANCTX_SHARED && + !chandef->chan)) + return -EINVAL; + + if (chandef) + num_different_channels = 1; + + if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) + return -EINVAL; + + /* Always allow software iftypes */ + if (local->hw.wiphy->software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; + return 0; + } + + memset(num, 0, sizeof(num)); + + if (iftype != NL80211_IFTYPE_UNSPECIFIED) + num[iftype] = 1; + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->conf.radar_enabled) + radar_detect |= BIT(ctx->conf.def.width); + if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) { + num_different_channels++; + continue; + } + if (chandef && chanmode == IEEE80211_CHANCTX_SHARED && + cfg80211_chandef_compatible(chandef, + &ctx->conf.def)) + continue; + num_different_channels++; + } + + list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) { + struct wireless_dev *wdev_iter; + + wdev_iter = &sdata_iter->wdev; + + if (sdata_iter == sdata || + rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL || + local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) + continue; + + num[wdev_iter->iftype]++; + total++; + } + + if (total == 1 && !radar_detect) + return 0; + + return cfg80211_check_combinations(local->hw.wiphy, + num_different_channels, + radar_detect, num); +} + +static void +ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c, + void *data) +{ + u32 *max_num_different_channels = data; + + *max_num_different_channels = max(*max_num_different_channels, + c->num_different_channels); +} + +int ieee80211_max_num_channels(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + int num[NUM_NL80211_IFTYPES] = {}; + struct ieee80211_chanctx *ctx; + int num_different_channels = 0; + u8 radar_detect = 0; + u32 max_num_different_channels = 1; + int err; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(ctx, &local->chanctx_list, list) { + num_different_channels++; + + if (ctx->conf.radar_enabled) + radar_detect |= BIT(ctx->conf.def.width); + } + + list_for_each_entry_rcu(sdata, &local->interfaces, list) + num[sdata->wdev.iftype]++; + + err = cfg80211_iter_combinations(local->hw.wiphy, + num_different_channels, radar_detect, + num, ieee80211_iter_max_chans, + &max_num_different_channels); + if (err < 0) + return err; + + return max_num_different_channels; +} diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index b8600e3c29c8..9b3dcc201145 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -406,7 +406,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && - !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && + !((info->control.hw_key->flags & + IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && + ieee80211_is_mgmt(hdr->frame_control))) { /* * hwaccel has no need for preallocated room for CCMP * header or MIC fields diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig index b33dd76d4307..1818a99b3081 100644 --- a/net/mac802154/Kconfig +++ b/net/mac802154/Kconfig @@ -2,6 +2,10 @@ config MAC802154 tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)" depends on IEEE802154 select CRC_CCITT + select CRYPTO_AUTHENC + select CRYPTO_CCM + select CRYPTO_CTR + select CRYPTO_AES ---help--- This option enables the hardware independent IEEE 802.15.4 networking stack for SoftMAC devices (the ones 
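ieee80211_check_combinations() and ieee80211_max_num_channels() both bottom out in cfg80211's combination checker, so their verdicts are entirely driven by what the driver advertises at wiphy registration. A hypothetical advertisement of the kind these checks evaluate against (two stations plus one AP on at most two distinct channels, radar detection at 20 MHz only):

	static const struct ieee80211_iface_limit limits[] = {
		{ .max = 2, .types = BIT(NL80211_IFTYPE_STATION) },
		{ .max = 1, .types = BIT(NL80211_IFTYPE_AP) },
	};

	static const struct ieee80211_iface_combination combo = {
		.limits			= limits,
		.n_limits		= ARRAY_SIZE(limits),
		.max_interfaces		= 3,
		.num_different_channels	= 2,
		.radar_detect_widths	= BIT(NL80211_CHAN_WIDTH_20),
	};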
implementing diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile index 15d62df52182..9723d6f3f3e5 100644 --- a/net/mac802154/Makefile +++ b/net/mac802154/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_MAC802154) += mac802154.o -mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o wpan.o +mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o \ + monitor.o wpan.o llsec.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c new file mode 100644 index 000000000000..1456f73b02b9 --- /dev/null +++ b/net/mac802154/llsec.c @@ -0,0 +1,1070 @@ +/* + * Copyright (C) 2014 Fraunhofer ITWM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Written by: + * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de> + */ + +#include <linux/err.h> +#include <linux/bug.h> +#include <linux/completion.h> +#include <net/ieee802154.h> +#include <crypto/algapi.h> + +#include "mac802154.h" +#include "llsec.h" + +static void llsec_key_put(struct mac802154_llsec_key *key); +static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a, + const struct ieee802154_llsec_key_id *b); + +static void llsec_dev_free(struct mac802154_llsec_device *dev); + +void mac802154_llsec_init(struct mac802154_llsec *sec) +{ + memset(sec, 0, sizeof(*sec)); + + memset(&sec->params.default_key_source, 0xFF, IEEE802154_ADDR_LEN); + + INIT_LIST_HEAD(&sec->table.security_levels); + INIT_LIST_HEAD(&sec->table.devices); + INIT_LIST_HEAD(&sec->table.keys); + hash_init(sec->devices_short); + hash_init(sec->devices_hw); + rwlock_init(&sec->lock); +} + +void mac802154_llsec_destroy(struct mac802154_llsec *sec) +{ + struct ieee802154_llsec_seclevel *sl, *sn; + struct ieee802154_llsec_device *dev, *dn; + struct ieee802154_llsec_key_entry *key, *kn; + + list_for_each_entry_safe(sl, sn, &sec->table.security_levels, list) { + struct mac802154_llsec_seclevel *msl; + + msl = container_of(sl, struct mac802154_llsec_seclevel, level); + list_del(&sl->list); + kfree(msl); + } + + list_for_each_entry_safe(dev, dn, &sec->table.devices, list) { + struct mac802154_llsec_device *mdev; + + mdev = container_of(dev, struct mac802154_llsec_device, dev); + list_del(&dev->list); + llsec_dev_free(mdev); + } + + list_for_each_entry_safe(key, kn, &sec->table.keys, list) { + struct mac802154_llsec_key *mkey; + + mkey = container_of(key->key, struct mac802154_llsec_key, key); + list_del(&key->list); + llsec_key_put(mkey); + kfree(key); + } +} + + + +int mac802154_llsec_get_params(struct mac802154_llsec *sec, + struct ieee802154_llsec_params *params) +{ + read_lock_bh(&sec->lock); + *params = sec->params; + read_unlock_bh(&sec->lock); + + return 0; +} + +int mac802154_llsec_set_params(struct mac802154_llsec *sec, + const struct ieee802154_llsec_params *params, + int changed) +{ + write_lock_bh(&sec->lock); + + if (changed & IEEE802154_LLSEC_PARAM_ENABLED) + sec->params.enabled = params->enabled; + if (changed & IEEE802154_LLSEC_PARAM_FRAME_COUNTER) + sec->params.frame_counter = params->frame_counter; + if (changed & IEEE802154_LLSEC_PARAM_OUT_LEVEL) + sec->params.out_level = params->out_level; + if (changed 
& IEEE802154_LLSEC_PARAM_OUT_KEY) + sec->params.out_key = params->out_key; + if (changed & IEEE802154_LLSEC_PARAM_KEY_SOURCE) + sec->params.default_key_source = params->default_key_source; + if (changed & IEEE802154_LLSEC_PARAM_PAN_ID) + sec->params.pan_id = params->pan_id; + if (changed & IEEE802154_LLSEC_PARAM_HWADDR) + sec->params.hwaddr = params->hwaddr; + if (changed & IEEE802154_LLSEC_PARAM_COORD_HWADDR) + sec->params.coord_hwaddr = params->coord_hwaddr; + if (changed & IEEE802154_LLSEC_PARAM_COORD_SHORTADDR) + sec->params.coord_shortaddr = params->coord_shortaddr; + + write_unlock_bh(&sec->lock); + + return 0; +} + + + +static struct mac802154_llsec_key* +llsec_key_alloc(const struct ieee802154_llsec_key *template) +{ + const int authsizes[3] = { 4, 8, 16 }; + struct mac802154_llsec_key *key; + int i; + + key = kzalloc(sizeof(*key), GFP_KERNEL); + if (!key) + return NULL; + + kref_init(&key->ref); + key->key = *template; + + BUILD_BUG_ON(ARRAY_SIZE(authsizes) != ARRAY_SIZE(key->tfm)); + + for (i = 0; i < ARRAY_SIZE(key->tfm); i++) { + key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0, + CRYPTO_ALG_ASYNC); + if (!key->tfm[i]) + goto err_tfm; + if (crypto_aead_setkey(key->tfm[i], template->key, + IEEE802154_LLSEC_KEY_SIZE)) + goto err_tfm; + if (crypto_aead_setauthsize(key->tfm[i], authsizes[i])) + goto err_tfm; + } + + key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); + if (!key->tfm0) + goto err_tfm; + + if (crypto_blkcipher_setkey(key->tfm0, template->key, + IEEE802154_LLSEC_KEY_SIZE)) + goto err_tfm0; + + return key; + +err_tfm0: + crypto_free_blkcipher(key->tfm0); +err_tfm: + for (i = 0; i < ARRAY_SIZE(key->tfm); i++) + if (key->tfm[i]) + crypto_free_aead(key->tfm[i]); + + kfree(key); + return NULL; +} + +static void llsec_key_release(struct kref *ref) +{ + struct mac802154_llsec_key *key; + int i; + + key = container_of(ref, struct mac802154_llsec_key, ref); + + for (i = 0; i < ARRAY_SIZE(key->tfm); i++) + crypto_free_aead(key->tfm[i]); + + crypto_free_blkcipher(key->tfm0); + kfree(key); +} + +static struct mac802154_llsec_key* +llsec_key_get(struct mac802154_llsec_key *key) +{ + kref_get(&key->ref); + return key; +} + +static void llsec_key_put(struct mac802154_llsec_key *key) +{ + kref_put(&key->ref, llsec_key_release); +} + +static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a, + const struct ieee802154_llsec_key_id *b) +{ + if (a->mode != b->mode) + return false; + + if (a->mode == IEEE802154_SCF_KEY_IMPLICIT) + return ieee802154_addr_equal(&a->device_addr, &b->device_addr); + + if (a->id != b->id) + return false; + + switch (a->mode) { + case IEEE802154_SCF_KEY_INDEX: + return true; + case IEEE802154_SCF_KEY_SHORT_INDEX: + return a->short_source == b->short_source; + case IEEE802154_SCF_KEY_HW_INDEX: + return a->extended_source == b->extended_source; + } + + return false; +} + +int mac802154_llsec_key_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key) +{ + struct mac802154_llsec_key *mkey = NULL; + struct ieee802154_llsec_key_entry *pos, *new; + + if (!(key->frame_types & (1 << IEEE802154_FC_TYPE_MAC_CMD)) && + key->cmd_frame_ids) + return -EINVAL; + + list_for_each_entry(pos, &sec->table.keys, list) { + if (llsec_key_id_equal(&pos->id, id)) + return -EEXIST; + + if (memcmp(pos->key->key, key->key, + IEEE802154_LLSEC_KEY_SIZE)) + continue; + + mkey = container_of(pos->key, struct mac802154_llsec_key, key); + + /* Don't allow multiple instances of the same AES key to have + * 
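llsec_key_alloc() above allocates three "ccm(aes)" transforms per key because an AEAD tfm's authsize is fixed at setup time, while 802.15.4 picks the MIC length per frame from the security level; llsec_tfm_by_len() further down selects the right one. The mapping, per 802.15.4-2006:

	/* security level -> MIC length -> preallocated tfm
	 *   MIC-32  / ENC-MIC-32   (levels 1, 5) -> authsize 4  -> key->tfm[0]
	 *   MIC-64  / ENC-MIC-64   (levels 2, 6) -> authsize 8  -> key->tfm[1]
	 *   MIC-128 / ENC-MIC-128  (levels 3, 7) -> authsize 16 -> key->tfm[2]
	 * level 4 (ENC, no MIC) uses the separate "ctr(aes)" tfm0 instead.
	 */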
different allowed frame types/command frame ids, as this is + * not possible in the 802.15.4 PIB. + */ + if (pos->key->frame_types != key->frame_types || + pos->key->cmd_frame_ids != key->cmd_frame_ids) + return -EEXIST; + + break; + } + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + + if (!mkey) + mkey = llsec_key_alloc(key); + else + mkey = llsec_key_get(mkey); + + if (!mkey) + goto fail; + + new->id = *id; + new->key = &mkey->key; + + list_add_rcu(&new->list, &sec->table.keys); + + return 0; + +fail: + kfree(new); + return -ENOMEM; +} + +int mac802154_llsec_key_del(struct mac802154_llsec *sec, + const struct ieee802154_llsec_key_id *key) +{ + struct ieee802154_llsec_key_entry *pos; + + list_for_each_entry(pos, &sec->table.keys, list) { + struct mac802154_llsec_key *mkey; + + mkey = container_of(pos->key, struct mac802154_llsec_key, key); + + if (llsec_key_id_equal(&pos->id, key)) { + list_del_rcu(&pos->list); + llsec_key_put(mkey); + return 0; + } + } + + return -ENOENT; +} + + + +static bool llsec_dev_use_shortaddr(__le16 short_addr) +{ + return short_addr != cpu_to_le16(IEEE802154_ADDR_UNDEF) && + short_addr != cpu_to_le16(0xffff); +} + +static u32 llsec_dev_hash_short(__le16 short_addr, __le16 pan_id) +{ + return ((__force u16) short_addr) << 16 | (__force u16) pan_id; +} + +static u64 llsec_dev_hash_long(__le64 hwaddr) +{ + return (__force u64) hwaddr; +} + +static struct mac802154_llsec_device* +llsec_dev_find_short(struct mac802154_llsec *sec, __le16 short_addr, + __le16 pan_id) +{ + struct mac802154_llsec_device *dev; + u32 key = llsec_dev_hash_short(short_addr, pan_id); + + hash_for_each_possible_rcu(sec->devices_short, dev, bucket_s, key) { + if (dev->dev.short_addr == short_addr && + dev->dev.pan_id == pan_id) + return dev; + } + + return NULL; +} + +static struct mac802154_llsec_device* +llsec_dev_find_long(struct mac802154_llsec *sec, __le64 hwaddr) +{ + struct mac802154_llsec_device *dev; + u64 key = llsec_dev_hash_long(hwaddr); + + hash_for_each_possible_rcu(sec->devices_hw, dev, bucket_hw, key) { + if (dev->dev.hwaddr == hwaddr) + return dev; + } + + return NULL; +} + +static void llsec_dev_free(struct mac802154_llsec_device *dev) +{ + struct ieee802154_llsec_device_key *pos, *pn; + struct mac802154_llsec_device_key *devkey; + + list_for_each_entry_safe(pos, pn, &dev->dev.keys, list) { + devkey = container_of(pos, struct mac802154_llsec_device_key, + devkey); + + list_del(&pos->list); + kfree(devkey); + } + + kfree(dev); +} + +int mac802154_llsec_dev_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_device *dev) +{ + struct mac802154_llsec_device *entry; + u32 skey = llsec_dev_hash_short(dev->short_addr, dev->pan_id); + u64 hwkey = llsec_dev_hash_long(dev->hwaddr); + + BUILD_BUG_ON(sizeof(hwkey) != IEEE802154_ADDR_LEN); + + if ((llsec_dev_use_shortaddr(dev->short_addr) && + llsec_dev_find_short(sec, dev->short_addr, dev->pan_id)) || + llsec_dev_find_long(sec, dev->hwaddr)) + return -EEXIST; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->dev = *dev; + spin_lock_init(&entry->lock); + INIT_LIST_HEAD(&entry->dev.keys); + + if (llsec_dev_use_shortaddr(dev->short_addr)) + hash_add_rcu(sec->devices_short, &entry->bucket_s, skey); + else + INIT_HLIST_NODE(&entry->bucket_s); + + hash_add_rcu(sec->devices_hw, &entry->bucket_hw, hwkey); + list_add_tail_rcu(&entry->dev.list, &sec->table.devices); + + return 0; +} + +static void llsec_dev_free_rcu(struct rcu_head *rcu) +{ + 
llsec_dev_free(container_of(rcu, struct mac802154_llsec_device, rcu)); +} + +int mac802154_llsec_dev_del(struct mac802154_llsec *sec, __le64 device_addr) +{ + struct mac802154_llsec_device *pos; + + pos = llsec_dev_find_long(sec, device_addr); + if (!pos) + return -ENOENT; + + hash_del_rcu(&pos->bucket_s); + hash_del_rcu(&pos->bucket_hw); + call_rcu(&pos->rcu, llsec_dev_free_rcu); + + return 0; +} + + + +static struct mac802154_llsec_device_key* +llsec_devkey_find(struct mac802154_llsec_device *dev, + const struct ieee802154_llsec_key_id *key) +{ + struct ieee802154_llsec_device_key *devkey; + + list_for_each_entry_rcu(devkey, &dev->dev.keys, list) { + if (!llsec_key_id_equal(key, &devkey->key_id)) + continue; + + return container_of(devkey, struct mac802154_llsec_device_key, + devkey); + } + + return NULL; +} + +int mac802154_llsec_devkey_add(struct mac802154_llsec *sec, + __le64 dev_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_llsec_device *dev; + struct mac802154_llsec_device_key *devkey; + + dev = llsec_dev_find_long(sec, dev_addr); + + if (!dev) + return -ENOENT; + + if (llsec_devkey_find(dev, &key->key_id)) + return -EEXIST; + + devkey = kmalloc(sizeof(*devkey), GFP_KERNEL); + if (!devkey) + return -ENOMEM; + + devkey->devkey = *key; + list_add_tail_rcu(&devkey->devkey.list, &dev->dev.keys); + return 0; +} + +int mac802154_llsec_devkey_del(struct mac802154_llsec *sec, + __le64 dev_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_llsec_device *dev; + struct mac802154_llsec_device_key *devkey; + + dev = llsec_dev_find_long(sec, dev_addr); + + if (!dev) + return -ENOENT; + + devkey = llsec_devkey_find(dev, &key->key_id); + if (!devkey) + return -ENOENT; + + list_del_rcu(&devkey->devkey.list); + kfree_rcu(devkey, rcu); + return 0; +} + + + +static struct mac802154_llsec_seclevel* +llsec_find_seclevel(const struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl) +{ + struct ieee802154_llsec_seclevel *pos; + + list_for_each_entry(pos, &sec->table.security_levels, list) { + if (pos->frame_type != sl->frame_type || + (pos->frame_type == IEEE802154_FC_TYPE_MAC_CMD && + pos->cmd_frame_id != sl->cmd_frame_id) || + pos->device_override != sl->device_override || + pos->sec_levels != sl->sec_levels) + continue; + + return container_of(pos, struct mac802154_llsec_seclevel, + level); + } + + return NULL; +} + +int mac802154_llsec_seclevel_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_llsec_seclevel *entry; + + if (llsec_find_seclevel(sec, sl)) + return -EEXIST; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->level = *sl; + + list_add_tail_rcu(&entry->level.list, &sec->table.security_levels); + + return 0; +} + +int mac802154_llsec_seclevel_del(struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_llsec_seclevel *pos; + + pos = llsec_find_seclevel(sec, sl); + if (!pos) + return -ENOENT; + + list_del_rcu(&pos->level.list); + kfree_rcu(pos, rcu); + + return 0; +} + + + +static int llsec_recover_addr(struct mac802154_llsec *sec, + struct ieee802154_addr *addr) +{ + __le16 caddr = sec->params.coord_shortaddr; + addr->pan_id = sec->params.pan_id; + + if (caddr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { + return -EINVAL; + } else if (caddr == cpu_to_le16(IEEE802154_ADDR_UNDEF)) { + addr->extended_addr = sec->params.coord_hwaddr; + addr->mode = IEEE802154_ADDR_LONG; + } else { + 
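
To make the security-level table concrete: sec_levels is a bitmap tested with BIT(hdr.sec.level) on the receive path further down, so a hypothetical entry permitting ENC-MIC-32 (level 5) on data frames could be added like this (sketch only, example_allow_enc_mic32 is invented):

static int example_allow_enc_mic32(struct mac802154_llsec *sec)
{
	struct ieee802154_llsec_seclevel sl = {
		.frame_type = IEEE802154_FC_TYPE_DATA,
		.sec_levels = BIT(5),	/* ENC-MIC-32 */
	};

	return mac802154_llsec_seclevel_add(sec, &sl);
}
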
addr->short_addr = sec->params.coord_shortaddr; + addr->mode = IEEE802154_ADDR_SHORT; + } + + return 0; +} + +static struct mac802154_llsec_key* +llsec_lookup_key(struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + const struct ieee802154_addr *addr, + struct ieee802154_llsec_key_id *key_id) +{ + struct ieee802154_addr devaddr = *addr; + u8 key_id_mode = hdr->sec.key_id_mode; + struct ieee802154_llsec_key_entry *key_entry; + struct mac802154_llsec_key *key; + + if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT && + devaddr.mode == IEEE802154_ADDR_NONE) { + if (hdr->fc.type == IEEE802154_FC_TYPE_BEACON) { + devaddr.extended_addr = sec->params.coord_hwaddr; + devaddr.mode = IEEE802154_ADDR_LONG; + } else if (llsec_recover_addr(sec, &devaddr) < 0) { + return NULL; + } + } + + list_for_each_entry_rcu(key_entry, &sec->table.keys, list) { + const struct ieee802154_llsec_key_id *id = &key_entry->id; + + if (!(key_entry->key->frame_types & BIT(hdr->fc.type))) + continue; + + if (id->mode != key_id_mode) + continue; + + if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (ieee802154_addr_equal(&devaddr, &id->device_addr)) + goto found; + } else { + if (id->id != hdr->sec.key_id) + continue; + + if ((key_id_mode == IEEE802154_SCF_KEY_INDEX) || + (key_id_mode == IEEE802154_SCF_KEY_SHORT_INDEX && + id->short_source == hdr->sec.short_src) || + (key_id_mode == IEEE802154_SCF_KEY_HW_INDEX && + id->extended_source == hdr->sec.extended_src)) + goto found; + } + } + + return NULL; + +found: + key = container_of(key_entry->key, struct mac802154_llsec_key, key); + if (key_id) + *key_id = key_entry->id; + return llsec_key_get(key); +} + + +static void llsec_geniv(u8 iv[16], __le64 addr, + const struct ieee802154_sechdr *sec) +{ + __be64 addr_bytes = (__force __be64) swab64((__force u64) addr); + __be32 frame_counter = (__force __be32) swab32((__force u32) sec->frame_counter); + + iv[0] = 1; /* L' = L - 1 = 1 */ + memcpy(iv + 1, &addr_bytes, sizeof(addr_bytes)); + memcpy(iv + 9, &frame_counter, sizeof(frame_counter)); + iv[13] = sec->level; + iv[14] = 0; + iv[15] = 1; +} + +static int +llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key) +{ + u8 iv[16]; + struct scatterlist src; + struct blkcipher_desc req = { + .tfm = key->tfm0, + .info = iv, + .flags = 0, + }; + + llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); + sg_init_one(&src, skb->data, skb->len); + return crypto_blkcipher_encrypt_iv(&req, &src, &src, skb->len); +} + +static struct crypto_aead* +llsec_tfm_by_len(struct mac802154_llsec_key *key, int authlen) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(key->tfm); i++) + if (crypto_aead_authsize(key->tfm[i]) == authlen) + return key->tfm[i]; + + BUG(); +} + +static int +llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key) +{ + u8 iv[16]; + unsigned char *data; + int authlen, assoclen, datalen, rc; + struct scatterlist src, assoc[2], dst[2]; + struct aead_request *req; + + authlen = ieee802154_sechdr_authtag_len(&hdr->sec); + llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); + + req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC); + if (!req) + return -ENOMEM; + + sg_init_table(assoc, 2); + sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len); + assoclen = skb->mac_len; + + data = skb_mac_header(skb) + skb->mac_len; + datalen = skb_tail_pointer(skb) - data; + + if (hdr->sec.level & 
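
For reference, the 16-byte counter block that llsec_geniv() above assembles (802.15.4 CCM* with a 13-byte nonce and L = 2) breaks down as:

	iv[0]       block flags, L' = L - 1 = 1
	iv[1..8]    source extended address, big endian
	iv[9..12]   frame counter, big endian
	iv[13]      security level
	iv[14..15]  initial block counter, starting at 0x0001
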
IEEE802154_SCF_SECLEVEL_ENC) { + sg_set_buf(&assoc[1], data, 0); + } else { + sg_set_buf(&assoc[1], data, datalen); + assoclen += datalen; + datalen = 0; + } + + sg_init_one(&src, data, datalen); + + sg_init_table(dst, 2); + sg_set_buf(&dst[0], data, datalen); + sg_set_buf(&dst[1], skb_put(skb, authlen), authlen); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_assoc(req, assoc, assoclen); + aead_request_set_crypt(req, &src, dst, datalen, iv); + + rc = crypto_aead_encrypt(req); + + kfree(req); + + return rc; +} + +static int llsec_do_encrypt(struct sk_buff *skb, + const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key) +{ + if (hdr->sec.level == IEEE802154_SCF_SECLEVEL_ENC) + return llsec_do_encrypt_unauth(skb, sec, hdr, key); + else + return llsec_do_encrypt_auth(skb, sec, hdr, key); +} + +int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb) +{ + struct ieee802154_hdr hdr; + int rc, authlen, hlen; + struct mac802154_llsec_key *key; + u32 frame_ctr; + + hlen = ieee802154_hdr_pull(skb, &hdr); + + if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA) + return -EINVAL; + + if (!hdr.fc.security_enabled || hdr.sec.level == 0) { + skb_push(skb, hlen); + return 0; + } + + authlen = ieee802154_sechdr_authtag_len(&hdr.sec); + + if (skb->len + hlen + authlen + IEEE802154_MFR_SIZE > IEEE802154_MTU) + return -EMSGSIZE; + + rcu_read_lock(); + + read_lock_bh(&sec->lock); + + if (!sec->params.enabled) { + rc = -EINVAL; + goto fail_read; + } + + key = llsec_lookup_key(sec, &hdr, &hdr.dest, NULL); + if (!key) { + rc = -ENOKEY; + goto fail_read; + } + + read_unlock_bh(&sec->lock); + + write_lock_bh(&sec->lock); + + frame_ctr = be32_to_cpu(sec->params.frame_counter); + hdr.sec.frame_counter = cpu_to_le32(frame_ctr); + if (frame_ctr == 0xFFFFFFFF) { + write_unlock_bh(&sec->lock); + llsec_key_put(key); + rc = -EOVERFLOW; + goto fail; + } + + sec->params.frame_counter = cpu_to_be32(frame_ctr + 1); + + write_unlock_bh(&sec->lock); + + rcu_read_unlock(); + + skb->mac_len = ieee802154_hdr_push(skb, &hdr); + skb_reset_mac_header(skb); + + rc = llsec_do_encrypt(skb, sec, &hdr, key); + llsec_key_put(key); + + return rc; + +fail_read: + read_unlock_bh(&sec->lock); +fail: + rcu_read_unlock(); + return rc; +} + + + +static struct mac802154_llsec_device* +llsec_lookup_dev(struct mac802154_llsec *sec, + const struct ieee802154_addr *addr) +{ + struct ieee802154_addr devaddr = *addr; + struct mac802154_llsec_device *dev = NULL; + + if (devaddr.mode == IEEE802154_ADDR_NONE && + llsec_recover_addr(sec, &devaddr) < 0) + return NULL; + + if (devaddr.mode == IEEE802154_ADDR_SHORT) { + u32 key = llsec_dev_hash_short(devaddr.short_addr, + devaddr.pan_id); + + hash_for_each_possible_rcu(sec->devices_short, dev, + bucket_s, key) { + if (dev->dev.pan_id == devaddr.pan_id && + dev->dev.short_addr == devaddr.short_addr) + return dev; + } + } else { + u64 key = llsec_dev_hash_long(devaddr.extended_addr); + + hash_for_each_possible_rcu(sec->devices_hw, dev, + bucket_hw, key) { + if (dev->dev.hwaddr == devaddr.extended_addr) + return dev; + } + } + + return NULL; +} + +static int +llsec_lookup_seclevel(const struct mac802154_llsec *sec, + u8 frame_type, u8 cmd_frame_id, + struct ieee802154_llsec_seclevel *rlevel) +{ + struct ieee802154_llsec_seclevel *level; + + list_for_each_entry_rcu(level, &sec->table.security_levels, list) { + if (level->frame_type == frame_type && + (frame_type != IEEE802154_FC_TYPE_MAC_CMD || + level->cmd_frame_id == 
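
mac802154_llsec_encrypt() above consumes the outgoing frame counter under the writer lock and refuses to let it wrap, since reusing a (key, nonce) pair would void CCM*'s guarantees. The discipline, condensed into a sketch:

static int sketch_claim_frame_counter(struct mac802154_llsec *sec, u32 *ctr)
{
	int rc = 0;

	write_lock_bh(&sec->lock);
	*ctr = be32_to_cpu(sec->params.frame_counter);
	if (*ctr == 0xFFFFFFFF)
		rc = -EOVERFLOW;	/* counter exhausted; the key must be replaced */
	else
		sec->params.frame_counter = cpu_to_be32(*ctr + 1);
	write_unlock_bh(&sec->lock);

	return rc;
}
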
cmd_frame_id)) { + *rlevel = *level; + return 0; + } + } + + return -EINVAL; +} + +static int +llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key, __le64 dev_addr) +{ + u8 iv[16]; + unsigned char *data; + int datalen; + struct scatterlist src; + struct blkcipher_desc req = { + .tfm = key->tfm0, + .info = iv, + .flags = 0, + }; + + llsec_geniv(iv, dev_addr, &hdr->sec); + data = skb_mac_header(skb) + skb->mac_len; + datalen = skb_tail_pointer(skb) - data; + + sg_init_one(&src, data, datalen); + + return crypto_blkcipher_decrypt_iv(&req, &src, &src, datalen); +} + +static int +llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key, __le64 dev_addr) +{ + u8 iv[16]; + unsigned char *data; + int authlen, datalen, assoclen, rc; + struct scatterlist src, assoc[2]; + struct aead_request *req; + + authlen = ieee802154_sechdr_authtag_len(&hdr->sec); + llsec_geniv(iv, dev_addr, &hdr->sec); + + req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC); + if (!req) + return -ENOMEM; + + sg_init_table(assoc, 2); + sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len); + assoclen = skb->mac_len; + + data = skb_mac_header(skb) + skb->mac_len; + datalen = skb_tail_pointer(skb) - data; + + if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) { + sg_set_buf(&assoc[1], data, 0); + } else { + sg_set_buf(&assoc[1], data, datalen - authlen); + assoclen += datalen - authlen; + data += datalen - authlen; + datalen = authlen; + } + + sg_init_one(&src, data, datalen); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_assoc(req, assoc, assoclen); + aead_request_set_crypt(req, &src, &src, datalen, iv); + + rc = crypto_aead_decrypt(req); + + kfree(req); + skb_trim(skb, skb->len - authlen); + + return rc; +} + +static int +llsec_do_decrypt(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key, __le64 dev_addr) +{ + if (hdr->sec.level == IEEE802154_SCF_SECLEVEL_ENC) + return llsec_do_decrypt_unauth(skb, sec, hdr, key, dev_addr); + else + return llsec_do_decrypt_auth(skb, sec, hdr, key, dev_addr); +} + +static int +llsec_update_devkey_record(struct mac802154_llsec_device *dev, + const struct ieee802154_llsec_key_id *in_key) +{ + struct mac802154_llsec_device_key *devkey; + + devkey = llsec_devkey_find(dev, in_key); + + if (!devkey) { + struct mac802154_llsec_device_key *next; + + next = kzalloc(sizeof(*devkey), GFP_ATOMIC); + if (!next) + return -ENOMEM; + + next->devkey.key_id = *in_key; + + spin_lock_bh(&dev->lock); + + devkey = llsec_devkey_find(dev, in_key); + if (!devkey) + list_add_rcu(&next->devkey.list, &dev->dev.keys); + else + kfree(next); + + spin_unlock_bh(&dev->lock); + } + + return 0; +} + +static int +llsec_update_devkey_info(struct mac802154_llsec_device *dev, + const struct ieee802154_llsec_key_id *in_key, + u32 frame_counter) +{ + struct mac802154_llsec_device_key *devkey = NULL; + + if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RESTRICT) { + devkey = llsec_devkey_find(dev, in_key); + if (!devkey) + return -ENOENT; + } + + if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RECORD) { + int rc = llsec_update_devkey_record(dev, in_key); + + if (rc < 0) + return rc; + } + + spin_lock_bh(&dev->lock); + + if ((!devkey && frame_counter < dev->dev.frame_counter) || + (devkey && frame_counter < 
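
In both authenticated paths above, the MAC header is always fed to the AEAD as associated data; the payload counts as ciphertext only when the level's ENC bit is set, otherwise it is authenticated in the clear and only the trailing MIC is produced or checked by the transform.
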
devkey->devkey.frame_counter)) { + spin_unlock_bh(&dev->lock); + return -EINVAL; + } + + if (devkey) + devkey->devkey.frame_counter = frame_counter + 1; + else + dev->dev.frame_counter = frame_counter + 1; + + spin_unlock_bh(&dev->lock); + + return 0; +} + +int mac802154_llsec_decrypt(struct mac802154_llsec *sec, struct sk_buff *skb) +{ + struct ieee802154_hdr hdr; + struct mac802154_llsec_key *key; + struct ieee802154_llsec_key_id key_id; + struct mac802154_llsec_device *dev; + struct ieee802154_llsec_seclevel seclevel; + int err; + __le64 dev_addr; + u32 frame_ctr; + + if (ieee802154_hdr_peek(skb, &hdr) < 0) + return -EINVAL; + if (!hdr.fc.security_enabled) + return 0; + if (hdr.fc.version == 0) + return -EINVAL; + + read_lock_bh(&sec->lock); + if (!sec->params.enabled) { + read_unlock_bh(&sec->lock); + return -EINVAL; + } + read_unlock_bh(&sec->lock); + + rcu_read_lock(); + + key = llsec_lookup_key(sec, &hdr, &hdr.source, &key_id); + if (!key) { + err = -ENOKEY; + goto fail; + } + + dev = llsec_lookup_dev(sec, &hdr.source); + if (!dev) { + err = -EINVAL; + goto fail_dev; + } + + if (llsec_lookup_seclevel(sec, hdr.fc.type, 0, &seclevel) < 0) { + err = -EINVAL; + goto fail_dev; + } + + if (!(seclevel.sec_levels & BIT(hdr.sec.level)) && + (hdr.sec.level == 0 && seclevel.device_override && + !dev->dev.seclevel_exempt)) { + err = -EINVAL; + goto fail_dev; + } + + frame_ctr = le32_to_cpu(hdr.sec.frame_counter); + + if (frame_ctr == 0xffffffff) { + err = -EOVERFLOW; + goto fail_dev; + } + + err = llsec_update_devkey_info(dev, &key_id, frame_ctr); + if (err) + goto fail_dev; + + dev_addr = dev->dev.hwaddr; + + rcu_read_unlock(); + + err = llsec_do_decrypt(skb, sec, &hdr, key, dev_addr); + llsec_key_put(key); + return err; + +fail_dev: + llsec_key_put(key); +fail: + rcu_read_unlock(); + return err; +} diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h new file mode 100644 index 000000000000..950578e1d7be --- /dev/null +++ b/net/mac802154/llsec.h @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2014 Fraunhofer ITWM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Written by: + * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de> + */ + +#ifndef MAC802154_LLSEC_H +#define MAC802154_LLSEC_H + +#include <linux/slab.h> +#include <linux/hashtable.h> +#include <linux/crypto.h> +#include <linux/kref.h> +#include <linux/spinlock.h> +#include <net/af_ieee802154.h> +#include <net/ieee802154_netdev.h> + +struct mac802154_llsec_key { + struct ieee802154_llsec_key key; + + /* one tfm for each authsize (4/8/16) */ + struct crypto_aead *tfm[3]; + struct crypto_blkcipher *tfm0; + + struct kref ref; +}; + +struct mac802154_llsec_device_key { + struct ieee802154_llsec_device_key devkey; + + struct rcu_head rcu; +}; + +struct mac802154_llsec_device { + struct ieee802154_llsec_device dev; + + struct hlist_node bucket_s; + struct hlist_node bucket_hw; + + /* protects dev.frame_counter and the elements of dev.keys */ + spinlock_t lock; + + struct rcu_head rcu; +}; + +struct mac802154_llsec_seclevel { + struct ieee802154_llsec_seclevel level; + + struct rcu_head rcu; +}; + +struct mac802154_llsec { + struct ieee802154_llsec_params params; + struct ieee802154_llsec_table table; + + DECLARE_HASHTABLE(devices_short, 6); + DECLARE_HASHTABLE(devices_hw, 6); + + /* protects params, all other fields are fine with RCU */ + rwlock_t lock; +}; + +void mac802154_llsec_init(struct mac802154_llsec *sec); +void mac802154_llsec_destroy(struct mac802154_llsec *sec); + +int mac802154_llsec_get_params(struct mac802154_llsec *sec, + struct ieee802154_llsec_params *params); +int mac802154_llsec_set_params(struct mac802154_llsec *sec, + const struct ieee802154_llsec_params *params, + int changed); + +int mac802154_llsec_key_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key); +int mac802154_llsec_key_del(struct mac802154_llsec *sec, + const struct ieee802154_llsec_key_id *key); + +int mac802154_llsec_dev_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_device *dev); +int mac802154_llsec_dev_del(struct mac802154_llsec *sec, + __le64 device_addr); + +int mac802154_llsec_devkey_add(struct mac802154_llsec *sec, + __le64 dev_addr, + const struct ieee802154_llsec_device_key *key); +int mac802154_llsec_devkey_del(struct mac802154_llsec *sec, + __le64 dev_addr, + const struct ieee802154_llsec_device_key *key); + +int mac802154_llsec_seclevel_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl); +int mac802154_llsec_seclevel_del(struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl); + +int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb); +int mac802154_llsec_decrypt(struct mac802154_llsec *sec, struct sk_buff *skb); + +#endif /* MAC802154_LLSEC_H */ diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index 28ef59c566e6..762a6f849c6b 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -23,8 +23,12 @@ #ifndef MAC802154_H #define MAC802154_H +#include <linux/mutex.h> +#include <net/mac802154.h> #include <net/ieee802154_netdev.h> +#include "llsec.h" + /* mac802154 device private data */ struct mac802154_priv { struct ieee802154_dev hw; @@ -90,6 +94,13 @@ struct mac802154_sub_if_data { u8 bsn; /* MAC DSN field */ u8 dsn; + + /* protects sec from concurrent access by netlink. access by + * encrypt/decrypt/header_create safe without additional protection. 
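
Taken together, the declarations above define the locking model: sec->lock (an rwlock) guards params on the hot encrypt/decrypt paths, the key/device/seclevel tables are RCU-protected for readers, and table writers are serialized by the per-interface sec_mtx being added to mac802154_sub_if_data just below.
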
+ */ + struct mutex sec_mtx; + + struct mac802154_llsec sec; }; #define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw) @@ -125,4 +136,37 @@ int mac802154_set_mac_params(struct net_device *dev, void mac802154_get_mac_params(struct net_device *dev, struct ieee802154_mac_params *params); +int mac802154_get_params(struct net_device *dev, + struct ieee802154_llsec_params *params); +int mac802154_set_params(struct net_device *dev, + const struct ieee802154_llsec_params *params, + int changed); + +int mac802154_add_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key); +int mac802154_del_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id); + +int mac802154_add_dev(struct net_device *dev, + const struct ieee802154_llsec_device *llsec_dev); +int mac802154_del_dev(struct net_device *dev, __le64 dev_addr); + +int mac802154_add_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key); +int mac802154_del_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key); + +int mac802154_add_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl); +int mac802154_del_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl); + +void mac802154_lock_table(struct net_device *dev); +void mac802154_get_table(struct net_device *dev, + struct ieee802154_llsec_table **t); +void mac802154_unlock_table(struct net_device *dev); + #endif /* MAC802154_H */ diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index d40c0928bc62..bf809131eef7 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -40,6 +40,9 @@ static int mac802154_mlme_start_req(struct net_device *dev, u8 pan_coord, u8 blx, u8 coord_realign) { + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + int rc = 0; + BUG_ON(addr->mode != IEEE802154_ADDR_SHORT); mac802154_dev_set_pan_id(dev, addr->pan_id); @@ -47,12 +50,31 @@ static int mac802154_mlme_start_req(struct net_device *dev, mac802154_dev_set_ieee_addr(dev); mac802154_dev_set_page_channel(dev, page, channel); + if (ops->llsec) { + struct ieee802154_llsec_params params; + int changed = 0; + + params.coord_shortaddr = addr->short_addr; + changed |= IEEE802154_LLSEC_PARAM_COORD_SHORTADDR; + + params.pan_id = addr->pan_id; + changed |= IEEE802154_LLSEC_PARAM_PAN_ID; + + params.hwaddr = ieee802154_devaddr_from_raw(dev->dev_addr); + changed |= IEEE802154_LLSEC_PARAM_HWADDR; + + params.coord_hwaddr = params.hwaddr; + changed |= IEEE802154_LLSEC_PARAM_COORD_HWADDR; + + rc = ops->llsec->set_params(dev, ¶ms, changed); + } + /* FIXME: add validation for unused parameters to be sane * for SoftMAC */ ieee802154_nl_start_confirm(dev, IEEE802154_SUCCESS); - return 0; + return rc; } static struct wpan_phy *mac802154_get_phy(const struct net_device *dev) @@ -64,6 +86,22 @@ static struct wpan_phy *mac802154_get_phy(const struct net_device *dev) return to_phy(get_device(&priv->hw->phy->dev)); } +static struct ieee802154_llsec_ops mac802154_llsec_ops = { + .get_params = mac802154_get_params, + .set_params = mac802154_set_params, + .add_key = mac802154_add_key, + .del_key = mac802154_del_key, + .add_dev = mac802154_add_dev, + .del_dev = mac802154_del_dev, + .add_devkey = mac802154_add_devkey, + .del_devkey = mac802154_del_devkey, + .add_seclevel = mac802154_add_seclevel, + .del_seclevel = mac802154_del_seclevel, + .lock_table = mac802154_lock_table, + .get_table = 
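
Note how mac802154_mlme_start_req() above seeds the security PIB when an interface starts as coordinator: the requested PAN ID and short address are pushed into the LLSEC parameters, the interface's own extended address doubles as the coordinator address, and each field is accompanied by its IEEE802154_LLSEC_PARAM_* changed bit.
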
mac802154_get_table, + .unlock_table = mac802154_unlock_table, +}; + struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = { .get_phy = mac802154_get_phy, }; @@ -75,6 +113,8 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = { .get_short_addr = mac802154_dev_get_short_addr, .get_dsn = mac802154_dev_get_dsn, + .llsec = &mac802154_llsec_ops, + .set_mac_params = mac802154_set_mac_params, .get_mac_params = mac802154_get_mac_params, }; diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index f0991f2344d4..15aa2f2b03a7 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -213,3 +213,190 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) } else mutex_unlock(&priv->hw->phy->pib_lock); } + + +int mac802154_get_params(struct net_device *dev, + struct ieee802154_llsec_params *params) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_get_params(&priv->sec, params); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_set_params(struct net_device *dev, + const struct ieee802154_llsec_params *params, + int changed) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_set_params(&priv->sec, params, changed); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_key_add(&priv->sec, id, key); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_key_del(&priv->sec, id); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_dev(struct net_device *dev, + const struct ieee802154_llsec_device *llsec_dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_dev_add(&priv->sec, llsec_dev); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_dev(struct net_device *dev, __le64 dev_addr) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_dev_del(&priv->sec, dev_addr); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_devkey_add(&priv->sec, device_addr, key); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = 
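
Every wrapper in mib.c above and below follows the same template: check the device type, take the interface's sec_mtx, and delegate to the matching mac802154_llsec_* function. A condensed sketch of the pattern (the function-pointer form is illustrative, not what the patch uses):

static int sketch_llsec_op(struct net_device *dev,
			   int (*op)(struct mac802154_llsec *sec))
{
	struct mac802154_sub_if_data *priv = netdev_priv(dev);
	int res;

	BUG_ON(dev->type != ARPHRD_IEEE802154);

	mutex_lock(&priv->sec_mtx);
	res = op(&priv->sec);
	mutex_unlock(&priv->sec_mtx);

	return res;
}
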
mac802154_llsec_devkey_del(&priv->sec, device_addr, key); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_seclevel_add(&priv->sec, sl); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_seclevel_del(&priv->sec, sl); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +void mac802154_lock_table(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); +} + +void mac802154_get_table(struct net_device *dev, + struct ieee802154_llsec_table **t) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + *t = &priv->sec.table; +} + +void mac802154_unlock_table(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_unlock(&priv->sec_mtx); +} diff --git a/net/mac802154/monitor.c b/net/mac802154/monitor.c index 434a26f76a80..a68230e2b25f 100644 --- a/net/mac802154/monitor.c +++ b/net/mac802154/monitor.c @@ -70,7 +70,8 @@ void mac802154_monitors_rx(struct mac802154_priv *priv, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(sdata, &priv->slaves, list) { - if (sdata->type != IEEE802154_DEV_MONITOR) + if (sdata->type != IEEE802154_DEV_MONITOR || + !netif_running(sdata->dev)) continue; skb2 = skb_clone(skb, GFP_ATOMIC); diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 03855b0677cc..7f820a108a9c 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -59,27 +59,28 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi) skb->protocol = htons(ETH_P_IEEE802154); skb_reset_mac_header(skb); - BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb)); - if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) { u16 crc; if (skb->len < 2) { pr_debug("got invalid frame\n"); - goto out; + goto fail; } crc = crc_ccitt(0, skb->data, skb->len); if (crc) { pr_debug("CRC mismatch\n"); - goto out; + goto fail; } skb_trim(skb, skb->len - 2); /* CRC */ } mac802154_monitors_rx(priv, skb); mac802154_wpans_rx(priv, skb); -out: - dev_kfree_skb(skb); + + return; + +fail: + kfree_skb(skb); } static void mac802154_rx_worker(struct work_struct *work) diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 1df7a6a57386..3c3069fd6971 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -35,6 +35,28 @@ #include "mac802154.h" +static int mac802154_wpan_update_llsec(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + int rc = 0; + + if (ops->llsec) { + struct ieee802154_llsec_params params; + int changed = 0; + + params.pan_id = priv->pan_id; + changed |= IEEE802154_LLSEC_PARAM_PAN_ID; + + params.hwaddr = priv->extended_addr; + changed |= IEEE802154_LLSEC_PARAM_HWADDR; + + rc = ops->llsec->set_params(dev, ¶ms, changed); + } + + return rc; +} + static int mac802154_wpan_ioctl(struct net_device *dev, 
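
The lock_table/get_table/unlock_table trio above presumably exists for callers, such as netlink dump handlers, that must walk the tables atomically. A hypothetical walker, for illustration only:

static void example_walk_keys(struct net_device *dev)
{
	struct ieee802154_llsec_table *table;
	struct ieee802154_llsec_key_entry *pos;

	mac802154_lock_table(dev);
	mac802154_get_table(dev, &table);

	list_for_each_entry(pos, &table->keys, list)
		pr_debug("llsec key, id mode %d\n", pos->id.mode);

	mac802154_unlock_table(dev);
}
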
struct ifreq *ifr, int cmd) { @@ -81,7 +103,7 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) priv->pan_id = cpu_to_le16(sa->addr.pan_id); priv->short_addr = cpu_to_le16(sa->addr.short_addr); - err = 0; + err = mac802154_wpan_update_llsec(dev); break; } @@ -99,7 +121,7 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) /* FIXME: validate addr */ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); mac802154_dev_set_ieee_addr(dev); - return 0; + return mac802154_wpan_update_llsec(dev); } int mac802154_set_mac_params(struct net_device *dev, @@ -124,7 +146,7 @@ void mac802154_get_mac_params(struct net_device *dev, mutex_unlock(&priv->hw->slaves_mtx); } -int mac802154_wpan_open(struct net_device *dev) +static int mac802154_wpan_open(struct net_device *dev) { int rc; struct mac802154_sub_if_data *priv = netdev_priv(dev); @@ -183,6 +205,38 @@ out: return rc; } +static int mac802154_set_header_security(struct mac802154_sub_if_data *priv, + struct ieee802154_hdr *hdr, + const struct ieee802154_mac_cb *cb) +{ + struct ieee802154_llsec_params params; + u8 level; + + mac802154_llsec_get_params(&priv->sec, ¶ms); + + if (!params.enabled && cb->secen_override && cb->secen) + return -EINVAL; + if (!params.enabled || + (cb->secen_override && !cb->secen) || + !params.out_level) + return 0; + if (cb->seclevel_override && !cb->seclevel) + return -EINVAL; + + level = cb->seclevel_override ? cb->seclevel : params.out_level; + + hdr->fc.security_enabled = 1; + hdr->sec.level = level; + hdr->sec.key_id_mode = params.out_key.mode; + if (params.out_key.mode == IEEE802154_SCF_KEY_SHORT_INDEX) + hdr->sec.short_src = params.out_key.short_source; + else if (params.out_key.mode == IEEE802154_SCF_KEY_HW_INDEX) + hdr->sec.extended_src = params.out_key.extended_source; + hdr->sec.key_id = params.out_key.id; + + return 0; +} + static int mac802154_header_create(struct sk_buff *skb, struct net_device *dev, unsigned short type, @@ -192,15 +246,20 @@ static int mac802154_header_create(struct sk_buff *skb, { struct ieee802154_hdr hdr; struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct ieee802154_mac_cb *cb = mac_cb(skb); int hlen; if (!daddr) return -EINVAL; memset(&hdr.fc, 0, sizeof(hdr.fc)); - hdr.fc.type = mac_cb_type(skb); - hdr.fc.security_enabled = mac_cb_is_secen(skb); - hdr.fc.ack_request = mac_cb_is_ackreq(skb); + hdr.fc.type = cb->type; + hdr.fc.security_enabled = cb->secen; + hdr.fc.ack_request = cb->ackreq; + hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + + if (mac802154_set_header_security(priv, &hdr, cb) < 0) + return -EINVAL; if (!saddr) { spin_lock_bh(&priv->mib_lock); @@ -231,7 +290,7 @@ static int mac802154_header_create(struct sk_buff *skb, skb_reset_mac_header(skb); skb->mac_len = hlen; - if (hlen + len + 2 > dev->mtu) + if (len > ieee802154_max_payload(&hdr)) return -EMSGSIZE; return hlen; @@ -257,6 +316,7 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev) { struct mac802154_sub_if_data *priv; u8 chan, page; + int rc; priv = netdev_priv(dev); @@ -272,6 +332,13 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + rc = mac802154_llsec_encrypt(&priv->sec, skb); + if (rc) { + pr_warn("encryption failed: %i\n", rc); + kfree_skb(skb); + return NETDEV_TX_OK; + } + skb->skb_iif = dev->ifindex; dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; @@ -292,6 +359,15 @@ static const struct net_device_ops mac802154_wpan_ops = { .ndo_set_mac_address = mac802154_wpan_mac_addr, }; +static void 
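
mac802154_set_header_security() above honors per-skb overrides carried in the control block. A hypothetical sender could force one frame's security level like this (example_force_seclevel is invented; the cb fields are the ones consulted above):

static void example_force_seclevel(struct sk_buff *skb, u8 level)
{
	struct ieee802154_mac_cb *cb = mac_cb(skb);

	cb->secen_override = 1;
	cb->secen = 1;		/* request security on this frame */
	cb->seclevel_override = 1;
	cb->seclevel = level;	/* must be non-zero, or -EINVAL above */
}
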
mac802154_wpan_free(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + mac802154_llsec_destroy(&priv->sec); + + free_netdev(dev); +} + void mac802154_wpan_setup(struct net_device *dev) { struct mac802154_sub_if_data *priv; @@ -301,14 +377,14 @@ void mac802154_wpan_setup(struct net_device *dev) dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN; dev->header_ops = &mac802154_header_ops; - dev->needed_tailroom = 2; /* FCS */ + dev->needed_tailroom = 2 + 16; /* FCS + MIC */ dev->mtu = IEEE802154_MTU; dev->tx_queue_len = 300; dev->type = ARPHRD_IEEE802154; dev->flags = IFF_NOARP | IFF_BROADCAST; dev->watchdog_timeo = 0; - dev->destructor = free_netdev; + dev->destructor = mac802154_wpan_free; dev->netdev_ops = &mac802154_wpan_ops; dev->ml_priv = &mac802154_mlme_wpan; @@ -319,6 +395,7 @@ void mac802154_wpan_setup(struct net_device *dev) priv->page = 0; spin_lock_init(&priv->mib_lock); + mutex_init(&priv->sec_mtx); get_random_bytes(&priv->bsn, 1); get_random_bytes(&priv->dsn, 1); @@ -331,6 +408,8 @@ void mac802154_wpan_setup(struct net_device *dev) priv->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); priv->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + + mac802154_llsec_init(&priv->sec); } static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) @@ -339,9 +418,11 @@ static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) } static int -mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) +mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb, + const struct ieee802154_hdr *hdr) { __le16 span, sshort; + int rc; pr_debug("getting packet via slave interface %s\n", sdata->dev->name); @@ -388,15 +469,22 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) skb->dev = sdata->dev; + rc = mac802154_llsec_decrypt(&sdata->sec, skb); + if (rc) { + pr_debug("decryption failed: %i\n", rc); + kfree_skb(skb); + return NET_RX_DROP; + } + sdata->dev->stats.rx_packets++; sdata->dev->stats.rx_bytes += skb->len; - switch (mac_cb_type(skb)) { + switch (mac_cb(skb)->type) { case IEEE802154_FC_TYPE_DATA: return mac802154_process_data(sdata->dev, skb); default: pr_warn("ieee802154: bad frame received (type = %d)\n", - mac_cb_type(skb)); + mac_cb(skb)->type); kfree_skb(skb); return NET_RX_DROP; } @@ -419,62 +507,58 @@ static void mac802154_print_addr(const char *name, } } -static int mac802154_parse_frame_start(struct sk_buff *skb) +static int mac802154_parse_frame_start(struct sk_buff *skb, + struct ieee802154_hdr *hdr) { int hlen; - struct ieee802154_hdr hdr; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); - hlen = ieee802154_hdr_pull(skb, &hdr); + hlen = ieee802154_hdr_pull(skb, hdr); if (hlen < 0) return -EINVAL; skb->mac_len = hlen; - pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr.fc), - hdr.seq); - - mac_cb(skb)->flags = hdr.fc.type; + pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr->fc), + hdr->seq); - if (hdr.fc.ack_request) - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; - if (hdr.fc.security_enabled) - mac_cb(skb)->flags |= MAC_CB_FLAG_SECEN; + cb->type = hdr->fc.type; + cb->ackreq = hdr->fc.ack_request; + cb->secen = hdr->fc.security_enabled; - mac802154_print_addr("destination", &hdr.dest); - mac802154_print_addr("source", &hdr.source); + mac802154_print_addr("destination", &hdr->dest); + mac802154_print_addr("source", &hdr->source); - mac_cb(skb)->source = hdr.source; - mac_cb(skb)->dest = hdr.dest; + 
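
The needed_tailroom bump above reserves room for the 2-byte FCS plus the largest possible MIC (16 bytes for MIC-128/ENC-MIC-128), so mac802154_llsec_encrypt() can skb_put() the tag without reallocating; likewise, the new destructor tears down the per-interface LLSEC state before the netdev itself is freed.
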
cb->source = hdr->source; + cb->dest = hdr->dest; - if (hdr.fc.security_enabled) { + if (hdr->fc.security_enabled) { u64 key; - pr_debug("seclevel %i\n", hdr.sec.level); + pr_debug("seclevel %i\n", hdr->sec.level); - switch (hdr.sec.key_id_mode) { + switch (hdr->sec.key_id_mode) { case IEEE802154_SCF_KEY_IMPLICIT: pr_debug("implicit key\n"); break; case IEEE802154_SCF_KEY_INDEX: - pr_debug("key %02x\n", hdr.sec.key_id); + pr_debug("key %02x\n", hdr->sec.key_id); break; case IEEE802154_SCF_KEY_SHORT_INDEX: pr_debug("key %04x:%04x %02x\n", - le32_to_cpu(hdr.sec.short_src) >> 16, - le32_to_cpu(hdr.sec.short_src) & 0xffff, - hdr.sec.key_id); + le32_to_cpu(hdr->sec.short_src) >> 16, + le32_to_cpu(hdr->sec.short_src) & 0xffff, + hdr->sec.key_id); break; case IEEE802154_SCF_KEY_HW_INDEX: - key = swab64((__force u64) hdr.sec.extended_src); + key = swab64((__force u64) hdr->sec.extended_src); pr_debug("key source %8phC %02x\n", &key, - hdr.sec.key_id); + hdr->sec.key_id); break; } - - return -EINVAL; } return 0; @@ -483,10 +567,10 @@ static int mac802154_parse_frame_start(struct sk_buff *skb) void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) { int ret; - struct sk_buff *sskb; struct mac802154_sub_if_data *sdata; + struct ieee802154_hdr hdr; - ret = mac802154_parse_frame_start(skb); + ret = mac802154_parse_frame_start(skb, &hdr); if (ret) { pr_debug("got invalid frame\n"); return; @@ -494,12 +578,16 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(sdata, &priv->slaves, list) { - if (sdata->type != IEEE802154_DEV_WPAN) + if (sdata->type != IEEE802154_DEV_WPAN || + !netif_running(sdata->dev)) continue; - sskb = skb_clone(skb, GFP_ATOMIC); - if (sskb) - mac802154_subif_frame(sdata, sskb); + mac802154_subif_frame(sdata, skb, &hdr); + skb = NULL; + break; } rcu_read_unlock(); + + if (skb) + kfree_skb(skb); } diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 851cd880b0c0..6b38d083e1c9 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_MPLS))) goto out; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 117208321f16..ec8114fae50b 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -271,10 +271,7 @@ ip_set_free(void *members) { pr_debug("%p: free with %s\n", members, is_vmalloc_addr(members) ? 
"vfree" : "kfree"); - if (is_vmalloc_addr(members)) - vfree(members); - else - kfree(members); + kvfree(members); } EXPORT_SYMBOL_GPL(ip_set_free); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 3d2d2c8108ca..e6836755c45d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -97,7 +97,7 @@ const char *ip_vs_proto_name(unsigned int proto) return "ICMPv6"; #endif default: - sprintf(buf, "IP_%d", proto); + sprintf(buf, "IP_%u", proto); return buf; } } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index c47444e4cf8c..73ba1cc7a88d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -562,7 +562,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(iph); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); rcu_read_unlock(); @@ -590,7 +590,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); rcu_read_unlock(); @@ -684,7 +684,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); rcu_read_unlock(); @@ -774,7 +774,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); rcu_read_unlock(); @@ -883,10 +883,10 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = old_iph->ttl; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) @@ -974,7 +974,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, iph->hop_limit = old_iph->hop_limit; /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) @@ -1023,7 +1023,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(ip_hdr(skb)); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); rcu_read_unlock(); @@ -1060,7 +1060,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); rcu_read_unlock(); @@ -1157,7 +1157,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_nat_icmp(skb, pp, cp, 0); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); rcu_read_unlock(); @@ -1249,7 +1249,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_nat_icmp_v6(skb, pp, cp, 0); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); rcu_read_unlock(); diff --git 
a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 52ca952b802c..09096a670c45 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -358,6 +358,19 @@ out: rcu_read_unlock(); } +struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + if (nat) + return nat; + + if (!nf_ct_is_confirmed(ct)) + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + + return nat; +} +EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add); + unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, @@ -368,14 +381,9 @@ nf_nat_setup_info(struct nf_conn *ct, struct nf_conn_nat *nat; /* nat helper or nfctnetlink also setup binding */ - nat = nfct_nat(ct); - if (!nat) { - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3fd159db9f06..624e083125b9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -88,6 +88,45 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) return ERR_PTR(-EAFNOSUPPORT); } +static void nft_ctx_init(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nft_af_info *afi, + struct nft_table *table, + struct nft_chain *chain, + const struct nlattr * const *nla) +{ + ctx->net = sock_net(skb->sk); + ctx->afi = afi; + ctx->table = table; + ctx->chain = chain; + ctx->nla = nla; + ctx->portid = NETLINK_CB(skb).portid; + ctx->report = nlmsg_report(nlh); + ctx->seq = nlh->nlmsg_seq; +} + +static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type, + u32 size) +{ + struct nft_trans *trans; + + trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL); + if (trans == NULL) + return NULL; + + trans->msg_type = msg_type; + trans->ctx = *ctx; + + return trans; +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + list_del(&trans->list); + kfree(trans); +} + /* * Tables */ @@ -197,20 +236,13 @@ nla_put_failure: return -1; } -static int nf_tables_table_notify(const struct sk_buff *oskb, - const struct nlmsghdr *nlh, - const struct nft_table *table, - int event, int family) +static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - u32 seq = nlh ? nlh->nlmsg_seq : 0; - struct net *net = oskb ? sock_net(oskb->sk) : &init_net; - bool report; int err; - report = nlh ? 
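
nf_ct_nat_ext_add() above factors out a pattern previously open-coded in nf_nat_setup_info(): reuse the conntrack's NAT extension if it already exists, and only attempt to attach one while the entry is still unconfirmed, since extensions may not be added to a confirmed conntrack.
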
nlmsg_report(nlh) : false; - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -218,18 +250,20 @@ static int nf_tables_table_notify(const struct sk_buff *oskb, if (skb == NULL) goto err; - err = nf_tables_fill_table_info(skb, portid, seq, event, 0, - family, table); + err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0, + ctx->afi->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } return err; } @@ -269,6 +303,9 @@ done: return skb->len; } +/* Internal table flags */ +#define NFT_TABLE_INACTIVE (1 << 15) + static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -295,6 +332,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -343,7 +382,7 @@ err: return err; } -static int nf_tables_table_disable(const struct nft_af_info *afi, +static void nf_tables_table_disable(const struct nft_af_info *afi, struct nft_table *table) { struct nft_chain *chain; @@ -353,45 +392,63 @@ static int nf_tables_table_disable(const struct nft_af_info *afi, nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); } - - return 0; } -static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct nft_af_info *afi, struct nft_table *table) +static int nf_tables_updtable(struct nft_ctx *ctx) { - const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - int family = nfmsg->nfgen_family, ret = 0; + struct nft_trans *trans; + u32 flags; + int ret = 0; - if (nla[NFTA_TABLE_FLAGS]) { - u32 flags; + if (!ctx->nla[NFTA_TABLE_FLAGS]) + return 0; - flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); - if (flags & ~NFT_TABLE_F_DORMANT) - return -EINVAL; + flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) + return -EINVAL; + + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, + sizeof(struct nft_trans_table)); + if (trans == NULL) + return -ENOMEM; - if ((flags & NFT_TABLE_F_DORMANT) && - !(table->flags & NFT_TABLE_F_DORMANT)) { - ret = nf_tables_table_disable(afi, table); - if (ret >= 0) - table->flags |= NFT_TABLE_F_DORMANT; - } else if (!(flags & NFT_TABLE_F_DORMANT) && - table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(afi, table); - if (ret >= 0) - table->flags &= ~NFT_TABLE_F_DORMANT; + if ((flags & NFT_TABLE_F_DORMANT) && + !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { + nft_trans_table_enable(trans) = false; + } else if (!(flags & NFT_TABLE_F_DORMANT) && + ctx->table->flags & NFT_TABLE_F_DORMANT) { + ret = nf_tables_table_enable(ctx->afi, ctx->table); + if (ret >= 0) { + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + nft_trans_table_enable(trans) = true; } - if (ret < 0) - goto err; } + if (ret < 0) + goto err; - nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); + 
nft_trans_table_update(trans) = true; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; err: + nft_trans_destroy(trans); return ret; } +static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWTABLE) + ctx->table->flags |= NFT_TABLE_INACTIVE; + + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; +} + static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -403,6 +460,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; u32 flags = 0; + struct nft_ctx ctx; + int err; afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) @@ -417,11 +476,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, } if (table != NULL) { + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table); + + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + return nf_tables_updtable(&ctx); } if (nla[NFTA_TABLE_FLAGS]) { @@ -444,8 +507,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->sets); table->flags = flags; + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); + if (err < 0) { + kfree(table); + module_put(afi->owner); + return err; + } list_add_tail(&table->list, &afi->tables); - nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); return 0; } @@ -457,7 +526,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, struct nft_af_info *afi; struct nft_table *table; struct net *net = sock_net(skb->sk); - int family = nfmsg->nfgen_family; + int family = nfmsg->nfgen_family, err; + struct nft_ctx ctx; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -466,17 +536,28 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); - - if (!list_empty(&table->chains) || !list_empty(&table->sets)) + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; + if (table->use > 0) return -EBUSY; + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE); + if (err < 0) + return err; + list_del(&table->list); - nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family); - kfree(table); - module_put(afi->owner); return 0; } +static void nf_tables_table_destroy(struct nft_ctx *ctx) +{ + BUG_ON(ctx->table->use > 0); + + kfree(ctx->table); + module_put(ctx->afi->owner); +} + int nft_register_chain_type(const struct nf_chain_type *ctype) { int err = 0; @@ -541,7 +622,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, - [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, + [NFTA_CHAIN_TYPE] = { .type = NLA_STRING }, [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, }; @@ -637,21 +718,13 @@ nla_put_failure: return -1; } -static int nf_tables_chain_notify(const struct sk_buff *oskb, - const 
struct nlmsghdr *nlh, - const struct nft_table *table, - const struct nft_chain *chain, - int event, int family) +static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - struct net *net = oskb ? sock_net(oskb->sk) : &init_net; - u32 seq = nlh ? nlh->nlmsg_seq : 0; - bool report; int err; - report = nlh ? nlmsg_report(nlh) : false; - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -659,18 +732,21 @@ static int nf_tables_chain_notify(const struct sk_buff *oskb, if (skb == NULL) goto err; - err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family, - table, chain); + err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0, + ctx->afi->family, ctx->table, + ctx->chain); if (err < 0) { kfree_skb(skb); goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } return err; } @@ -740,10 +816,14 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) return PTR_ERR(chain); + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -767,8 +847,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, }; -static int -nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) +static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) { struct nlattr *tb[NFTA_COUNTER_MAX+1]; struct nft_stats __percpu *newstats; @@ -777,14 +856,14 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy); if (err < 0) - return err; + return ERR_PTR(err); if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) - return -EINVAL; + return ERR_PTR(-EINVAL); newstats = alloc_percpu(struct nft_stats); if (newstats == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); /* Restore old counters on this cpu, no problem. Per-cpu statistics * are not exposed to userspace. 
@@ -793,6 +872,12 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + return newstats; +} + +static void nft_chain_stats_replace(struct nft_base_chain *chain, + struct nft_stats __percpu *newstats) +{ if (chain->stats) { struct nft_stats __percpu *oldstats = nft_dereference(chain->stats); @@ -802,17 +887,43 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) free_percpu(oldstats); } else rcu_assign_pointer(chain->stats, newstats); +} + +static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWCHAIN) + ctx->chain->flags |= NFT_CHAIN_INACTIVE; + + list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; } +static void nf_tables_chain_destroy(struct nft_chain *chain) +{ + BUG_ON(chain->use > 0); + + if (chain->flags & NFT_BASE_CHAIN) { + module_put(nft_base_chain(chain)->type->owner); + free_percpu(nft_base_chain(chain)->stats); + kfree(nft_base_chain(chain)); + } else { + kfree(chain); + } +} + static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nlattr * uninitialized_var(name); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; struct nft_base_chain *basechain = NULL; @@ -822,8 +933,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, u8 policy = NF_ACCEPT; u64 handle = 0; unsigned int i; + struct nft_stats __percpu *stats; int err; bool create; + struct nft_ctx ctx; create = nlh->nlmsg_flags & NLM_F_CREATE ? 
true : false; @@ -869,6 +982,11 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, } if (chain != NULL) { + struct nft_stats *stats = NULL; + struct nft_trans *trans; + + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -882,19 +1000,31 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (!(chain->flags & NFT_BASE_CHAIN)) return -EOPNOTSUPP; - err = nf_tables_counters(nft_base_chain(chain), - nla[NFTA_CHAIN_COUNTERS]); - if (err < 0) - return err; + stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); + if (IS_ERR(stats)) + return PTR_ERR(stats); } - if (nla[NFTA_CHAIN_POLICY]) - nft_base_chain(chain)->policy = policy; + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN, + sizeof(struct nft_trans_chain)); + if (trans == NULL) + return -ENOMEM; + + nft_trans_chain_stats(trans) = stats; + nft_trans_chain_update(trans) = true; - if (nla[NFTA_CHAIN_HANDLE] && name) - nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + if (nla[NFTA_CHAIN_POLICY]) + nft_trans_chain_policy(trans) = policy; + else + nft_trans_chain_policy(trans) = -1; - goto notify; + if (nla[NFTA_CHAIN_HANDLE] && name) { + nla_strlcpy(nft_trans_chain_name(trans), name, + NFT_CHAIN_MAXNAMELEN); + } + list_add_tail(&trans->list, &net->nft.commit_list); + return 0; } if (table->use == UINT_MAX) @@ -939,23 +1069,21 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return -ENOMEM; if (nla[NFTA_CHAIN_COUNTERS]) { - err = nf_tables_counters(basechain, - nla[NFTA_CHAIN_COUNTERS]); - if (err < 0) { + stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); + if (IS_ERR(stats)) { module_put(type->owner); kfree(basechain); - return err; + return PTR_ERR(stats); } + basechain->stats = stats; } else { - struct nft_stats __percpu *newstats; - - newstats = alloc_percpu(struct nft_stats); - if (newstats == NULL) { + stats = alloc_percpu(struct nft_stats); + if (IS_ERR(stats)) { module_put(type->owner); kfree(basechain); - return -ENOMEM; + return PTR_ERR(stats); } - rcu_assign_pointer(basechain->stats, newstats); + rcu_assign_pointer(basechain->stats, stats); } basechain->type = type; @@ -992,31 +1120,27 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (!(table->flags & NFT_TABLE_F_DORMANT) && chain->flags & NFT_BASE_CHAIN) { err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); - if (err < 0) { - module_put(basechain->type->owner); - free_percpu(basechain->stats); - kfree(basechain); - return err; - } + if (err < 0) + goto err1; } - list_add_tail(&chain->list, &table->chains); - table->use++; -notify: - nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, - family); - return 0; -} -static void nf_tables_chain_destroy(struct nft_chain *chain) -{ - BUG_ON(chain->use > 0); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN); + if (err < 0) + goto err2; - if (chain->flags & NFT_BASE_CHAIN) { - module_put(nft_base_chain(chain)->type->owner); - free_percpu(nft_base_chain(chain)->stats); - kfree(nft_base_chain(chain)); - } else - kfree(chain); + table->use++; + list_add_tail(&chain->list, &table->chains); + return 0; +err2: + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) { + nf_unregister_hooks(nft_base_chain(chain)->ops, + afi->nops); + } +err1: + nf_tables_chain_destroy(chain); + return err; } static 
int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, @@ -1024,11 +1148,13 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nft_ctx ctx; + int err; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1037,48 +1163,27 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) return PTR_ERR(chain); - - if (!list_empty(&chain->rules) || chain->use > 0) + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; + if (chain->use > 0) return -EBUSY; - list_del(&chain->list); - table->use--; - - if (!(table->flags & NFT_TABLE_F_DORMANT) && - chain->flags & NFT_BASE_CHAIN) - nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); - - nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, - family); - - /* Make sure all rule references are gone before this is released */ - synchronize_rcu(); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN); + if (err < 0) + return err; - nf_tables_chain_destroy(chain); + table->use--; + list_del(&chain->list); return 0; } -static void nft_ctx_init(struct nft_ctx *ctx, - const struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nft_af_info *afi, - const struct nft_table *table, - const struct nft_chain *chain, - const struct nlattr * const *nla) -{ - ctx->net = sock_net(skb->sk); - ctx->skb = skb; - ctx->nlh = nlh; - ctx->afi = afi; - ctx->table = table; - ctx->chain = chain; - ctx->nla = nla; -} - /* * Expressions */ @@ -1093,7 +1198,10 @@ static void nft_ctx_init(struct nft_ctx *ctx, int nft_register_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&type->list, &nf_tables_expressions); + if (type->family == NFPROTO_UNSPEC) + list_add_tail(&type->list, &nf_tables_expressions); + else + list_add(&type->list, &nf_tables_expressions); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -1361,22 +1469,15 @@ nla_put_failure: return -1; } -static int nf_tables_rule_notify(const struct sk_buff *oskb, - const struct nlmsghdr *nlh, - const struct nft_table *table, - const struct nft_chain *chain, +static int nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, - int event, u32 flags, int family) + int event) { struct sk_buff *skb; - u32 portid = NETLINK_CB(oskb).portid; - struct net *net = oskb ? 
sock_net(oskb->sk) : &init_net; - u32 seq = nlh->nlmsg_seq; - bool report; int err; - report = nlmsg_report(nlh); - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -1384,18 +1485,21 @@ static int nf_tables_rule_notify(const struct sk_buff *oskb, if (skb == NULL) goto err; - err = nf_tables_fill_rule_info(skb, portid, seq, event, flags, - family, table, chain, rule); + err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0, + ctx->afi->family, ctx->table, + ctx->chain, rule); if (err < 0) { kfree_skb(skb); goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } return err; } @@ -1511,10 +1615,14 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); if (IS_ERR(chain)) return PTR_ERR(chain); + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) @@ -1554,37 +1662,36 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -#define NFT_RULE_MAXEXPRS 128 - -static struct nft_expr_info *info; - -static struct nft_rule_trans * -nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule) +static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, + struct nft_rule *rule) { - struct nft_rule_trans *rupd; + struct nft_trans *trans; - rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); - if (rupd == NULL) - return NULL; + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule)); + if (trans == NULL) + return NULL; - rupd->ctx = *ctx; - rupd->rule = rule; - list_add_tail(&rupd->list, &ctx->net->nft.commit_list); + nft_trans_rule(trans) = rule; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); - return rupd; + return trans; } +#define NFT_RULE_MAXEXPRS 128 + +static struct nft_expr_info *info; + static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; - struct nft_rule_trans *repl = NULL; + struct nft_trans *trans = NULL; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; @@ -1682,8 +1789,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_REPLACE) { if (nft_rule_is_active_next(net, old_rule)) { - repl = nf_tables_trans_add(&ctx, old_rule); - if (repl == NULL) { + trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, + old_rule); + if (trans == NULL) { err = -ENOMEM; goto err2; } @@ -1705,19 +1813,19 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_add_rcu(&rule->list, &chain->rules); } - if (nf_tables_trans_add(&ctx, rule) == NULL) { + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == 
NULL) { err = -ENOMEM; goto err3; } + chain->use++; return 0; err3: list_del_rcu(&rule->list); - if (repl) { - list_del_rcu(&repl->rule->list); - list_del(&repl->list); - nft_rule_clear(net, repl->rule); - kfree(repl); + if (trans) { + list_del_rcu(&nft_trans_rule(trans)->list); + nft_rule_clear(net, nft_trans_rule(trans)); + nft_trans_destroy(trans); } err2: nf_tables_rule_destroy(&ctx, rule); @@ -1734,9 +1842,10 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) { /* You cannot delete the same rule twice */ if (nft_rule_is_active_next(ctx->net, rule)) { - if (nf_tables_trans_add(ctx, rule) == NULL) + if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL) return -ENOMEM; nft_rule_disactivate_next(ctx->net, rule); + ctx->chain->use--; return 0; } return -ENOENT; @@ -1760,9 +1869,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct net *net = sock_net(skb->sk); - const struct nft_table *table; + struct nft_table *table; struct nft_chain *chain = NULL; struct nft_rule *rule; int family = nfmsg->nfgen_family, err = 0; @@ -1775,6 +1884,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; if (nla[NFTA_RULE_CHAIN]) { chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); @@ -1807,88 +1918,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, return err; } -static int nf_tables_commit(struct sk_buff *skb) -{ - struct net *net = sock_net(skb->sk); - struct nft_rule_trans *rupd, *tmp; - - /* Bump generation counter, invalidate any dump in progress */ - net->nft.genctr++; - - /* A new generation has just started */ - net->nft.gencursor = gencursor_next(net); - - /* Make sure all packets have left the previous generation before - * purging old rules. - */ - synchronize_rcu(); - - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - /* This rule was inactive in the past and just became active. - * Clear the next bit of the genmask since its meaning has - * changed, now it is the future. 
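The nf_tables_commit()/nf_tables_abort() pair being removed here only knew about rules; their generic, transaction-list-driven replacements appear further down in this diff. Both versions pivot on the two-generation genmask this comment describes: each rule carries one "inactive" bit per generation, a rule being added starts with its current-generation bit set (linked in but invisible), a rule being deleted gets its next-generation bit set (still visible until commit), and flipping net->nft.gencursor makes all staged changes take effect at once. A self-contained model of that scheme, matching the semantics visible in nft_rule_is_active()/nft_rule_clear() in this file (the helper names below are illustrative, not the kernel's):

#include <assert.h>
#include <stdbool.h>

static unsigned int gencursor;	/* 0 or 1; selects the live generation */

struct rule {
	unsigned int genmask;	/* bit set => rule inactive in that generation */
};

static unsigned int next_gen(void) { return gencursor ^ 1; }

static bool rule_is_active(const struct rule *r)
{
	return !(r->genmask & (1u << gencursor));
}

static bool rule_is_active_next(const struct rule *r)
{
	return !(r->genmask & (1u << next_gen()));
}

/* A newly added rule: invisible now, visible after commit. */
static void rule_init_staged(struct rule *r)     { r->genmask = 1u << gencursor; }
/* A staged deletion: visible now, gone after commit. */
static void rule_deactivate_next(struct rule *r) { r->genmask |= 1u << next_gen(); }
/* After commit, the surviving rule belongs to all generations again. */
static void rule_clear(struct rule *r)           { r->genmask = 0; }

int main(void)
{
	struct rule add, del = { .genmask = 0 };

	rule_init_staged(&add);		/* NEWRULE transaction */
	rule_deactivate_next(&del);	/* DELRULE transaction */
	assert(!rule_is_active(&add) && rule_is_active(&del));
	assert(rule_is_active_next(&add) && !rule_is_active_next(&del));

	gencursor = next_gen();		/* commit: flip the generation */
	assert(rule_is_active(&add) && !rule_is_active(&del));
	rule_clear(&add);		/* tidy the stale bit, as nft_rule_clear() does */
	return 0;
}

The synchronize_rcu() calls around this flip exist because packets may still be traversing the old generation while the cursor moves; old rules can only be freed once those readers are guaranteed gone.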
- */ - if (nft_rule_is_active(net, rupd->rule)) { - nft_rule_clear(net, rupd->rule); - nf_tables_rule_notify(skb, rupd->ctx.nlh, - rupd->ctx.table, rupd->ctx.chain, - rupd->rule, NFT_MSG_NEWRULE, 0, - rupd->ctx.afi->family); - list_del(&rupd->list); - kfree(rupd); - continue; - } - - /* This rule is in the past, get rid of it */ - list_del_rcu(&rupd->rule->list); - nf_tables_rule_notify(skb, rupd->ctx.nlh, - rupd->ctx.table, rupd->ctx.chain, - rupd->rule, NFT_MSG_DELRULE, 0, - rupd->ctx.afi->family); - } - - /* Make sure we don't see any packet traversing old rules */ - synchronize_rcu(); - - /* Now we can safely release unused old rules */ - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - nf_tables_rule_destroy(&rupd->ctx, rupd->rule); - list_del(&rupd->list); - kfree(rupd); - } - - return 0; -} - -static int nf_tables_abort(struct sk_buff *skb) -{ - struct net *net = sock_net(skb->sk); - struct nft_rule_trans *rupd, *tmp; - - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - if (!nft_rule_is_active_next(net, rupd->rule)) { - nft_rule_clear(net, rupd->rule); - list_del(&rupd->list); - kfree(rupd); - continue; - } - - /* This rule is inactive, get rid of it */ - list_del_rcu(&rupd->rule->list); - } - - /* Make sure we don't see any packet accessing aborted rules */ - synchronize_rcu(); - - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - nf_tables_rule_destroy(&rupd->ctx, rupd->rule); - list_del(&rupd->list); - kfree(rupd); - } - - return 0; -} - /* * Sets */ @@ -1912,9 +1941,18 @@ void nft_unregister_set(struct nft_set_ops *ops) } EXPORT_SYMBOL_GPL(nft_unregister_set); -static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[]) +/* + * Select a set implementation based on the data characteristics and the + * given policy. The total memory use might not be known if no size is + * given, in that case the amount of memory per element is used. 
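The selection loop that follows this comment implements a "best estimate wins" scan over the registered set backends: each backend's ->estimate() callback reports an approximate memory footprint (est.size, or a per-element cost when the total is unknown) and a lookup complexity class (est.class), and the chosen policy decides which of the two keys is compared first. A standalone model of that two-key comparison; the field and policy names mirror the diff, while the candidate table and its numbers are invented for illustration:

#include <stdio.h>

enum set_policy { POL_PERFORMANCE, POL_MEMORY };

struct estimate {
	unsigned int size;	/* approx. bytes, or per-element cost if total unknown */
	unsigned int class;	/* lookup cost class: lower is faster */
};

struct backend {
	const char *name;
	struct estimate est;
};

/* Mirrors the comparison in nft_select_set_ops(): performance policy
 * sorts by (class, size), memory policy by (size, class). */
static int better(const struct estimate *a, const struct estimate *b,
		  enum set_policy policy)
{
	if (policy == POL_PERFORMANCE)
		return a->class < b->class ||
		       (a->class == b->class && a->size < b->size);
	return a->size < b->size ||
	       (a->size == b->size && a->class < b->class);
}

int main(void)
{
	/* Invented numbers, for illustration only. */
	struct backend cands[] = {
		{ "hash",   { .size = 4096, .class = 1 } },	/* O(1)     */
		{ "rbtree", { .size = 1024, .class = 2 } },	/* O(log N) */
	};
	enum set_policy policy = POL_MEMORY;
	struct backend *best = &cands[0];

	for (unsigned int i = 1; i < sizeof(cands) / sizeof(cands[0]); i++)
		if (better(&cands[i].est, &best->est, policy))
			best = &cands[i];

	printf("selected: %s\n", best->name);	/* rbtree under POL_MEMORY */
	return 0;
}

Under NFT_SET_POL_PERFORMANCE the same table would pick the hash backend instead, since class is compared first; this is why the hash backend's estimate callback later in this diff reports NFT_SET_CLASS_O_1.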
+ */ +static const struct nft_set_ops * +nft_select_set_ops(const struct nlattr * const nla[], + const struct nft_set_desc *desc, + enum nft_set_policies policy) { - const struct nft_set_ops *ops; + const struct nft_set_ops *ops, *bops; + struct nft_set_estimate est, best; u32 features; #ifdef CONFIG_MODULES @@ -1932,15 +1970,45 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const features &= NFT_SET_INTERVAL | NFT_SET_MAP; } - // FIXME: implement selection properly + bops = NULL; + best.size = ~0; + best.class = ~0; + list_for_each_entry(ops, &nf_tables_set_ops, list) { if ((ops->features & features) != features) continue; + if (!ops->estimate(desc, features, &est)) + continue; + + switch (policy) { + case NFT_SET_POL_PERFORMANCE: + if (est.class < best.class) + break; + if (est.class == best.class && est.size < best.size) + break; + continue; + case NFT_SET_POL_MEMORY: + if (est.size < best.size) + break; + if (est.size == best.size && est.class < best.class) + break; + continue; + default: + break; + } + if (!try_module_get(ops->owner)) continue; - return ops; + if (bops != NULL) + module_put(bops->owner); + + bops = ops; + best = est; } + if (bops != NULL) + return bops; + return ERR_PTR(-EOPNOTSUPP); } @@ -1953,6 +2021,13 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, + [NFTA_SET_POLICY] = { .type = NLA_U32 }, + [NFTA_SET_DESC] = { .type = NLA_NESTED }, + [NFTA_SET_ID] = { .type = NLA_U32 }, +}; + +static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { + [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 }, }; static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, @@ -1962,8 +2037,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, { struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi = NULL; - const struct nft_table *table = NULL; + struct nft_af_info *afi = NULL; + struct nft_table *table = NULL; if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); @@ -1978,6 +2053,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; } nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); @@ -1999,13 +2076,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, return ERR_PTR(-ENOENT); } +struct nft_set *nf_tables_set_lookup_byid(const struct net *net, + const struct nlattr *nla) +{ + struct nft_trans *trans; + u32 id = ntohl(nla_get_be32(nla)); + + list_for_each_entry(trans, &net->nft.commit_list, list) { + if (trans->msg_type == NFT_MSG_NEWSET && + id == nft_trans_set_id(trans)) + return nft_trans_set(trans); + } + return ERR_PTR(-ENOENT); +} + static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) { const struct nft_set *i; const char *p; unsigned long *inuse; - unsigned int n = 0; + unsigned int n = 0, min = 0; p = strnchr(name, IFNAMSIZ, '%'); if (p != NULL) { @@ -2015,23 +2106,28 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (inuse == NULL) return -ENOMEM; - +cont: list_for_each_entry(i, &ctx->table->sets, list) { int tmp; if (!sscanf(i->name, name, &tmp)) 
continue; - if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE) + if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE) continue; - set_bit(tmp, inuse); + set_bit(tmp - min, inuse); } n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE); + if (n >= BITS_PER_BYTE * PAGE_SIZE) { + min += BITS_PER_BYTE * PAGE_SIZE; + memset(inuse, 0, PAGE_SIZE); + goto cont; + } free_page((unsigned long)inuse); } - snprintf(set->name, sizeof(set->name), name, n); + snprintf(set->name, sizeof(set->name), name, min + n); list_for_each_entry(i, &ctx->table->sets, list) { if (!strcmp(set->name, i->name)) return -ENFILE; @@ -2044,8 +2140,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, { struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; - u32 portid = NETLINK_CB(ctx->skb).portid; - u32 seq = ctx->nlh->nlmsg_seq; + struct nlattr *desc; + u32 portid = ctx->portid; + u32 seq = ctx->seq; event |= NFNL_SUBSYS_NFTABLES << 8; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), @@ -2077,6 +2174,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + desc = nla_nest_start(skb, NFTA_SET_DESC); + if (desc == NULL) + goto nla_put_failure; + if (set->size && + nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size))) + goto nla_put_failure; + nla_nest_end(skb, desc); + return nlmsg_end(skb, nlh); nla_put_failure: @@ -2086,19 +2191,18 @@ nla_put_failure: static int nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, - int event) + int event, gfp_t gfp_flags) { struct sk_buff *skb; - u32 portid = NETLINK_CB(ctx->skb).portid; - bool report; + u32 portid = ctx->portid; int err; - report = nlmsg_report(ctx->nlh); - if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags); if (skb == NULL) goto err; @@ -2108,8 +2212,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, + ctx->report, gfp_flags); err: if (err < 0) nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); @@ -2183,7 +2287,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, { const struct nft_set *set; unsigned int idx, s_idx = cb->args[0]; - const struct nft_af_info *afi; + struct nft_af_info *afi; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); int cur_family = cb->args[3]; @@ -2260,6 +2364,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) return ret; } +#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */ + static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2289,6 +2395,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) @@ -2305,13 +2413,50 @@ err: return err; } +static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, + struct nft_set_desc *desc, + const struct nlattr *nla) +{ + struct nlattr *da[NFTA_SET_DESC_MAX + 
1]; + int err; + + err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy); + if (err < 0) + return err; + + if (da[NFTA_SET_DESC_SIZE] != NULL) + desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE])); + + return 0; +} + +static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, + struct nft_set *set) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { + nft_trans_set_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); + set->flags |= NFT_SET_INACTIVE; + } + nft_trans_set(trans) = set; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_set_ops *ops; - const struct nft_af_info *afi; + struct nft_af_info *afi; struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_set *set; @@ -2319,14 +2464,18 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, char name[IFNAMSIZ]; unsigned int size; bool create; - u32 ktype, klen, dlen, dtype, flags; + u32 ktype, dtype, flags, policy; + struct nft_set_desc desc; int err; if (nla[NFTA_SET_TABLE] == NULL || nla[NFTA_SET_NAME] == NULL || - nla[NFTA_SET_KEY_LEN] == NULL) + nla[NFTA_SET_KEY_LEN] == NULL || + nla[NFTA_SET_ID] == NULL) return -EINVAL; + memset(&desc, 0, sizeof(desc)); + ktype = NFT_DATA_VALUE; if (nla[NFTA_SET_KEY_TYPE] != NULL) { ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); @@ -2334,8 +2483,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, return -EINVAL; } - klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); - if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data)) + desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); + if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data)) return -EINVAL; flags = 0; @@ -2347,7 +2496,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, } dtype = 0; - dlen = 0; if (nla[NFTA_SET_DATA_TYPE] != NULL) { if (!(flags & NFT_SET_MAP)) return -EINVAL; @@ -2360,15 +2508,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (dtype != NFT_DATA_VERDICT) { if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; - dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); - if (dlen == 0 || - dlen > FIELD_SIZEOF(struct nft_data, data)) + desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); + if (desc.dlen == 0 || + desc.dlen > FIELD_SIZEOF(struct nft_data, data)) return -EINVAL; } else - dlen = sizeof(struct nft_data); + desc.dlen = sizeof(struct nft_data); } else if (flags & NFT_SET_MAP) return -EINVAL; + policy = NFT_SET_POL_PERFORMANCE; + if (nla[NFTA_SET_POLICY] != NULL) + policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); + + if (nla[NFTA_SET_DESC] != NULL) { + err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]); + if (err < 0) + return err; + } + create = nlh->nlmsg_flags & NLM_F_CREATE ? 
true : false; afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); @@ -2399,7 +2557,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (!(nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(nla); + ops = nft_select_set_ops(nla, &desc, policy); if (IS_ERR(ops)) return PTR_ERR(ops); @@ -2420,17 +2578,22 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&set->bindings); set->ops = ops; set->ktype = ktype; - set->klen = klen; + set->klen = desc.klen; set->dtype = dtype; - set->dlen = dlen; + set->dlen = desc.dlen; set->flags = flags; + set->size = desc.size; + + err = ops->init(set, &desc, nla); + if (err < 0) + goto err2; - err = ops->init(set, nla); + err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) goto err2; list_add_tail(&set->list, &table->sets); - nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET); + table->use++; return 0; err2: @@ -2440,16 +2603,20 @@ err1: return err; } -static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +static void nft_set_destroy(struct nft_set *set) { - list_del(&set->list); - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); - set->ops->destroy(set); module_put(set->ops->owner); kfree(set); } +static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +{ + list_del(&set->list); + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); + nft_set_destroy(set); +} + static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2471,10 +2638,17 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; if (!list_empty(&set->bindings)) return -EBUSY; - nf_tables_set_destroy(&ctx, set); + err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set); + if (err < 0) + return err; + + list_del(&set->list); + ctx.table->use--; return 0; } @@ -2534,7 +2708,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, { list_del(&binding->list); - if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && + !(set->flags & NFT_SET_INACTIVE)) nf_tables_set_destroy(ctx, set); } @@ -2552,16 +2727,18 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, }; static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + bool trans) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; - const struct nft_table *table; + struct nft_af_info *afi; + struct nft_table *table; struct net *net = sock_net(skb->sk); afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); @@ -2571,6 +2748,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (!trans && (table->flags & NFT_TABLE_INACTIVE)) + return -ENOENT; nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); return 0; @@ -2644,13 +2823,16 @@ 
static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla); + err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla, + false); if (err < 0) return err; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; event = NFT_MSG_NEWSETELEM; event |= NFNL_SUBSYS_NFTABLES << 8; @@ -2707,13 +2889,15 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int err; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); if (err < 0) return err; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -2724,7 +2908,98 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, return -EOPNOTSUPP; } -static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, +static int nf_tables_fill_setelem_info(struct sk_buff *skb, + const struct nft_ctx *ctx, u32 seq, + u32 portid, int event, u16 flags, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct nlattr *nest; + int err; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = ctx->afi->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_NAME, set->name)) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + if (nest == NULL) + goto nla_put_failure; + + err = nf_tables_fill_setelem(skb, set, elem); + if (err < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_setelem_notify(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_elem *elem, + int event, u16 flags) +{ + struct net *net = ctx->net; + u32 portid = ctx->portid; + struct sk_buff *skb; + int err; + + if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, + set, elem); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, + int msg_type, + struct nft_set *set) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem)); + if (trans == NULL) + return NULL; + + nft_trans_elem_set(trans) = set; + return trans; +} + +static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; @@ -2732,8 +3007,12 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct 
nft_set *set, struct nft_set_elem elem; struct nft_set_binding *binding; enum nft_registers dreg; + struct nft_trans *trans; int err; + if (set->size && set->nelems == set->size) + return -ENFILE; + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy); if (err < 0) @@ -2786,7 +3065,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, struct nft_ctx bind_ctx = { .afi = ctx->afi, .table = ctx->table, - .chain = binding->chain, + .chain = (struct nft_chain *)binding->chain, }; err = nft_validate_data_load(&bind_ctx, dreg, @@ -2796,12 +3075,20 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, } } + trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); + if (trans == NULL) + goto err3; + err = set->ops->insert(set, &elem); if (err < 0) - goto err3; + goto err4; + nft_trans_elem(trans) = elem; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; +err4: + kfree(trans); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_uninit(&elem.data, d2.type); @@ -2815,35 +3102,46 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + struct net *net = sock_net(skb->sk); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; - int rem, err; + int rem, err = 0; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true); if (err < 0) return err; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); - if (IS_ERR(set)) - return PTR_ERR(set); + if (IS_ERR(set)) { + if (nla[NFTA_SET_ELEM_LIST_SET_ID]) { + set = nf_tables_set_lookup_byid(net, + nla[NFTA_SET_ELEM_LIST_SET_ID]); + } + if (IS_ERR(set)) + return PTR_ERR(set); + } + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr); if (err < 0) - return err; + break; + + set->nelems++; } - return 0; + return err; } -static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, +static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc desc; struct nft_set_elem elem; + struct nft_trans *trans; int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, @@ -2867,7 +3165,12 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err2; - set->ops->remove(set, &elem); + trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); + if (trans == NULL) + goto err2; + + nft_trans_elem(trans) = elem; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); nft_data_uninit(&elem.key, NFT_DATA_VALUE); if (set->flags & NFT_SET_MAP) @@ -2886,9 +3189,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; - int rem, err; + int rem, err = 0; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); if (err < 0) return err; @@ -2901,14 +3204,16 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); if (err < 0) - return err; + break; + + set->nelems--; } - return 0; + return err; } static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { 
[NFT_MSG_NEWTABLE] = { - .call = nf_tables_newtable, + .call_batch = nf_tables_newtable, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, @@ -2918,12 +3223,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_table_policy, }, [NFT_MSG_DELTABLE] = { - .call = nf_tables_deltable, + .call_batch = nf_tables_deltable, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, [NFT_MSG_NEWCHAIN] = { - .call = nf_tables_newchain, + .call_batch = nf_tables_newchain, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, @@ -2933,7 +3238,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_chain_policy, }, [NFT_MSG_DELCHAIN] = { - .call = nf_tables_delchain, + .call_batch = nf_tables_delchain, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, @@ -2953,7 +3258,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_NEWSET] = { - .call = nf_tables_newset, + .call_batch = nf_tables_newset, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, @@ -2963,12 +3268,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_policy, }, [NFT_MSG_DELSET] = { - .call = nf_tables_delset, + .call_batch = nf_tables_delset, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, [NFT_MSG_NEWSETELEM] = { - .call = nf_tables_newsetelem, + .call_batch = nf_tables_newsetelem, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, @@ -2978,12 +3283,282 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_elem_list_policy, }, [NFT_MSG_DELSETELEM] = { - .call = nf_tables_delsetelem, + .call_batch = nf_tables_delsetelem, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, }; +static void nft_chain_commit_update(struct nft_trans *trans) +{ + struct nft_base_chain *basechain; + + if (nft_trans_chain_name(trans)[0]) + strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); + + if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN)) + return; + + basechain = nft_base_chain(trans->ctx.chain); + nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans)); + + switch (nft_trans_chain_policy(trans)) { + case NF_DROP: + case NF_ACCEPT: + basechain->policy = nft_trans_chain_policy(trans); + break; + } +} + +/* Schedule objects for release via rcu to make sure no packets are accessing + * removed rules. + */ +static void nf_tables_commit_release_rcu(struct rcu_head *rt) +{ + struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); + + switch (trans->msg_type) { + case NFT_MSG_DELTABLE: + nf_tables_table_destroy(&trans->ctx); + break; + case NFT_MSG_DELCHAIN: + nf_tables_chain_destroy(trans->ctx.chain); + break; + case NFT_MSG_DELRULE: + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + break; + case NFT_MSG_DELSET: + nft_set_destroy(nft_trans_set(trans)); + break; + } + kfree(trans); +} + +static int nf_tables_commit(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_trans *trans, *next; + struct nft_set *set; + + /* Bump generation counter, invalidate any dump in progress */ + net->nft.genctr++; + + /* A new generation has just started */ + net->nft.gencursor = gencursor_next(net); + + /* Make sure all packets have left the previous generation before + * purging old rules.
+ */ + synchronize_rcu(); + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { + if (!nft_trans_table_enable(trans)) { + nf_tables_table_disable(trans->ctx.afi, + trans->ctx.table); + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } + } else { + trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE; + } + nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELTABLE: + nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE); + break; + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain_update(trans)) + nft_chain_commit_update(trans); + else + trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE; + + nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELCHAIN: + nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && + trans->ctx.chain->flags & NFT_BASE_CHAIN) { + nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, + trans->ctx.afi->nops); + } + break; + case NFT_MSG_NEWRULE: + nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nf_tables_rule_notify(&trans->ctx, + nft_trans_rule(trans), + NFT_MSG_NEWRULE); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELRULE: + list_del_rcu(&nft_trans_rule(trans)->list); + nf_tables_rule_notify(&trans->ctx, + nft_trans_rule(trans), + NFT_MSG_DELRULE); + break; + case NFT_MSG_NEWSET: + nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE; + /* This avoids hitting -EBUSY when deleting the table + * from the transaction. + */ + if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS && + !list_empty(&nft_trans_set(trans)->bindings)) + trans->ctx.table->use--; + + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_NEWSET, GFP_KERNEL); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSET: + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_DELSET, GFP_KERNEL); + break; + case NFT_MSG_NEWSETELEM: + nf_tables_setelem_notify(&trans->ctx, + nft_trans_elem_set(trans), + &nft_trans_elem(trans), + NFT_MSG_NEWSETELEM, 0); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSETELEM: + nf_tables_setelem_notify(&trans->ctx, + nft_trans_elem_set(trans), + &nft_trans_elem(trans), + NFT_MSG_DELSETELEM, 0); + set = nft_trans_elem_set(trans); + set->ops->get(set, &nft_trans_elem(trans)); + set->ops->remove(set, &nft_trans_elem(trans)); + nft_trans_destroy(trans); + break; + } + } + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_del(&trans->list); + trans->ctx.nla = NULL; + call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); + } + + return 0; +} + +/* Schedule objects for release via rcu to make sure no packets are accessing
+ */ +static void nf_tables_abort_release_rcu(struct rcu_head *rt) +{ + struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); + + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + nf_tables_table_destroy(&trans->ctx); + break; + case NFT_MSG_NEWCHAIN: + nf_tables_chain_destroy(trans->ctx.chain); + break; + case NFT_MSG_NEWRULE: + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + break; + case NFT_MSG_NEWSET: + nft_set_destroy(nft_trans_set(trans)); + break; + } + kfree(trans); +} + +static int nf_tables_abort(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_trans *trans, *next; + struct nft_set *set; + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { + if (nft_trans_table_enable(trans)) { + nf_tables_table_disable(trans->ctx.afi, + trans->ctx.table); + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } + nft_trans_destroy(trans); + } else { + list_del(&trans->ctx.table->list); + } + break; + case NFT_MSG_DELTABLE: + list_add_tail(&trans->ctx.table->list, + &trans->ctx.afi->tables); + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain_update(trans)) { + if (nft_trans_chain_stats(trans)) + free_percpu(nft_trans_chain_stats(trans)); + + nft_trans_destroy(trans); + } else { + trans->ctx.table->use--; + list_del(&trans->ctx.chain->list); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && + trans->ctx.chain->flags & NFT_BASE_CHAIN) { + nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, + trans->ctx.afi->nops); + } + } + break; + case NFT_MSG_DELCHAIN: + trans->ctx.table->use++; + list_add_tail(&trans->ctx.chain->list, + &trans->ctx.table->chains); + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWRULE: + trans->ctx.chain->use--; + list_del_rcu(&nft_trans_rule(trans)->list); + break; + case NFT_MSG_DELRULE: + trans->ctx.chain->use++; + nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWSET: + trans->ctx.table->use--; + list_del(&nft_trans_set(trans)->list); + break; + case NFT_MSG_DELSET: + trans->ctx.table->use++; + list_add_tail(&nft_trans_set(trans)->list, + &trans->ctx.table->sets); + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWSETELEM: + nft_trans_elem_set(trans)->nelems--; + set = nft_trans_elem_set(trans); + set->ops->get(set, &nft_trans_elem(trans)); + set->ops->remove(set, &nft_trans_elem(trans)); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSETELEM: + nft_trans_elem_set(trans)->nelems++; + nft_trans_destroy(trans); + break; + } + } + + list_for_each_entry_safe_reverse(trans, next, + &net->nft.commit_list, list) { + list_del(&trans->list); + trans->ctx.nla = NULL; + call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu); + } + + return 0; +} + static const struct nfnetlink_subsystem nf_tables_subsys = { .name = "nf_tables", .subsys_id = NFNL_SUBSYS_NFTABLES, diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 23ef77c60fff..c138b8fbe280 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -399,19 +399,17 @@ static void nfnetlink_rcv(struct sk_buff *skb) } #ifdef CONFIG_MODULES -static void nfnetlink_bind(int group) +static int nfnetlink_bind(int group) { const struct nfnetlink_subsystem *ss; int type = nfnl_group2type[group]; rcu_read_lock(); ss = nfnetlink_get_subsys(type); - if (!ss) { - rcu_read_unlock(); - 
request_module("nfnetlink-subsys-%d", type); - return; - } rcu_read_unlock(); + if (!ss) + request_module("nfnetlink-subsys-%d", type); + return 0; } #endif diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c7b6d466a662..2baa125c2e8d 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -32,18 +32,24 @@ static LIST_HEAD(nfnl_acct_list); struct nf_acct { atomic64_t pkts; atomic64_t bytes; + unsigned long flags; struct list_head head; atomic_t refcnt; char name[NFACCT_NAME_MAX]; struct rcu_head rcu_head; + char data[0]; }; +#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) + static int nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; char *acct_name; + unsigned int size = 0; + u32 flags = 0; if (!tb[NFACCT_NAME]) return -EINVAL; @@ -68,15 +74,38 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, /* reset counters if you request a replacement. */ atomic64_set(&matching->pkts, 0); atomic64_set(&matching->bytes, 0); + smp_mb__before_atomic(); + /* reset overquota flag if quota is enabled. */ + if ((matching->flags & NFACCT_F_QUOTA)) + clear_bit(NFACCT_F_OVERQUOTA, &matching->flags); return 0; } return -EBUSY; } - nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); + if (tb[NFACCT_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS])); + if (flags & ~NFACCT_F_QUOTA) + return -EOPNOTSUPP; + if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA) + return -EINVAL; + if (flags & NFACCT_F_OVERQUOTA) + return -EINVAL; + + size += sizeof(u64); + } + + nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL); if (nfacct == NULL) return -ENOMEM; + if (flags & NFACCT_F_QUOTA) { + u64 *quota = (u64 *)nfacct->data; + + *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA])); + nfacct->flags = flags; + } + strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); if (tb[NFACCT_BYTES]) { @@ -117,6 +146,9 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (type == NFNL_MSG_ACCT_GET_CTRZERO) { pkts = atomic64_xchg(&acct->pkts, 0); bytes = atomic64_xchg(&acct->bytes, 0); + smp_mb__before_atomic(); + if (acct->flags & NFACCT_F_QUOTA) + clear_bit(NFACCT_F_OVERQUOTA, &acct->flags); } else { pkts = atomic64_read(&acct->pkts); bytes = atomic64_read(&acct->bytes); @@ -125,7 +157,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) goto nla_put_failure; + if (acct->flags & NFACCT_F_QUOTA) { + u64 *quota = (u64 *)acct->data; + if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) || + nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota))) + goto nla_put_failure; + } nlmsg_end(skb, nlh); return skb->len; @@ -270,6 +308,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, [NFACCT_BYTES] = { .type = NLA_U64 }, [NFACCT_PKTS] = { .type = NLA_U64 }, + [NFACCT_FLAGS] = { .type = NLA_U32 }, + [NFACCT_QUOTA] = { .type = NLA_U64 }, }; static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { @@ -336,6 +376,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_update); +static void nfnl_overquota_report(struct nf_acct *nfacct) +{ + int ret; + struct sk_buff *skb; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, 
GFP_ATOMIC); + if (skb == NULL) + return; + + ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0, + nfacct); + if (ret <= 0) { + kfree_skb(skb); + return; + } + netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, + GFP_ATOMIC); +} + +int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) +{ + u64 now; + u64 *quota; + int ret = NFACCT_UNDERQUOTA; + + /* no place here if we don't have a quota */ + if (!(nfacct->flags & NFACCT_F_QUOTA)) + return NFACCT_NO_QUOTA; + + quota = (u64 *)nfacct->data; + now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ? + atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes); + + ret = now > *quota; + + if (now >= *quota && + !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) { + nfnl_overquota_report(nfacct); + } + + return ret; +} +EXPORT_SYMBOL_GPL(nfnl_acct_overquota); + static int __init nfnl_acct_init(void) { int ret; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bd0d41e69341..cc5603016242 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -215,22 +215,14 @@ static void nft_ct_l3proto_module_put(uint8_t family) nf_ct_l3proto_module_put(family); } -static int nft_ct_init_validate_get(const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_ct_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); + int err; - if (tb[NFTA_CT_DIRECTION] != NULL) { - priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); - switch (priv->dir) { - case IP_CT_DIR_ORIGINAL: - case IP_CT_DIR_REPLY: - break; - default: - return -EINVAL; - } - } - + priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); switch (priv->key) { case NFT_CT_STATE: case NFT_CT_DIRECTION: @@ -262,55 +254,55 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr, return -EOPNOTSUPP; } - return 0; -} - -static int nft_ct_init_validate_set(uint32_t key) -{ - switch (key) { - case NFT_CT_MARK: - break; - default: - return -EOPNOTSUPP; + if (tb[NFTA_CT_DIRECTION] != NULL) { + priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + case IP_CT_DIR_REPLY: + break; + default: + return -EINVAL; + } } + priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + err = nft_ct_l3proto_try_module_get(ctx->afi->family); + if (err < 0) + return err; + return 0; } -static int nft_ct_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_ct_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); int err; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); - - if (tb[NFTA_CT_DREG]) { - err = nft_ct_init_validate_get(expr, tb); - if (err < 0) - return err; - - priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_validate_data_load(ctx, priv->dreg, NULL, - NFT_DATA_VALUE); - if (err < 0) - return err; - } else { - err = nft_ct_init_validate_set(priv->key); - if (err < 0) - return err; - - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; + switch (priv->key) { +#ifdef CONFIG_NF_CONNTRACK_MARK + case 
NFT_CT_MARK: + break; +#endif + default: + return -EOPNOTSUPP; } + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + err = nft_ct_l3proto_try_module_get(ctx->afi->family); if (err < 0) return err; @@ -370,7 +362,7 @@ static const struct nft_expr_ops nft_ct_get_ops = { .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), .eval = nft_ct_get_eval, - .init = nft_ct_init, + .init = nft_ct_get_init, .destroy = nft_ct_destroy, .dump = nft_ct_get_dump, }; @@ -379,7 +371,7 @@ static const struct nft_expr_ops nft_ct_set_ops = { .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), .eval = nft_ct_set_eval, - .init = nft_ct_init, + .init = nft_ct_set_init, .destroy = nft_ct_destroy, .dump = nft_ct_set_dump, }; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3b1ad876d6b0..4080ed6a072b 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/list.h> +#include <linux/log2.h> #include <linux/jhash.h> #include <linux/netlink.h> #include <linux/vmalloc.h> @@ -19,7 +20,7 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> -#define NFT_HASH_MIN_SIZE 4 +#define NFT_HASH_MIN_SIZE 4UL struct nft_hash { struct nft_hash_table __rcu *tbl; @@ -27,7 +28,6 @@ struct nft_hash { struct nft_hash_table { unsigned int size; - unsigned int elements; struct nft_hash_elem __rcu *buckets[]; }; @@ -76,10 +76,12 @@ static bool nft_hash_lookup(const struct nft_set *set, static void nft_hash_tbl_free(const struct nft_hash_table *tbl) { - if (is_vmalloc_addr(tbl)) - vfree(tbl); - else - kfree(tbl); + kvfree(tbl); +} + +static unsigned int nft_hash_tbl_size(unsigned int nelem) +{ + return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE); } static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) @@ -161,7 +163,6 @@ static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv) break; } } - ntbl->elements = tbl->elements; /* Publish new table */ rcu_assign_pointer(priv->tbl, ntbl); @@ -201,7 +202,6 @@ static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv) ; RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); } - ntbl->elements = tbl->elements; /* Publish new table */ rcu_assign_pointer(priv->tbl, ntbl); @@ -237,10 +237,9 @@ static int nft_hash_insert(const struct nft_set *set, h = nft_hash_data(&he->key, tbl->size, set->klen); RCU_INIT_POINTER(he->next, tbl->buckets[h]); rcu_assign_pointer(tbl->buckets[h], he); - tbl->elements++; /* Expand table when exceeding 75% load */ - if (tbl->elements > tbl->size / 4 * 3) + if (set->nelems + 1 > tbl->size / 4 * 3) nft_hash_tbl_expand(set, priv); return 0; @@ -268,10 +267,9 @@ static void nft_hash_remove(const struct nft_set *set, RCU_INIT_POINTER(*pprev, he->next); synchronize_rcu(); kfree(he); - tbl->elements--; /* Shrink table beneath 30% load */ - if (tbl->elements < tbl->size * 3 / 10 && + if (set->nelems - 1 < tbl->size * 3 / 10 && tbl->size > NFT_HASH_MIN_SIZE) nft_hash_tbl_shrink(set, priv); } @@ -335,17 +333,23 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) } static int nft_hash_init(const struct nft_set *set, + const struct nft_set_desc *desc, const struct nlattr * const tb[]) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_table *tbl; + unsigned int size; if (unlikely(!nft_hash_rnd_initted)) { 
get_random_bytes(&nft_hash_rnd, 4); nft_hash_rnd_initted = true; } - tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE); + size = NFT_HASH_MIN_SIZE; + if (desc->size) + size = nft_hash_tbl_size(desc->size); + + tbl = nft_hash_tbl_alloc(size); if (tbl == NULL) return -ENOMEM; RCU_INIT_POINTER(priv->tbl, tbl); @@ -369,8 +373,37 @@ static void nft_hash_destroy(const struct nft_set *set) kfree(tbl); } +static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + unsigned int esize; + + esize = sizeof(struct nft_hash_elem); + if (features & NFT_SET_MAP) + esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); + + if (desc->size) { + est->size = sizeof(struct nft_hash) + + nft_hash_tbl_size(desc->size) * + sizeof(struct nft_hash_elem *) + + desc->size * esize; + } else { + /* Resizing happens when the load drops below 30% or goes + * above 75%. The average of 52.5% load (approximated by 50%) + * is used for the size estimation of the hash buckets, + * meaning we calculate two buckets per element. + */ + est->size = esize + 2 * sizeof(struct nft_hash_elem *); + } + + est->class = NFT_SET_CLASS_O_1; + + return true; +} + static struct nft_set_ops nft_hash_ops __read_mostly = { .privsize = nft_hash_privsize, + .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, .get = nft_hash_get, diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 7fd2bea8aa23..6404a726d17b 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -56,8 +56,14 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return -EINVAL; set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); - if (IS_ERR(set)) - return PTR_ERR(set); + if (IS_ERR(set)) { + if (tb[NFTA_LOOKUP_SET_ID]) { + set = nf_tables_set_lookup_byid(ctx->net, + tb[NFTA_LOOKUP_SET_ID]); + } + if (IS_ERR(set)) + return PTR_ERR(set); + } priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); err = nft_validate_input_register(priv->sreg); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 425cf39af890..852b178c6ae7 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -18,18 +18,11 @@ #include <net/sock.h> #include <net/tcp_states.h> /* for TCP_TIME_WAIT */ #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_meta.h> -struct nft_meta { - enum nft_meta_keys key:8; - union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; - }; -}; - -static void nft_meta_get_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; @@ -140,10 +133,11 @@ static void nft_meta_get_eval(const struct nft_expr *expr, err: data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +EXPORT_SYMBOL_GPL(nft_meta_get_eval); -static void nft_meta_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; @@ -163,28 +157,24 @@ static void nft_meta_set_eval(const struct nft_expr *expr, WARN_ON(1); } } +EXPORT_SYMBOL_GPL(nft_meta_set_eval); -static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { 
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, [NFTA_META_KEY] = { .type = NLA_U32 }, [NFTA_META_SREG] = { .type = NLA_U32 }, }; +EXPORT_SYMBOL_GPL(nft_meta_policy); -static int nft_meta_init_validate_set(uint32_t key) +int nft_meta_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { - switch (key) { - case NFT_META_MARK: - case NFT_META_PRIORITY: - case NFT_META_NFTRACE: - return 0; - default: - return -EOPNOTSUPP; - } -} + struct nft_meta *priv = nft_expr_priv(expr); + int err; -static int nft_meta_init_validate_get(uint32_t key) -{ - switch (key) { + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { case NFT_META_LEN: case NFT_META_PROTOCOL: case NFT_META_NFPROTO: @@ -205,39 +195,41 @@ static int nft_meta_init_validate_get(uint32_t key) #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: #endif - return 0; + break; default: return -EOPNOTSUPP; } + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + return 0; } +EXPORT_SYMBOL_GPL(nft_meta_get_init); -static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +int nft_meta_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); int err; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - - if (tb[NFTA_META_DREG]) { - err = nft_meta_init_validate_get(priv->key); - if (err < 0) - return err; - - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - return nft_validate_data_load(ctx, priv->dreg, NULL, - NFT_DATA_VALUE); + switch (priv->key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: + break; + default: + return -EOPNOTSUPP; } - err = nft_meta_init_validate_set(priv->key); - if (err < 0) - return err; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG])); err = nft_validate_input_register(priv->sreg); if (err < 0) @@ -245,9 +237,10 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return 0; } +EXPORT_SYMBOL_GPL(nft_meta_set_init); -static int nft_meta_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) +int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -260,9 +253,10 @@ static int nft_meta_get_dump(struct sk_buff *skb, nla_put_failure: return -1; } +EXPORT_SYMBOL_GPL(nft_meta_get_dump); -static int nft_meta_set_dump(struct sk_buff *skb, - const struct nft_expr *expr) +int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -276,13 +270,14 @@ static int nft_meta_set_dump(struct sk_buff *skb, nla_put_failure: return -1; } +EXPORT_SYMBOL_GPL(nft_meta_set_dump); static struct nft_expr_type nft_meta_type; static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_get_eval, - .init = nft_meta_init, + .init = nft_meta_get_init, .dump = nft_meta_get_dump, }; @@ -290,7 +285,7 @@ static const struct nft_expr_ops nft_meta_set_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct 
nft_meta)), .eval = nft_meta_set_eval, - .init = nft_meta_init, + .init = nft_meta_set_init, .dump = nft_meta_set_dump, }; diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index e21d69d13506..e1836ff88199 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -18,6 +18,8 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +static DEFINE_SPINLOCK(nft_rbtree_lock); + struct nft_rbtree { struct rb_root root; }; @@ -38,6 +40,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const struct rb_node *parent = priv->root.rb_node; int d; + spin_lock_bh(&nft_rbtree_lock); while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -53,6 +56,8 @@ found: goto out; if (set->flags & NFT_SET_MAP) nft_data_copy(data, rbe->data); + + spin_unlock_bh(&nft_rbtree_lock); return true; } } @@ -62,6 +67,7 @@ found: goto found; } out: + spin_unlock_bh(&nft_rbtree_lock); return false; } @@ -124,9 +130,12 @@ static int nft_rbtree_insert(const struct nft_set *set, !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(rbe->data, &elem->data); + spin_lock_bh(&nft_rbtree_lock); err = __nft_rbtree_insert(set, rbe); if (err < 0) kfree(rbe); + + spin_unlock_bh(&nft_rbtree_lock); return err; } @@ -136,7 +145,9 @@ static void nft_rbtree_remove(const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe = elem->cookie; + spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); + spin_unlock_bh(&nft_rbtree_lock); kfree(rbe); } @@ -147,6 +158,7 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) struct nft_rbtree_elem *rbe; int d; + spin_lock_bh(&nft_rbtree_lock); while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -161,9 +173,11 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(&elem->data, rbe->data); elem->flags = rbe->flags; + spin_unlock_bh(&nft_rbtree_lock); return 0; } } + spin_unlock_bh(&nft_rbtree_lock); return -ENOENT; } @@ -176,6 +190,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set_elem elem; struct rb_node *node; + spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { if (iter->count < iter->skip) goto cont; @@ -188,11 +203,14 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, elem.flags = rbe->flags; iter->err = iter->fn(ctx, set, iter, &elem); - if (iter->err < 0) + if (iter->err < 0) { + spin_unlock_bh(&nft_rbtree_lock); return; + } cont: iter->count++; } + spin_unlock_bh(&nft_rbtree_lock); } static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) @@ -201,6 +219,7 @@ static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) } static int nft_rbtree_init(const struct nft_set *set, + const struct nft_set_desc *desc, const struct nlattr * const nla[]) { struct nft_rbtree *priv = nft_set_priv(set); @@ -215,15 +234,37 @@ static void nft_rbtree_destroy(const struct nft_set *set) struct nft_rbtree_elem *rbe; struct rb_node *node; + spin_lock_bh(&nft_rbtree_lock); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); nft_rbtree_elem_destroy(set, rbe); } + spin_unlock_bh(&nft_rbtree_lock); +} + +static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + unsigned int nsize; + + 
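Unlike the hash backend, which keeps lookups lockless under RCU, the rbtree backend above now serializes every operation, lookups and walks included, behind a single global spinlock_bh: rebalancing rotations cannot be traversed safely under RCU alone. In return it supports intervals and reports NFT_SET_CLASS_O_LOG_N, so a selector can prefer an O(1) hash whenever intervals are not required. A sketch of how two estimates might be weighed (the actual nf_tables selection policy is outside this hunk, so treat the rule below as an assumption):

#include <stdbool.h>
#include <stdio.h>

enum set_class { SET_CLASS_O_1, SET_CLASS_O_LOG_N };

struct set_estimate {
	unsigned long size;	/* projected memory use */
	enum set_class class;	/* lookup complexity */
};

/* Hypothetical rule: prefer the cheaper lookup class and break
 * ties on projected memory. */
static bool prefer_first(const struct set_estimate *a,
			 const struct set_estimate *b)
{
	if (a->class != b->class)
		return a->class < b->class;
	return a->size <= b->size;
}

int main(void)
{
	struct set_estimate hash = { 64400, SET_CLASS_O_1 };
	struct set_estimate rbtree = { 40960, SET_CLASS_O_LOG_N };

	printf("%s\n", prefer_first(&hash, &rbtree) ? "hash" : "rbtree");
	return 0;
}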
nsize = sizeof(struct nft_rbtree_elem); + if (features & NFT_SET_MAP) + nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]); + + if (desc->size) + est->size = sizeof(struct nft_rbtree) + desc->size * nsize; + else + est->size = nsize; + + est->class = NFT_SET_CLASS_O_LOG_N; + + return true; } static struct nft_set_ops nft_rbtree_ops __read_mostly = { .privsize = nft_rbtree_privsize, + .estimate = nft_rbtree_estimate, .init = nft_rbtree_init, .destroy = nft_rbtree_destroy, .insert = nft_rbtree_insert, diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 12d4da8e6c77..bbffdbdaf603 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -23,10 +23,11 @@ MODULE_ALIAS("ip6t_bpf"); static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; - struct sock_fprog program; + struct sock_fprog_kern program; program.len = info->bpf_program_num_elem; - program.filter = (struct sock_filter __user *) info->bpf_program; + program.filter = info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index b3be0ef21f19..8c646ed9c921 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -21,11 +21,14 @@ MODULE_ALIAS("ip6t_nfacct"); static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) { + int overquota; const struct xt_nfacct_match_info *info = par->targinfo; nfnl_acct_update(skb, info->nfacct); - return true; + overquota = nfnl_acct_overquota(skb, info->nfacct); + + return overquota == NFACCT_UNDERQUOTA ? false : true; } static int diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 1e657cf715c4..a9faae89f955 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -313,10 +313,7 @@ out: static void recent_table_free(void *addr) { - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); + kvfree(addr); } static int recent_mt_check(const struct xt_mtchk_param *par, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f22757a29cd0..15c731f03fa6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1206,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1232,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; + unbind = nl_table[protocol].unbind; netlink_unlock_table(); if (err < 0) @@ -1248,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, nlk = nlk_sk(sock->sk); nlk->module = module; nlk->netlink_bind = bind; + nlk->netlink_unbind = unbind; out: return err; @@ -1301,6 +1304,7 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].unbind = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -1478,6 +1482,19 @@ static int netlink_realloc_groups(struct sock *sk) return err; } +static void netlink_unbind(int group, long unsigned int groups, + struct netlink_sock *nlk) +{ + int undo; + + if 
(!nlk->netlink_unbind) + return; + + for (undo = 0; undo < group; undo++) + if (test_bit(undo, &groups)) + nlk->netlink_unbind(undo); +} + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -1486,6 +1503,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; + long unsigned int groups = nladdr->nl_groups; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; @@ -1494,7 +1512,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; /* Only superuser is allowed to listen multicasts */ - if (nladdr->nl_groups) { + if (groups) { if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); @@ -1502,37 +1520,45 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->portid) { + if (nlk->portid) if (nladdr->nl_pid != nlk->portid) return -EINVAL; - } else { + + if (nlk->netlink_bind && groups) { + int group; + + for (group = 0; group < nlk->ngroups; group++) { + if (!test_bit(group, &groups)) + continue; + err = nlk->netlink_bind(group); + if (!err) + continue; + netlink_unbind(group, groups, nlk); + return err; + } + } + + if (!nlk->portid) { err = nladdr->nl_pid ? netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); - if (err) + if (err) { + netlink_unbind(nlk->ngroups - 1, groups, nlk); return err; + } } - if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) + if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) return 0; netlink_table_grab(); netlink_update_subscriptions(sk, nlk->subscriptions + - hweight32(nladdr->nl_groups) - + hweight32(groups) - hweight32(nlk->groups[0])); - nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; + nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; netlink_update_listeners(sk); netlink_table_ungrab(); - if (nlk->netlink_bind && nlk->groups[0]) { - int i; - - for (i = 0; i < nlk->ngroups; i++) { - if (test_bit(i, nlk->groups)) - nlk->netlink_bind(i); - } - } - return 0; } @@ -2170,13 +2196,17 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return err; if (!val || val - 1 >= nlk->ngroups) return -EINVAL; + if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { + err = nlk->netlink_bind(val); + if (err) + return err; + } netlink_table_grab(); netlink_update_socket_mc(nlk, val, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); - - if (nlk->netlink_bind) - nlk->netlink_bind(val); + if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) + nlk->netlink_unbind(val); err = 0; break; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index ed13a790b00e..0b59d441f5b6 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -38,7 +38,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - void (*netlink_bind)(int group); + int (*netlink_bind)(int group); + void (*netlink_unbind)(int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -74,7 +75,8 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); bool (*compare)(struct net *net, struct sock *sock); int registered; };
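The netlink_bind()/netlink_unbind() pair above turns multicast group subscription into a fallible operation: a protocol's bind() callback may now veto a group, each requested group is bound in turn, and the first failure (or a later failure to insert the port) unwinds whatever was already bound before the error is returned. The same symmetry shows up in setsockopt(), where NETLINK_ADD_MEMBERSHIP calls bind() and NETLINK_DROP_MEMBERSHIP calls unbind(). A self-contained sketch of the bind-with-rollback pattern (the names are illustrative, not kernel API, and the bitmap is assumed to fit in one unsigned long, as nlk->groups[0] does):

#include <stdio.h>

static int toy_bind(int group)
{
	printf("bind %d\n", group);
	return group == 2 ? -1 : 0;	/* simulate a failure on group 2 */
}

static void toy_unbind(int group)
{
	printf("unbind %d\n", group);
}

static int bind_groups(unsigned long groups, int ngroups,
		       int (*bind)(int), void (*unbind)(int))
{
	int group, undo, err;

	for (group = 0; group < ngroups; group++) {
		if (!(groups & (1UL << group)))
			continue;
		err = bind(group);
		if (err) {
			/* Roll back every group bound before the failure,
			 * testing the bit that is about to be undone. */
			for (undo = 0; undo < group; undo++)
				if (groups & (1UL << undo))
					unbind(undo);
			return err;
		}
	}
	return 0;
}

int main(void)
{
	/* Groups 0 and 2 requested; binding group 2 fails, so the
	 * earlier bind of group 0 is rolled back. */
	return bind_groups(0x5UL, 3, toy_bind, toy_unbind) ? 1 : 0;
}

diff --git a/net/netlink/genetlink.c 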
index a3ba3ca0ff92..76393f2f4b22 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -317,7 +317,7 @@ static void genl_unregister_mc_groups(struct genl_family *family) } } -static int genl_validate_ops(struct genl_family *family) +static int genl_validate_ops(const struct genl_family *family) { const struct genl_ops *ops = family->ops; unsigned int n_ops = family->n_ops; @@ -337,10 +337,6 @@ static int genl_validate_ops(struct genl_family *family) return -EINVAL; } - /* family is not registered yet, so no locking needed */ - family->ops = ops; - family->n_ops = n_ops; - return 0; } diff --git a/net/nfc/digital.h b/net/nfc/digital.h index 3759add68b1b..71ad7eefddd4 100644 --- a/net/nfc/digital.h +++ b/net/nfc/digital.h @@ -71,6 +71,7 @@ static inline int digital_in_send_cmd(struct nfc_digital_dev *ddev, void digital_poll_next_tech(struct nfc_digital_dev *ddev); int digital_in_send_sens_req(struct nfc_digital_dev *ddev, u8 rf_tech); +int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech); int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech); int digital_in_send_iso15693_inv_req(struct nfc_digital_dev *ddev, u8 rf_tech); diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index e01e15dbf1ab..a6ce3c627e4e 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -22,6 +22,8 @@ #define DIGITAL_PROTO_NFCA_RF_TECH \ (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_NFC_DEP_MASK) +#define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK + #define DIGITAL_PROTO_NFCF_RF_TECH \ (NFC_PROTO_FELICA_MASK | NFC_PROTO_NFC_DEP_MASK) @@ -345,6 +347,12 @@ int digital_target_found(struct nfc_digital_dev *ddev, add_crc = digital_skb_add_crc_a; break; + case NFC_PROTO_ISO14443_B: + framing = NFC_DIGITAL_FRAMING_NFCB_T4T; + check_crc = digital_skb_check_crc_b; + add_crc = digital_skb_add_crc_b; + break; + default: pr_err("Invalid protocol %d\n", protocol); return -EINVAL; @@ -378,6 +386,8 @@ int digital_target_found(struct nfc_digital_dev *ddev, void digital_poll_next_tech(struct nfc_digital_dev *ddev) { + u8 rand_mod; + digital_switch_rf(ddev, 0); mutex_lock(&ddev->poll_lock); @@ -387,8 +397,8 @@ void digital_poll_next_tech(struct nfc_digital_dev *ddev) return; } - ddev->poll_tech_index = (ddev->poll_tech_index + 1) % - ddev->poll_tech_count; + get_random_bytes(&rand_mod, sizeof(rand_mod)); + ddev->poll_tech_index = rand_mod % ddev->poll_tech_count; mutex_unlock(&ddev->poll_lock); @@ -475,6 +485,10 @@ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_in_send_sens_req); + if (matching_im_protocols & DIGITAL_PROTO_NFCB_RF_TECH) + digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106B, + digital_in_send_sensb_req); + if (matching_im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F, digital_in_send_sensf_req); @@ -635,7 +649,8 @@ static void digital_in_send_complete(struct nfc_digital_dev *ddev, void *arg, goto done; } - if (ddev->curr_protocol == NFC_PROTO_ISO14443) { + if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || + (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = digital_in_iso_dep_pull_sod(ddev, resp); if (rc) goto done; @@ -676,7 +691,8 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, goto exit; } - if (ddev->curr_protocol == NFC_PROTO_ISO14443) { + if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || + (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = 
digital_in_iso_dep_push_sod(ddev, skb); if (rc) goto exit; @@ -747,6 +763,8 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, ddev->protocols |= NFC_PROTO_ISO15693_MASK; if (supported_protocols & NFC_PROTO_ISO14443_MASK) ddev->protocols |= NFC_PROTO_ISO14443_MASK; + if (supported_protocols & NFC_PROTO_ISO14443_B_MASK) + ddev->protocols |= NFC_PROTO_ISO14443_B_MASK; ddev->tx_headroom = tx_headroom + DIGITAL_MAX_HEADER_LEN; ddev->tx_tailroom = tx_tailroom + DIGITAL_CRC_LEN; diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c index d4ed25ff723f..171cb9949ab5 100644 --- a/net/nfc/digital_dep.c +++ b/net/nfc/digital_dep.c @@ -224,9 +224,8 @@ int digital_in_send_atr_req(struct nfc_digital_dev *ddev, ddev->skb_add_crc(skb); - digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res, target); - - return 0; + return digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res, + target); } static int digital_in_send_rtox(struct nfc_digital_dev *ddev, diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index 278c3fed27e0..c2c1c0189b7c 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -41,6 +41,24 @@ #define DIGITAL_MIFARE_READ_RES_LEN 16 #define DIGITAL_MIFARE_ACK_RES 0x0A +#define DIGITAL_CMD_SENSB_REQ 0x05 +#define DIGITAL_SENSB_ADVANCED BIT(5) +#define DIGITAL_SENSB_EXTENDED BIT(4) +#define DIGITAL_SENSB_ALLB_REQ BIT(3) +#define DIGITAL_SENSB_N(n) ((n) & 0x7) + +#define DIGITAL_CMD_SENSB_RES 0x50 + +#define DIGITAL_CMD_ATTRIB_REQ 0x1D +#define DIGITAL_ATTRIB_P1_TR0_DEFAULT (0x0 << 6) +#define DIGITAL_ATTRIB_P1_TR1_DEFAULT (0x0 << 4) +#define DIGITAL_ATTRIB_P1_SUPRESS_EOS BIT(3) +#define DIGITAL_ATTRIB_P1_SUPRESS_SOS BIT(2) +#define DIGITAL_ATTRIB_P2_LISTEN_POLL_1 (0x0 << 6) +#define DIGITAL_ATTRIB_P2_POLL_LISTEN_1 (0x0 << 4) +#define DIGITAL_ATTRIB_P2_MAX_FRAME_256 0x8 +#define DIGITAL_ATTRIB_P4_DID(n) ((n) & 0xf) + #define DIGITAL_CMD_SENSF_REQ 0x00 #define DIGITAL_CMD_SENSF_RES 0x01 @@ -75,6 +93,7 @@ static const u8 digital_ats_fsc[] = { }; #define DIGITAL_ATS_FSCI(t0) ((t0) & 0x0F) +#define DIGITAL_SENSB_FSCI(pi2) (((pi2) & 0xF0) >> 4) #define DIGITAL_ATS_MAX_FSC 256 #define DIGITAL_RATS_BYTE1 0xE0 @@ -92,6 +111,32 @@ struct digital_sel_req { u8 bcc; } __packed; +struct digital_sensb_req { + u8 cmd; + u8 afi; + u8 param; +} __packed; + +struct digital_sensb_res { + u8 cmd; + u8 nfcid0[4]; + u8 app_data[4]; + u8 proto_info[3]; +} __packed; + +struct digital_attrib_req { + u8 cmd; + u8 nfcid0[4]; + u8 param1; + u8 param2; + u8 param3; + u8 param4; +} __packed; + +struct digital_attrib_res { + u8 mbli_did; +} __packed; + struct digital_sensf_req { u8 cmd; u8 sc1; @@ -531,6 +576,175 @@ int digital_in_recv_mifare_res(struct sk_buff *resp) return -EIO; } +static void digital_in_recv_attrib_res(struct nfc_digital_dev *ddev, void *arg, + struct sk_buff *resp) +{ + struct nfc_target *target = arg; + struct digital_attrib_res *attrib_res; + int rc; + + if (IS_ERR(resp)) { + rc = PTR_ERR(resp); + resp = NULL; + goto exit; + } + + if (resp->len < sizeof(*attrib_res)) { + PROTOCOL_ERR("12.6.2"); + rc = -EIO; + goto exit; + } + + attrib_res = (struct digital_attrib_res *)resp->data; + + if (attrib_res->mbli_did & 0x0f) { + PROTOCOL_ERR("12.6.2.1"); + rc = -EIO; + goto exit; + } + + rc = digital_target_found(ddev, target, NFC_PROTO_ISO14443_B); + +exit: + dev_kfree_skb(resp); + kfree(target); + + if (rc) + digital_poll_next_tech(ddev); +} + +static int digital_in_send_attrib_req(struct nfc_digital_dev *ddev, + 
struct nfc_target *target, + struct digital_sensb_res *sensb_res) +{ + struct digital_attrib_req *attrib_req; + struct sk_buff *skb; + int rc; + + skb = digital_skb_alloc(ddev, sizeof(*attrib_req)); + if (!skb) + return -ENOMEM; + + attrib_req = (struct digital_attrib_req *)skb_put(skb, + sizeof(*attrib_req)); + + attrib_req->cmd = DIGITAL_CMD_ATTRIB_REQ; + memcpy(attrib_req->nfcid0, sensb_res->nfcid0, + sizeof(attrib_req->nfcid0)); + attrib_req->param1 = DIGITAL_ATTRIB_P1_TR0_DEFAULT | + DIGITAL_ATTRIB_P1_TR1_DEFAULT; + attrib_req->param2 = DIGITAL_ATTRIB_P2_LISTEN_POLL_1 | + DIGITAL_ATTRIB_P2_POLL_LISTEN_1 | + DIGITAL_ATTRIB_P2_MAX_FRAME_256; + attrib_req->param3 = sensb_res->proto_info[1] & 0x07; + attrib_req->param4 = DIGITAL_ATTRIB_P4_DID(0); + + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_attrib_res, + target); + if (rc) + kfree_skb(skb); + + return rc; +} + +static void digital_in_recv_sensb_res(struct nfc_digital_dev *ddev, void *arg, + struct sk_buff *resp) +{ + struct nfc_target *target = NULL; + struct digital_sensb_res *sensb_res; + u8 fsci; + int rc; + + if (IS_ERR(resp)) { + rc = PTR_ERR(resp); + resp = NULL; + goto exit; + } + + if (resp->len != sizeof(*sensb_res)) { + PROTOCOL_ERR("5.6.2.1"); + rc = -EIO; + goto exit; + } + + sensb_res = (struct digital_sensb_res *)resp->data; + + if (sensb_res->cmd != DIGITAL_CMD_SENSB_RES) { + PROTOCOL_ERR("5.6.2"); + rc = -EIO; + goto exit; + } + + if (!(sensb_res->proto_info[1] & BIT(0))) { + PROTOCOL_ERR("5.6.2.12"); + rc = -EIO; + goto exit; + } + + if (sensb_res->proto_info[1] & BIT(3)) { + PROTOCOL_ERR("5.6.2.16"); + rc = -EIO; + goto exit; + } + + fsci = DIGITAL_SENSB_FSCI(sensb_res->proto_info[1]); + if (fsci >= 8) + ddev->target_fsc = DIGITAL_ATS_MAX_FSC; + else + ddev->target_fsc = digital_ats_fsc[fsci]; + + target = kzalloc(sizeof(struct nfc_target), GFP_KERNEL); + if (!target) { + rc = -ENOMEM; + goto exit; + } + + rc = digital_in_send_attrib_req(ddev, target, sensb_res); + +exit: + dev_kfree_skb(resp); + + if (rc) { + kfree(target); + digital_poll_next_tech(ddev); + } +} + +int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech) +{ + struct digital_sensb_req *sensb_req; + struct sk_buff *skb; + int rc; + + rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, + NFC_DIGITAL_RF_TECH_106B); + if (rc) + return rc; + + rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, + NFC_DIGITAL_FRAMING_NFCB); + if (rc) + return rc; + + skb = digital_skb_alloc(ddev, sizeof(*sensb_req)); + if (!skb) + return -ENOMEM; + + sensb_req = (struct digital_sensb_req *)skb_put(skb, + sizeof(*sensb_req)); + + sensb_req->cmd = DIGITAL_CMD_SENSB_REQ; + sensb_req->afi = 0x00; /* All families and sub-families */ + sensb_req->param = DIGITAL_SENSB_N(0); + + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sensb_res, + NULL); + if (rc) + kfree_skb(skb); + + return rc; +} + static void digital_in_recv_sensf_res(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { @@ -877,6 +1091,18 @@ exit: dev_kfree_skb(resp); } +static void digital_tg_recv_atr_or_sensf_req(struct nfc_digital_dev *ddev, + void *arg, struct sk_buff *resp) +{ + if (!IS_ERR(resp) && (resp->len >= 2) && + (resp->data[1] == DIGITAL_CMD_SENSF_REQ)) + digital_tg_recv_sensf_req(ddev, arg, resp); + else + digital_tg_recv_atr_req(ddev, arg, resp); + + return; +} + static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev, struct digital_sensf_req *sensf_req) { @@ -887,7 +1113,7 @@ static int digital_tg_send_sensf_res(struct 
nfc_digital_dev *ddev, size = sizeof(struct digital_sensf_res); - if (sensf_req->rc != DIGITAL_SENSF_REQ_RC_NONE) + if (sensf_req->rc == DIGITAL_SENSF_REQ_RC_NONE) size -= sizeof(sensf_res->rd); skb = digital_skb_alloc(ddev, size); @@ -922,7 +1148,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev, digital_skb_add_crc_f(skb); rc = digital_tg_send_cmd(ddev, skb, 300, - digital_tg_recv_atr_req, NULL); + digital_tg_recv_atr_or_sensf_req, NULL); if (rc) kfree_skb(skb); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index a9f4d2e62d8d..677d24bb70f8 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -26,6 +26,8 @@ #include "hci.h" +#define MAX_FWI 4949 + static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, const u8 *param, size_t param_len, data_exchange_cb_t cb, void *cb_context) @@ -37,7 +39,7 @@ static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, * for all commands? */ return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd, - param, param_len, cb, cb_context, 3000); + param, param_len, cb, cb_context, MAX_FWI); } /* @@ -82,7 +84,7 @@ static int nfc_hci_execute_cmd(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, NFC_HCI_HCP_COMMAND, cmd, param, param_len, nfc_hci_execute_cb, &hcp_ew, - 3000); + MAX_FWI); if (hcp_ew.exec_result < 0) return hcp_ew.exec_result; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index d45b638e77c7..47403705197e 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -225,7 +225,7 @@ int nfc_hci_target_discovered(struct nfc_hci_dev *hdev, u8 gate) goto exit; } - targets->sens_res = be16_to_cpu(*(u16 *)atqa_skb->data); + targets->sens_res = be16_to_cpu(*(__be16 *)atqa_skb->data); targets->sel_res = sak_skb->data[0]; r = nfc_hci_get_param(hdev, NFC_HCI_RF_READER_A_GATE, @@ -380,34 +380,31 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev) if (r < 0) goto disconnect_all; - if (skb->len && skb->len == strlen(hdev->init_data.session_id)) - if (memcmp(hdev->init_data.session_id, skb->data, - skb->len) == 0) { - /* TODO ELa: restore gate<->pipe table from - * some TBD location. - * note: it doesn't seem possible to get the chip - * currently open gate/pipe table. - * It is only possible to obtain the supported - * gate list. - */ + if (skb->len && skb->len == strlen(hdev->init_data.session_id) && + (memcmp(hdev->init_data.session_id, skb->data, + skb->len) == 0) && hdev->ops->load_session) { + /* Restore gate<->pipe table from some proprietary location. 
*/ - /* goto exit - * For now, always do a full initialization */ - } + r = hdev->ops->load_session(hdev); - r = nfc_hci_disconnect_all_gates(hdev); - if (r < 0) - goto exit; + if (r < 0) + goto disconnect_all; + } else { - r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, - hdev->init_data.gates); - if (r < 0) - goto disconnect_all; + r = nfc_hci_disconnect_all_gates(hdev); + if (r < 0) + goto exit; - r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, - NFC_HCI_ADMIN_SESSION_IDENTITY, - hdev->init_data.session_id, - strlen(hdev->init_data.session_id)); + r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, + hdev->init_data.gates); + if (r < 0) + goto disconnect_all; + + r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, + NFC_HCI_ADMIN_SESSION_IDENTITY, + hdev->init_data.session_id, + strlen(hdev->init_data.session_id)); + } if (r == 0) goto exit; diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index bec6ed15f503..a3ad69a4c648 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -387,7 +387,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) __net_timestamp(skb); - nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); + nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX); return nfc_data_exchange(dev, local->target_idx, skb, nfc_llcp_recv, local); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index b4671958fcf9..51e788797317 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -680,16 +680,17 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, continue; if (skb_copy == NULL) { - skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC, true); if (skb_copy == NULL) continue; - data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE); + data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE); data[0] = local->dev ? local->dev->idx : 0xFF; - data[1] = direction; + data[1] = direction & 0x01; + data[1] |= (RAW_PAYLOAD_LLCP << 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); @@ -747,7 +748,7 @@ static void nfc_llcp_tx_work(struct work_struct *work) __net_timestamp(skb); nfc_llcp_send_to_raw_sock(local, skb, - NFC_LLCP_DIRECTION_TX); + NFC_DIRECTION_TX); ret = nfc_data_exchange(local->dev, local->target_idx, skb, nfc_llcp_recv, local); @@ -1476,7 +1477,7 @@ static void nfc_llcp_rx_work(struct work_struct *work) __net_timestamp(skb); - nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); + nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_RX); nfc_llcp_rx_skb(local, skb); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6c34ac978501..2b400e1a8695 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -861,6 +861,10 @@ static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) /* Get rid of skb owner, prior to sending to the driver. 
*/ skb_orphan(skb); + /* Send copy to sniffer */ + nfc_send_to_raw_sock(ndev->nfc_dev, skb, + RAW_PAYLOAD_NCI, NFC_DIRECTION_TX); + return ndev->ops->send(ndev, skb); } @@ -935,6 +939,11 @@ static void nci_rx_work(struct work_struct *work) struct sk_buff *skb; while ((skb = skb_dequeue(&ndev->rx_q))) { + + /* Send copy to sniffer */ + nfc_send_to_raw_sock(ndev->nfc_dev, skb, + RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 1e905097456b..f8f6af231381 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -366,7 +366,6 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, struct nci_rf_intf_activated_ntf *ntf, __u8 *data) { struct activation_params_poll_nfc_dep *poll; - int i; switch (ntf->activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: @@ -374,10 +373,8 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, poll = &ntf->activation_params.poll_nfc_dep; poll->atr_res_len = min_t(__u8, *data++, 63); pr_debug("atr_res_len %d\n", poll->atr_res_len); - if (poll->atr_res_len > 0) { - for (i = 0; i < poll->atr_res_len; i++) - poll->atr_res[poll->atr_res_len-1-i] = data[i]; - } + if (poll->atr_res_len > 0) + memcpy(poll->atr_res, data, poll->atr_res_len); break; default: diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 9d6e74f7e6b3..88d60064890e 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -40,6 +40,12 @@ struct nfc_rawsock { struct work_struct tx_work; bool tx_work_scheduled; }; + +struct nfc_sock_list { + struct hlist_head head; + rwlock_t lock; +}; + #define nfc_rawsock(sk) ((struct nfc_rawsock *) sk) #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index c27a6e86cae4..11c3544ea546 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -27,6 +27,24 @@ #include "nfc.h" +static struct nfc_sock_list raw_sk_list = { + .lock = __RW_LOCK_UNLOCKED(raw_sk_list.lock) +}; + +static void nfc_sock_link(struct nfc_sock_list *l, struct sock *sk) +{ + write_lock(&l->lock); + sk_add_node(sk, &l->head); + write_unlock(&l->lock); +} + +static void nfc_sock_unlink(struct nfc_sock_list *l, struct sock *sk) +{ + write_lock(&l->lock); + sk_del_node_init(sk); + write_unlock(&l->lock); +} + static void rawsock_write_queue_purge(struct sock *sk) { pr_debug("sk=%p\n", sk); @@ -57,6 +75,9 @@ static int rawsock_release(struct socket *sock) if (!sk) return 0; + if (sock->type == SOCK_RAW) + nfc_sock_unlink(&raw_sk_list, sk); + sock_orphan(sk); sock_put(sk); @@ -275,6 +296,26 @@ static const struct proto_ops rawsock_ops = { .mmap = sock_no_mmap, }; +static const struct proto_ops rawsock_raw_ops = { + .family = PF_NFC, + .owner = THIS_MODULE, + .release = rawsock_release, + .bind = sock_no_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = rawsock_recvmsg, + .mmap = sock_no_mmap, +}; + static void rawsock_destruct(struct sock *sk) { pr_debug("sk=%p\n", sk); @@ -300,10 +341,13 @@ static int rawsock_create(struct net *net, struct socket *sock, pr_debug("sock=%p\n", sock); - if (sock->type != SOCK_SEQPACKET) + if ((sock->type != SOCK_SEQPACKET) 
&& (sock->type != SOCK_RAW)) return -ESOCKTNOSUPPORT; - sock->ops = &rawsock_ops; + if (sock->type == SOCK_RAW) + sock->ops = &rawsock_raw_ops; + else + sock->ops = &rawsock_ops; sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); if (!sk) @@ -313,13 +357,53 @@ static int rawsock_create(struct net *net, struct socket *sock, sk->sk_protocol = nfc_proto->id; sk->sk_destruct = rawsock_destruct; sock->state = SS_UNCONNECTED; - - INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work); - nfc_rawsock(sk)->tx_work_scheduled = false; + if (sock->type == SOCK_RAW) + nfc_sock_link(&raw_sk_list, sk); + else { + INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work); + nfc_rawsock(sk)->tx_work_scheduled = false; + } return 0; } +void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, + u8 payload_type, u8 direction) +{ + struct sk_buff *skb_copy = NULL, *nskb; + struct sock *sk; + u8 *data; + + read_lock(&raw_sk_list.lock); + + sk_for_each(sk, &raw_sk_list.head) { + if (!skb_copy) { + skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC, true); + if (!skb_copy) + continue; + + data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE); + + data[0] = dev ? dev->idx : 0xFF; + data[1] = direction & 0x01; + data[1] |= (payload_type << 1); + } + + nskb = skb_clone(skb_copy, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&raw_sk_list.lock); + + kfree_skb(skb_copy); +} +EXPORT_SYMBOL(nfc_send_to_raw_sock); + static struct proto rawsock_proto = { .name = "NFC_RAW", .owner = THIS_MODULE, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 2c77e7b1a913..c36856a457ca 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -134,8 +134,8 @@ static int set_eth_addr(struct sk_buff *skb, skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); - memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a3276e3c4feb..0d407bca81e3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -44,11 +44,11 @@ #include <linux/netfilter_ipv4.h> #include <linux/inetdevice.h> #include <linux/list.h> -#include <linux/lockdep.h> #include <linux/openvswitch.h> #include <linux/rculist.h> #include <linux/dmi.h> -#include <linux/workqueue.h> +#include <linux/genetlink.h> +#include <net/genetlink.h> #include <net/genetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -62,6 +62,31 @@ int ovs_net_id __read_mostly; +static struct genl_family dp_packet_genl_family; +static struct genl_family dp_flow_genl_family; +static struct genl_family dp_datapath_genl_family; + +static struct genl_multicast_group ovs_dp_flow_multicast_group = { + .name = OVS_FLOW_MCGROUP +}; + +static struct genl_multicast_group ovs_dp_datapath_multicast_group = { + .name = OVS_DATAPATH_MCGROUP +}; + +struct genl_multicast_group ovs_dp_vport_multicast_group = { + .name = OVS_VPORT_MCGROUP +}; + +/* Check if need to build a reply message. + * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. 
*/ +static bool ovs_must_notify(struct genl_info *info, + const struct genl_multicast_group *grp) +{ + return info->nlhdr->nlmsg_flags & NLM_F_ECHO || + netlink_has_listeners(genl_info_net(info)->genl_sock, 0); +} + static void ovs_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info) { @@ -173,6 +198,7 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp, return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; } +/* Called with ovs_mutex or RCU read lock. */ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; @@ -262,16 +288,6 @@ out: u64_stats_update_end(&stats->syncp); } -static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_PACKET_FAMILY, - .version = OVS_PACKET_VERSION, - .maxattr = OVS_PACKET_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { @@ -524,7 +540,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->protocol = htons(ETH_P_802_2); /* Build an sw_flow for sending this packet. */ - flow = ovs_flow_alloc(false); + flow = ovs_flow_alloc(); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; @@ -590,6 +606,18 @@ static const struct genl_ops dp_packet_genl_ops[] = { } }; +static struct genl_family dp_packet_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_PACKET_FAMILY, + .version = OVS_PACKET_VERSION, + .maxattr = OVS_PACKET_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_packet_genl_ops, + .n_ops = ARRAY_SIZE(dp_packet_genl_ops), +}; + static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, struct ovs_dp_megaflow_stats *mega_stats) { @@ -621,26 +649,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, } } -static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { - [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, -}; - -static struct genl_family dp_flow_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_FLOW_FAMILY, - .version = OVS_FLOW_VERSION, - .maxattr = OVS_FLOW_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_flow_multicast_group = { - .name = OVS_FLOW_MCGROUP -}; - static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -652,8 +660,8 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ } -/* Called with ovs_mutex. */ -static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, +/* Called with ovs_mutex or RCU read lock. */ +static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -670,7 +678,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!ovs_header) return -EMSGSIZE; - ovs_header->dp_ifindex = get_dpifindex(dp); + ovs_header->dp_ifindex = dp_ifindex; /* Fill flow key. 
*/ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); @@ -693,6 +701,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, nla_nest_end(skb, nla); ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); + if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) goto nla_put_failure; @@ -720,9 +729,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, const struct sw_flow_actions *sf_acts; sf_acts = rcu_dereference_ovsl(flow->sf_acts); - err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb); + if (!err) nla_nest_end(skb, start); else { @@ -743,113 +752,128 @@ error: return err; } -static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, - struct genl_info *info) +/* May not be called with RCU read lock. */ +static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, + struct genl_info *info, + bool always) { - size_t len; + struct sk_buff *skb; + + if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) + return NULL; - len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); + skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOMEM); - return genlmsg_new_unicast(len, info, GFP_KERNEL); + return skb; } -static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, - struct datapath *dp, - struct genl_info *info, - u8 cmd) +/* Called with ovs_mutex. */ +static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, + int dp_ifindex, + struct genl_info *info, u8 cmd, + bool always) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow, info); - if (!skb) - return ERR_PTR(-ENOMEM); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, + always); + if (!skb || IS_ERR(skb)) + return skb; - retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, - info->snd_seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, + info->snd_portid, info->snd_seq, 0, + cmd); BUG_ON(retval < 0); return skb; } -static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) +static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow_key key, masked_key; - struct sw_flow *flow = NULL; + struct sw_flow *flow, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct sw_flow_actions *acts = NULL; + struct sw_flow_actions *acts; struct sw_flow_match match; - bool exact_5tuple; int error; - /* Extract key. */ + /* Must have key and actions. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) goto error; + if (!a[OVS_FLOW_ATTR_ACTIONS]) + goto error; - ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, &exact_5tuple, + /* Most of the time we need to allocate a new flow, do it before + * locking. + */ + new_flow = ovs_flow_alloc(); + if (IS_ERR(new_flow)) { + error = PTR_ERR(new_flow); + goto error; + } + + /* Extract key. */ + ovs_match_init(&match, &new_flow->unmasked_key, &mask); + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) - goto error; + goto err_kfree_flow; + + ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); /* Validate actions. 
*/ - if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); - error = PTR_ERR(acts); - if (IS_ERR(acts)) - goto error; + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) + goto err_kfree_flow; - ovs_flow_mask_key(&masked_key, &key, &mask); - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); - if (error) { - OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; - } - } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { - error = -EINVAL; - goto error; + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, + 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + goto err_kfree_acts; + } + + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - error = -ENODEV; - if (!dp) + if (unlikely(!dp)) { + error = -ENODEV; goto err_unlock_ovs; - + } /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow) { - /* Bail out if we're not allowed to create a new flow. */ - error = -ENOENT; - if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) - goto err_unlock_ovs; - - /* Allocate flow. */ - flow = ovs_flow_alloc(!exact_5tuple); - if (IS_ERR(flow)) { - error = PTR_ERR(flow); - goto err_unlock_ovs; - } - - flow->key = masked_key; - flow->unmasked_key = key; - rcu_assign_pointer(flow->sf_acts, acts); + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (likely(!flow)) { + rcu_assign_pointer(new_flow->sf_acts, acts); /* Put flow in bucket. */ - error = ovs_flow_tbl_insert(&dp->table, flow, &mask); - if (error) { + error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask); + if (unlikely(error)) { acts = NULL; - goto err_flow_free; + goto err_unlock_ovs; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(new_flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + ovs_unlock(); } else { - /* We found a matching flow. */ struct sw_flow_actions *old_acts; /* Bail out if we're not allowed to modify an existing flow. @@ -858,40 +882,154 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) * request. We also accept NLM_F_EXCL in case that bug ever * gets fixed. */ - error = -EEXIST; - if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && - info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE + | NLM_F_EXCL))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* The unmasked key has to be the same for flow updates. */ - if (!ovs_flow_cmp_unmasked_key(flow, &match)) + if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* Update actions. 
*/ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); + + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + ovs_unlock(); + ovs_nla_free_flow_actions(old_acts); + ovs_flow_free(new_flow, false); + } + + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); + return 0; + +err_unlock_ovs: + ovs_unlock(); + kfree_skb(reply); +err_kfree_acts: + kfree(acts); +err_kfree_flow: + ovs_flow_free(new_flow, false); +error: + return error; +} + +static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct sw_flow_key key, masked_key; + struct sw_flow *flow; + struct sw_flow_mask mask; + struct sk_buff *reply = NULL; + struct datapath *dp; + struct sw_flow_actions *old_acts = NULL, *acts = NULL; + struct sw_flow_match match; + int error; + + /* Extract key. */ + error = -EINVAL; + if (!a[OVS_FLOW_ATTR_KEY]) + goto error; + + ovs_match_init(&match, &key, &mask); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + if (error) + goto error; + + /* Validate actions. */ + if (a[OVS_FLOW_ATTR_ACTIONS]) { + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) + goto error; + + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + goto err_kfree_acts; + } + } + + /* Can allocate before locking if have acts. */ + if (acts) { + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; + } + } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + ovs_lock(); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (unlikely(!dp)) { + error = -ENODEV; + goto err_unlock_ovs; + } + /* Check that the flow exists. */ + flow = ovs_flow_tbl_lookup(&dp->table, &key); + if (unlikely(!flow)) { + error = -ENOENT; + goto err_unlock_ovs; + } + /* The unmasked key has to be the same for flow updates. */ + if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + error = -EEXIST; + goto err_unlock_ovs; + } + /* Update actions, if present. */ + if (likely(acts)) { + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); - /* Clear stats. */ - if (a[OVS_FLOW_ATTR_CLEAR]) - ovs_flow_stats_clear(flow); + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + } else { + /* Could not alloc without acts before locking. */ + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + if (unlikely(IS_ERR(reply))) { + error = PTR_ERR(reply); + goto err_unlock_ovs; + } } + + /* Clear stats. 
*/ + if (a[OVS_FLOW_ATTR_CLEAR]) + ovs_flow_stats_clear(flow); ovs_unlock(); - if (!IS_ERR(reply)) + if (reply) ovs_notify(&dp_flow_genl_family, reply, info); - else - genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, - 0, PTR_ERR(reply)); + if (old_acts) + ovs_nla_free_flow_actions(old_acts); + return 0; -err_flow_free: - ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); -err_kfree: + kfree_skb(reply); +err_kfree_acts: kfree(acts); error: return error; @@ -914,7 +1052,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -931,7 +1069,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, + OVS_FLOW_CMD_NEW, true); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -955,45 +1094,53 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sw_flow_match match; int err; + if (likely(a[OVS_FLOW_ATTR_KEY])) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + if (unlikely(err)) + return err; + } + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) { + if (unlikely(!dp)) { err = -ENODEV; goto unlock; } - if (!a[OVS_FLOW_ATTR_KEY]) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); - if (err) - goto unlock; - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { + if (unlikely(!flow || !ovs_flow_cmp_unmasked_key(flow, &match))) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow, info); - if (!reply) { - err = -ENOMEM; - goto unlock; - } - ovs_flow_tbl_remove(&dp->table, flow); + ovs_unlock(); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, - info->snd_seq, 0, OVS_FLOW_CMD_DEL); - BUG_ON(err < 0); + reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, + info, false); + if (likely(reply)) { + if (likely(!IS_ERR(reply))) { + rcu_read_lock(); /*To keep RCU checker happy. 
*/ + err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_DEL); + rcu_read_unlock(); + BUG_ON(err < 0); + + ovs_notify(&dp_flow_genl_family, reply, info); + } else { + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply)); + } + } ovs_flow_free(flow, true); - ovs_unlock(); - - ovs_notify(&dp_flow_genl_family, reply, info); return 0; unlock: ovs_unlock(); @@ -1024,7 +1171,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!flow) break; - if (ovs_flow_cmd_fill_info(flow, dp, skb, + if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) @@ -1037,11 +1184,17 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static const struct genl_ops dp_flow_genl_ops[] = { +static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { + [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, +}; + +static struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set + .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ @@ -1057,28 +1210,22 @@ static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_SET, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set, + .doit = ovs_flow_cmd_set, }, }; -static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { - [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, -}; - -static struct genl_family dp_datapath_genl_family = { +static struct genl_family dp_flow_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_DATAPATH_FAMILY, - .version = OVS_DATAPATH_VERSION, - .maxattr = OVS_DP_ATTR_MAX, + .name = OVS_FLOW_FAMILY, + .version = OVS_FLOW_VERSION, + .maxattr = OVS_FLOW_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_datapath_multicast_group = { - .name = OVS_DATAPATH_MCGROUP + .ops = dp_flow_genl_ops, + .n_ops = ARRAY_SIZE(dp_flow_genl_ops), + .mcgrps = &ovs_dp_flow_multicast_group, + .n_mcgrps = 1, }; static size_t ovs_dp_cmd_msg_size(void) @@ -1093,6 +1240,7 @@ static size_t ovs_dp_cmd_msg_size(void) return msgsize; } +/* Called with ovs_mutex or RCU read lock. 
*/ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1108,9 +1256,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, ovs_header->dp_ifindex = get_dpifindex(dp); - rcu_read_lock(); err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); - rcu_read_unlock(); if (err) goto nla_put_failure; @@ -1135,25 +1281,12 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, - struct genl_info *info, u8 cmd) +static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) { - struct sk_buff *skb; - int retval; - - skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); - if (!skb) - return ERR_PTR(-ENOMEM); - - retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } - return skb; + return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); } -/* Called with ovs_mutex. */ +/* Called with rcu_read_lock or ovs_mutex. */ static struct datapath *lookup_datapath(struct net *net, struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) @@ -1165,10 +1298,8 @@ static struct datapath *lookup_datapath(struct net *net, else { struct vport *vport; - rcu_read_lock(); vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; - rcu_read_unlock(); } return dp ? dp : ERR_PTR(-ENODEV); } @@ -1205,12 +1336,14 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_unlock_ovs; + goto err_free_reply; ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); @@ -1245,6 +1378,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_change(dp, a); + /* So far only local changes have been made, now need the lock. 
*/ + ovs_lock(); + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); @@ -1263,10 +1399,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto err_destroy_local_port; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail_rcu(&dp->list_node, &ovs_net->dps); @@ -1276,9 +1411,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_local_port: - ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); err_destroy_ports_array: + ovs_unlock(); kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); @@ -1287,8 +1421,8 @@ err_destroy_table: err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); -err_unlock_ovs: - ovs_unlock(); +err_free_reply: + kfree_skb(reply); err: return err; } @@ -1326,16 +1460,19 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto unlock; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_DEL); + BUG_ON(err < 0); __dp_destroy(dp); ovs_unlock(); @@ -1343,8 +1480,10 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1354,29 +1493,30 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; ovs_dp_change(dp, info->attrs); - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, - 0, err); - err = 0; - goto unlock; - } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1386,24 +1526,26 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + + rcu_read_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) { err = PTR_ERR(dp); - goto unlock; - } - - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - goto unlock; + goto err_unlock_free; } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); + rcu_read_unlock(); - ovs_unlock(); return genlmsg_reply(reply, info); -unlock: - ovs_unlock(); +err_unlock_free: + 
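[Editorial sketch] Note that ovs_dp_cmd_get() here trades ovs_lock() for rcu_read_lock(); that is only legal because lookup_datapath() is now annotated "Called with rcu_read_lock or ovs_mutex". Helpers shared between the two contexts are typically built on a lockdep-checked dereference; the idiom, with an illustrative demo_mutex:

	/* valid under rcu_read_lock() *or* with demo_mutex held */
	#define demo_dereference(p) \
		rcu_dereference_check((p), lockdep_is_held(&demo_mutex))

	struct demo_obj *obj = demo_dereference(table->slot);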
rcu_read_unlock(); + kfree_skb(reply); return err; } @@ -1430,7 +1572,13 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static const struct genl_ops dp_datapath_genl_ops[] = { +static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { + [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, +}; + +static struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = datapath_policy, @@ -1454,27 +1602,18 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }, }; -static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { - [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, - [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, -}; - -struct genl_family dp_vport_genl_family = { +static struct genl_family dp_datapath_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_VPORT_FAMILY, - .version = OVS_VPORT_VERSION, - .maxattr = OVS_VPORT_ATTR_MAX, + .name = OVS_DATAPATH_FAMILY, + .version = OVS_DATAPATH_VERSION, + .maxattr = OVS_DP_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_vport_multicast_group = { - .name = OVS_VPORT_MCGROUP + .ops = dp_datapath_genl_ops, + .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), + .mcgrps = &ovs_dp_datapath_multicast_group, + .n_mcgrps = 1, }; /* Called with ovs_mutex or RCU read lock. */ @@ -1516,7 +1655,12 @@ error: return err; } -/* Called with ovs_mutex or RCU read lock. */ +static struct sk_buff *ovs_vport_cmd_alloc_info(void) +{ + return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +} + +/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { @@ -1578,33 +1722,35 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) u32 port_no; int err; - err = -EINVAL; if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) - goto exit; + return -EINVAL; + + port_no = a[OVS_VPORT_ATTR_PORT_NO] + ? 
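[Editorial sketch] The nla_policy tables being moved next to their genl_ops here are enforced by generic netlink before .doit ever runs, so attribute type and length checks never need repeating inside the handlers. A sketch of a policy plus the equivalent open-coded parse for a nested attribute 'nest' (all DEMO_* names illustrative):

	static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
		[DEMO_ATTR_NAME]  = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
		[DEMO_ATTR_VALUE] = { .type = NLA_U32 },
	};

	struct nlattr *tb[DEMO_ATTR_MAX + 1];
	int err = nla_parse_nested(tb, DEMO_ATTR_MAX, nest, demo_policy);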
nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; + if (port_no >= DP_MAX_PORTS) + return -EFBIG; + + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) - goto exit_unlock; - - if (a[OVS_VPORT_ATTR_PORT_NO]) { - port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); - - err = -EFBIG; - if (port_no >= DP_MAX_PORTS) - goto exit_unlock; + goto exit_unlock_free; + if (port_no) { vport = ovs_vport_ovsl(dp, port_no); err = -EBUSY; if (vport) - goto exit_unlock; + goto exit_unlock_free; } else { for (port_no = 1; ; port_no++) { if (port_no >= DP_MAX_PORTS) { err = -EFBIG; - goto exit_unlock; + goto exit_unlock_free; } vport = ovs_vport_ovsl(dp, port_no); if (!vport) @@ -1622,22 +1768,19 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; - err = 0; - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - ovs_dp_detach_port(vport); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); -exit: + kfree_skb(reply); return err; } @@ -1648,28 +1791,26 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (a[OVS_VPORT_ATTR_TYPE] && nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; - goto exit_unlock; - } - - reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!reply) { - err = -ENOMEM; - goto exit_unlock; + goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_free; + goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_UPCALL_PID]) @@ -1683,10 +1824,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_vport_genl_family, reply, info); return 0; -exit_free: - kfree_skb(reply); -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1697,30 +1837,33 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (vport->port_no == OVSP_LOCAL) { err = -EINVAL; - goto exit_unlock; + goto exit_unlock_free; } - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - - err = 0; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_DEL); + BUG_ON(err < 0); ovs_dp_detach_port(vport); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1732,24 +1875,25 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, 
struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + rcu_read_lock(); vport = lookup_vport(sock_net(skb->sk), ovs_header, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; - - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - + goto exit_unlock_free; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); rcu_read_unlock(); return genlmsg_reply(reply, info); -exit_unlock: +exit_unlock_free: rcu_read_unlock(); + kfree_skb(reply); return err; } @@ -1792,7 +1936,16 @@ out: return skb->len; } -static const struct genl_ops dp_vport_genl_ops[] = { +static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { + [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, + [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, +}; + +static struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = vport_policy, @@ -1816,26 +1969,25 @@ static const struct genl_ops dp_vport_genl_ops[] = { }, }; -struct genl_family_and_ops { - struct genl_family *family; - const struct genl_ops *ops; - int n_ops; - const struct genl_multicast_group *group; +struct genl_family dp_vport_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_VPORT_FAMILY, + .version = OVS_VPORT_VERSION, + .maxattr = OVS_VPORT_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_vport_genl_ops, + .n_ops = ARRAY_SIZE(dp_vport_genl_ops), + .mcgrps = &ovs_dp_vport_multicast_group, + .n_mcgrps = 1, }; -static const struct genl_family_and_ops dp_genl_families[] = { - { &dp_datapath_genl_family, - dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), - &ovs_dp_datapath_multicast_group }, - { &dp_vport_genl_family, - dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), - &ovs_dp_vport_multicast_group }, - { &dp_flow_genl_family, - dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), - &ovs_dp_flow_multicast_group }, - { &dp_packet_genl_family, - dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), - NULL }, +static struct genl_family * const dp_genl_families[] = { + &dp_datapath_genl_family, + &dp_vport_genl_family, + &dp_flow_genl_family, + &dp_packet_genl_family, }; static void dp_unregister_genl(int n_families) @@ -1843,33 +1995,25 @@ static void dp_unregister_genl(int n_families) int i; for (i = 0; i < n_families; i++) - genl_unregister_family(dp_genl_families[i].family); + genl_unregister_family(dp_genl_families[i]); } static int dp_register_genl(void) { - int n_registered; int err; int i; - n_registered = 0; for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { - const struct genl_family_and_ops *f = &dp_genl_families[i]; - f->family->ops = f->ops; - f->family->n_ops = f->n_ops; - f->family->mcgrps = f->group; - f->family->n_mcgrps = f->group ? 
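[Editorial sketch] With the genl_family_and_ops wrapper gone, dp_genl_families becomes a flat array and the registration rollback (completed just below) turns index-based: on failure, unregister exactly the first i families that succeeded. The same idiom in isolation:

	static int demo_register_all(struct genl_family * const families[], int n)
	{
		int i, err;

		for (i = 0; i < n; i++) {
			err = genl_register_family(families[i]);
			if (err)
				goto error;
		}
		return 0;
	error:
		while (--i >= 0)	/* undo only the ones that succeeded */
			genl_unregister_family(families[i]);
		return err;
	}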
1 : 0; - err = genl_register_family(f->family); + err = genl_register_family(dp_genl_families[i]); if (err) goto error; - n_registered++; } return 0; error: - dp_unregister_genl(n_registered); + dp_unregister_genl(i); return err; } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 05317380fc03..7ede507500d7 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -194,7 +194,9 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); void ovs_dp_notify_wq(struct work_struct *work); -#define OVS_NLERR(fmt, ...) \ - pr_info_once("netlink: " fmt, ##__VA_ARGS__) - +#define OVS_NLERR(fmt, ...) \ +do { \ + if (net_ratelimit()) \ + pr_info("netlink: " fmt, ##__VA_ARGS__); \ +} while (0) #endif /* datapath.h */ diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2998989e76db..334751cb1528 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -64,88 +64,110 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) { struct flow_stats *stats; - __be16 tcp_flags = 0; - - if (!flow->stats.is_percpu) - stats = flow->stats.stat; - else - stats = this_cpu_ptr(flow->stats.cpu_stats); - - if ((flow->key.eth.type == htons(ETH_P_IP) || - flow->key.eth.type == htons(ETH_P_IPV6)) && - flow->key.ip.frag != OVS_FRAG_TYPE_LATER && - flow->key.ip.proto == IPPROTO_TCP && - likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { - tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); + __be16 tcp_flags = flow->key.tp.flags; + int node = numa_node_id(); + + stats = rcu_dereference(flow->stats[node]); + + /* Check if already have node-specific stats. */ + if (likely(stats)) { + spin_lock(&stats->lock); + /* Mark if we write on the pre-allocated stats. */ + if (node == 0 && unlikely(flow->stats_last_writer != node)) + flow->stats_last_writer = node; + } else { + stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */ + spin_lock(&stats->lock); + + /* If the current NUMA-node is the only writer on the + * pre-allocated stats keep using them. + */ + if (unlikely(flow->stats_last_writer != node)) { + /* A previous locker may have already allocated the + * stats, so we need to check again. If node-specific + * stats were already allocated, we update the pre- + * allocated stats as we have already locked them. + */ + if (likely(flow->stats_last_writer != NUMA_NO_NODE) + && likely(!rcu_dereference(flow->stats[node]))) { + /* Try to allocate node-specific stats. 
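[Editorial note] The OVS_NLERR change above trades pr_info_once() for a ratelimited pr_info(): the old form logged only the first malformed netlink request ever seen, hiding later — possibly different — offenders, while the new form keeps reporting but bounds log volume. The pattern is reusable as-is:

	#include <linux/net.h>		/* net_ratelimit() */

	#define DEMO_NLERR(fmt, ...)					\
	do {								\
		if (net_ratelimit())					\
			pr_info("netlink: " fmt, ##__VA_ARGS__);	\
	} while (0)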
*/ + struct flow_stats *new_stats; + + new_stats = + kmem_cache_alloc_node(flow_stats_cache, + GFP_THISNODE | + __GFP_NOMEMALLOC, + node); + if (likely(new_stats)) { + new_stats->used = jiffies; + new_stats->packet_count = 1; + new_stats->byte_count = skb->len; + new_stats->tcp_flags = tcp_flags; + spin_lock_init(&new_stats->lock); + + rcu_assign_pointer(flow->stats[node], + new_stats); + goto unlock; + } + } + flow->stats_last_writer = node; + } } - spin_lock(&stats->lock); stats->used = jiffies; stats->packet_count++; stats->byte_count += skb->len; stats->tcp_flags |= tcp_flags; +unlock: spin_unlock(&stats->lock); } -static void stats_read(struct flow_stats *stats, - struct ovs_flow_stats *ovs_stats, - unsigned long *used, __be16 *tcp_flags) -{ - spin_lock(&stats->lock); - if (!*used || time_after(stats->used, *used)) - *used = stats->used; - *tcp_flags |= stats->tcp_flags; - ovs_stats->n_packets += stats->packet_count; - ovs_stats->n_bytes += stats->byte_count; - spin_unlock(&stats->lock); -} - -void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, +/* Must be called with rcu_read_lock or ovs_mutex. */ +void ovs_flow_stats_get(const struct sw_flow *flow, + struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu; + int node; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - local_bh_disable(); - if (!flow->stats.is_percpu) { - stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); - } else { - for_each_possible_cpu(cpu) { - struct flow_stats *stats; + for_each_node(node) { + struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); - stats_read(stats, ovs_stats, used, tcp_flags); + if (stats) { + /* Local CPU may write on non-local stats, so we must + * block bottom-halves here. + */ + spin_lock_bh(&stats->lock); + if (!*used || time_after(stats->used, *used)) + *used = stats->used; + *tcp_flags |= stats->tcp_flags; + ovs_stats->n_packets += stats->packet_count; + ovs_stats->n_bytes += stats->byte_count; + spin_unlock_bh(&stats->lock); } } - local_bh_enable(); -} - -static void stats_reset(struct flow_stats *stats) -{ - spin_lock(&stats->lock); - stats->used = 0; - stats->packet_count = 0; - stats->byte_count = 0; - stats->tcp_flags = 0; - spin_unlock(&stats->lock); } +/* Called with ovs_mutex. */ void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu; - - local_bh_disable(); - if (!flow->stats.is_percpu) { - stats_reset(flow->stats.stat); - } else { - for_each_possible_cpu(cpu) { - stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); + int node; + + for_each_node(node) { + struct flow_stats *stats = ovsl_dereference(flow->stats[node]); + + if (stats) { + spin_lock_bh(&stats->lock); + stats->used = 0; + stats->packet_count = 0; + stats->byte_count = 0; + stats->tcp_flags = 0; + spin_unlock_bh(&stats->lock); } } - local_bh_enable(); } static int check_header(struct sk_buff *skb, int len) @@ -332,8 +354,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, /* The ICMPv6 type and code fields use the 16-bit transport port * fields, so we need to store them in 16-bit network byte order. 
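[Editorial sketch] The stats rework above replaces the percpu/shared split with one flow_stats slot per NUMA node, allocated lazily once a second node starts writing. A stripped-down model of the fast path — it assumes rcu_read_lock() is held (true on the OVS receive path) and elides the stats_last_writer heuristic and the lazy allocation itself:

	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>
	#include <linux/topology.h>

	struct demo_stats {
		spinlock_t	lock;
		u64		packets;
		u64		bytes;
	};

	static void demo_stats_update(struct demo_stats __rcu **stats,
				      unsigned int len)
	{
		struct demo_stats *s = rcu_dereference(stats[numa_node_id()]);

		if (unlikely(!s))			/* no node-local slot yet */
			s = rcu_dereference(stats[0]);	/* preallocated on node 0 */

		spin_lock(&s->lock);
		s->packets++;
		s->bytes += len;
		spin_unlock(&s->lock);
	}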
*/ - key->ipv6.tp.src = htons(icmp->icmp6_type); - key->ipv6.tp.dst = htons(icmp->icmp6_code); + key->tp.src = htons(icmp->icmp6_type); + key->tp.dst = htons(icmp->icmp6_code); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -372,14 +394,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll))) goto invalid; - memcpy(key->ipv6.nd.sll, - &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + ether_addr_copy(key->ipv6.nd.sll, + &nd->opt[offset+sizeof(*nd_opt)]); } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll))) goto invalid; - memcpy(key->ipv6.nd.tll, - &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + ether_addr_copy(key->ipv6.nd.tll, + &nd->opt[offset+sizeof(*nd_opt)]); } icmp_len -= opt_len; @@ -439,8 +461,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) * header in the linear data area. */ eth = eth_hdr(skb); - memcpy(key->eth.src, eth->h_source, ETH_ALEN); - memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); + ether_addr_copy(key->eth.src, eth->h_source); + ether_addr_copy(key->eth.dst, eth->h_dest); __skb_pull(skb, 2 * ETH_ALEN); /* We are going to push all headers that we pull, so no need to @@ -495,21 +517,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) if (key->ip.proto == IPPROTO_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv4.tp.src = tcp->source; - key->ipv4.tp.dst = tcp->dest; - key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp); + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == IPPROTO_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv4.tp.src = udp->source; - key->ipv4.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; } } else if (key->ip.proto == IPPROTO_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); - key->ipv4.tp.src = sctp->source; - key->ipv4.tp.dst = sctp->dest; + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; } } else if (key->ip.proto == IPPROTO_ICMP) { if (icmphdr_ok(skb)) { @@ -517,8 +539,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) /* The ICMP type and code fields use the 16-bit * transport port fields, so we need to store * them in 16-bit network byte order. 
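[Editorial note] The memcpy(..., ETH_ALEN) to ether_addr_copy() conversions in these hunks are behavior-preserving; the helper simply lets the compiler emit a 4-byte plus 2-byte move. The one constraint worth remembering is that both pointers must be at least 2-byte aligned, which sw_flow_key's layout guarantees:

	#include <linux/etherdevice.h>

	ether_addr_copy(key->eth.dst, eth->h_dest);	/* == memcpy(dst, src, ETH_ALEN),
							 * but requires u16 alignment */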
*/ - key->ipv4.tp.src = htons(icmp->type); - key->ipv4.tp.dst = htons(icmp->code); + key->tp.src = htons(icmp->type); + key->tp.dst = htons(icmp->code); } } @@ -538,8 +560,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) key->ip.proto = ntohs(arp->ar_op); memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); - memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); - memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); + ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha); + ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha); } } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ @@ -564,21 +586,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv6.tp.src = tcp->source; - key->ipv6.tp.dst = tcp->dest; - key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp); + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == NEXTHDR_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv6.tp.src = udp->source; - key->ipv6.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; } } else if (key->ip.proto == NEXTHDR_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); - key->ipv6.tp.src = sctp->source; - key->ipv6.tp.dst = sctp->dest; + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; } } else if (key->ip.proto == NEXTHDR_ICMP) { if (icmp6hdr_ok(skb)) { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 2d770e28a3a3..ac395d2cd821 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -47,7 +47,7 @@ struct ovs_key_ipv4_tunnel { __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; -}; +} __packed __aligned(4); /* Minimize padding. */ static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, const struct iphdr *iph, __be64 tun_id, @@ -71,7 +71,7 @@ struct sw_flow_key { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ - } phy; + } __packed phy; /* Safe when right after 'tun_key'. */ struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ @@ -84,23 +84,21 @@ struct sw_flow_key { u8 ttl; /* IP TTL/hop limit. */ u8 frag; /* One of OVS_FRAG_TYPE_*. */ } ip; + struct { + __be16 src; /* TCP/UDP/SCTP source port. */ + __be16 dst; /* TCP/UDP/SCTP destination port. */ + __be16 flags; /* TCP flags. */ + } tp; union { struct { struct { __be32 src; /* IP source address. */ __be32 dst; /* IP destination address. */ } addr; - union { - struct { - __be16 src; /* TCP/UDP/SCTP source port. */ - __be16 dst; /* TCP/UDP/SCTP destination port. */ - __be16 flags; /* TCP flags. */ - } tp; - struct { - u8 sha[ETH_ALEN]; /* ARP source hardware address. */ - u8 tha[ETH_ALEN]; /* ARP target hardware address. */ - } arp; - }; + struct { + u8 sha[ETH_ALEN]; /* ARP source hardware address. */ + u8 tha[ETH_ALEN]; /* ARP target hardware address. */ + } arp; } ipv4; struct { struct { @@ -109,11 +107,6 @@ struct sw_flow_key { } addr; __be32 label; /* IPv6 flow label. */ struct { - __be16 src; /* TCP/UDP/SCTP source port. */ - __be16 dst; /* TCP/UDP/SCTP destination port. */ - __be16 flags; /* TCP flags. */ - } tp; - struct { struct in6_addr target; /* ND target address. 
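[Editorial note] The __packed annotations (plus __aligned(4) on the tunnel key) added in flow.h matter because sw_flow_key ranges are hashed, masked and compared as raw bytes: a padding hole would inject indeterminate data into those operations. A compile-time guard in the same spirit — the 20 comes from summing the packed fields (8 + 4 + 4 + 2 + 1 + 1), and BUILD_BUG_ON must sit inside a function, e.g. next to the existing sw_flow_key asserts in ovs_flow_init():

	BUILD_BUG_ON(sizeof(struct ovs_key_ipv4_tunnel) != 20);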
*/ u8 sll[ETH_ALEN]; /* ND source link layer address. */ u8 tll[ETH_ALEN]; /* ND target link layer address. */ @@ -155,24 +148,22 @@ struct flow_stats { __be16 tcp_flags; /* Union of seen TCP flags. */ }; -struct sw_flow_stats { - bool is_percpu; - union { - struct flow_stats *stat; - struct flow_stats __percpu *cpu_stats; - }; -}; - struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; u32 hash; - + int stats_last_writer; /* NUMA-node id of the last writer on + * 'stats[0]'. + */ struct sw_flow_key key; struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - struct sw_flow_stats stats; + struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one + * is allocated at flow creation time, + * the rest are allocated on demand + * while holding the 'stats[0].lock'. + */ }; struct arp_eth_header { @@ -189,10 +180,10 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); -void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats, +void ovs_flow_stats_update(struct sw_flow *, struct sk_buff *); +void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, unsigned long *used, __be16 *tcp_flags); -void ovs_flow_stats_clear(struct sw_flow *flow); +void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4d000acaed0d..d757848da89c 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -16,6 +16,8 @@ * 02110-1301, USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include "flow.h" #include "datapath.h" #include <linux/uaccess.h> @@ -202,11 +204,11 @@ static bool match_validate(const struct sw_flow_match *match, if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; - if (match->key->ipv6.tp.src == + if (match->key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || - match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { + match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { key_expected |= 1 << OVS_KEY_ATTR_ND; - if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) + if (match->mask && (match->mask->key.tp.src == htons(0xffff))) mask_allowed |= 1 << OVS_KEY_ATTR_ND; } } @@ -216,14 +218,14 @@ static bool match_validate(const struct sw_flow_match *match, if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", - key_attrs, key_expected); + (unsigned long long)key_attrs, (unsigned long long)key_expected); return false; } if ((mask_attrs & mask_allowed) != mask_attrs) { /* Mask attributes check failed. 
*/ OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", - mask_attrs, mask_allowed); + (unsigned long long)mask_attrs, (unsigned long long)mask_allowed); return false; } @@ -266,20 +268,6 @@ static bool is_all_zero(const u8 *fp, size_t size) return true; } -static bool is_all_set(const u8 *fp, size_t size) -{ - int i; - - if (!fp) - return false; - - for (i = 0; i < size; i++) - if (fp[i] != 0xff) - return false; - - return true; -} - static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], u64 *attrsp, bool nz) @@ -501,9 +489,8 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return 0; } -static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple, - u64 attrs, const struct nlattr **a, - bool is_mask) +static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, + const struct nlattr **a, bool is_mask) { int err; u64 orig_attrs = attrs; @@ -560,11 +547,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); } - if (is_mask && exact_5tuple) { - if (match->mask->key.eth.type != htons(0xffff)) - *exact_5tuple = false; - } - if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { const struct ovs_key_ipv4 *ipv4_key; @@ -587,13 +569,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple SW_FLOW_KEY_PUT(match, ipv4.addr.dst, ipv4_key->ipv4_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV4); - - if (is_mask && exact_5tuple && *exact_5tuple) { - if (ipv4_key->ipv4_proto != 0xff || - ipv4_key->ipv4_src != htonl(0xffffffff) || - ipv4_key->ipv4_dst != htonl(0xffffffff)) - *exact_5tuple = false; - } } if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { @@ -625,13 +600,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV6); - - if (is_mask && exact_5tuple && *exact_5tuple) { - if (ipv6_key->ipv6_proto != 0xff || - !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) || - !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst))) - *exact_5tuple = false; - } } if (attrs & (1 << OVS_KEY_ATTR_ARP)) { @@ -662,32 +630,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple const struct ovs_key_tcp *tcp_key; tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - tcp_key->tcp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - tcp_key->tcp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_TCP); - - if (is_mask && exact_5tuple && *exact_5tuple && - (tcp_key->tcp_src != htons(0xffff) || - tcp_key->tcp_dst != htons(0xffff))) - *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.flags, + SW_FLOW_KEY_PUT(match, tp.flags, nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), is_mask); } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.flags, + SW_FLOW_KEY_PUT(match, tp.flags, nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), is_mask); } @@ -698,40 +652,17 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple const struct ovs_key_udp 
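[Editorial note] The (unsigned long long) casts added to these OVS_NLERR calls are the portable way to print a u64 with %llx: u64 is not guaranteed to be 'unsigned long long' on every architecture, so the explicit cast keeps -Wformat quiet everywhere:

	OVS_NLERR("bad key attrs (key_attrs=%llx)\n",
		  (unsigned long long)key_attrs);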
*udp_key; udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - udp_key->udp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - udp_key->udp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_UDP); - - if (is_mask && exact_5tuple && *exact_5tuple && - (udp_key->udp_src != htons(0xffff) || - udp_key->udp_dst != htons(0xffff))) - *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { const struct ovs_key_sctp *sctp_key; sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - sctp_key->sctp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - sctp_key->sctp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_SCTP); } @@ -739,9 +670,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple const struct ovs_key_icmp *icmp_key; icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); - SW_FLOW_KEY_PUT(match, ipv4.tp.src, + SW_FLOW_KEY_PUT(match, tp.src, htons(icmp_key->icmp_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + SW_FLOW_KEY_PUT(match, tp.dst, htons(icmp_key->icmp_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMP); } @@ -750,9 +681,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple const struct ovs_key_icmpv6 *icmpv6_key; icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); - SW_FLOW_KEY_PUT(match, ipv6.tp.src, + SW_FLOW_KEY_PUT(match, tp.src, htons(icmpv6_key->icmpv6_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + SW_FLOW_KEY_PUT(match, tp.dst, htons(icmpv6_key->icmpv6_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); } @@ -800,7 +731,6 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask, * attribute specifies the mask field of the wildcarded flow. 
*/ int ovs_nla_get_match(struct sw_flow_match *match, - bool *exact_5tuple, const struct nlattr *key, const struct nlattr *mask) { @@ -848,13 +778,10 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false); + err = ovs_key_from_nlattrs(match, key_attrs, a, false); if (err) return err; - if (exact_5tuple) - *exact_5tuple = true; - if (mask) { err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); if (err) @@ -892,7 +819,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true); + err = ovs_key_from_nlattrs(match, mask_attrs, a, true); if (err) return err; } else { @@ -982,8 +909,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; eth_key = nla_data(nla); - memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); - memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); + ether_addr_copy(eth_key->eth_src, output->eth.src); + ether_addr_copy(eth_key->eth_dst, output->eth.dst); if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { __be16 eth_type; @@ -1055,8 +982,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, arp_key->arp_sip = output->ipv4.addr.src; arp_key->arp_tip = output->ipv4.addr.dst; arp_key->arp_op = htons(output->ip.proto); - memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); - memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); + ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); + ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); } if ((swkey->eth.type == htons(ETH_P_IP) || @@ -1070,19 +997,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; tcp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - tcp_key->tcp_src = output->ipv4.tp.src; - tcp_key->tcp_dst = output->ipv4.tp.dst; - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, - output->ipv4.tp.flags)) - goto nla_put_failure; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - tcp_key->tcp_src = output->ipv6.tp.src; - tcp_key->tcp_dst = output->ipv6.tp.dst; - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, - output->ipv6.tp.flags)) - goto nla_put_failure; - } + tcp_key->tcp_src = output->tp.src; + tcp_key->tcp_dst = output->tp.dst; + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, + output->tp.flags)) + goto nla_put_failure; } else if (swkey->ip.proto == IPPROTO_UDP) { struct ovs_key_udp *udp_key; @@ -1090,13 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; udp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - udp_key->udp_src = output->ipv4.tp.src; - udp_key->udp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - udp_key->udp_src = output->ipv6.tp.src; - udp_key->udp_dst = output->ipv6.tp.dst; - } + udp_key->udp_src = output->tp.src; + udp_key->udp_dst = output->tp.dst; } else if (swkey->ip.proto == IPPROTO_SCTP) { struct ovs_key_sctp *sctp_key; @@ -1104,13 +1018,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; sctp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - sctp_key->sctp_src = swkey->ipv4.tp.src; - sctp_key->sctp_dst = swkey->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - sctp_key->sctp_src = swkey->ipv6.tp.src; - sctp_key->sctp_dst = swkey->ipv6.tp.dst; - } + sctp_key->sctp_src = output->tp.src; + sctp_key->sctp_dst = output->tp.dst; } else if (swkey->eth.type == htons(ETH_P_IP) && swkey->ip.proto 
== IPPROTO_ICMP) { struct ovs_key_icmp *icmp_key; @@ -1119,8 +1028,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; icmp_key = nla_data(nla); - icmp_key->icmp_type = ntohs(output->ipv4.tp.src); - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); + icmp_key->icmp_type = ntohs(output->tp.src); + icmp_key->icmp_code = ntohs(output->tp.dst); } else if (swkey->eth.type == htons(ETH_P_IPV6) && swkey->ip.proto == IPPROTO_ICMPV6) { struct ovs_key_icmpv6 *icmpv6_key; @@ -1130,8 +1039,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; icmpv6_key = nla_data(nla); - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); + icmpv6_key->icmpv6_type = ntohs(output->tp.src); + icmpv6_key->icmpv6_code = ntohs(output->tp.dst); if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { @@ -1143,8 +1052,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, nd_key = nla_data(nla); memcpy(nd_key->nd_target, &output->ipv6.nd.target, sizeof(nd_key->nd_target)); - memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); - memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); + ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); + ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); } } } @@ -1309,13 +1218,10 @@ static int validate_and_copy_sample(const struct nlattr *attr, static int validate_tp_port(const struct sw_flow_key *flow_key) { - if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) - return 0; - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) - return 0; - } + if ((flow_key->eth.type == htons(ETH_P_IP) || + flow_key->eth.type == htons(ETH_P_IPV6)) && + (flow_key->tp.src || flow_key->tp.dst)) + return 0; return -EINVAL; } diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index b31fbe28bc7a..440151045d39 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,7 +45,6 @@ int ovs_nla_put_flow(const struct sw_flow_key *, int ovs_nla_get_flow_metadata(struct sw_flow *flow, const struct nlattr *attr); int ovs_nla_get_match(struct sw_flow_match *match, - bool *exact_5tuple, const struct nlattr *, const struct nlattr *); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 3c268b3d71c3..574c3abc9b30 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -48,6 +48,7 @@ #define REHASH_INTERVAL (10 * 60 * HZ) static struct kmem_cache *flow_cache; +struct kmem_cache *flow_stats_cache __read_mostly; static u16 range_n_bytes(const struct sw_flow_key_range *range) { @@ -57,8 +58,10 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range) void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask) { - const long *m = (long *)((u8 *)&mask->key + mask->range.start); - const long *s = (long *)((u8 *)src + mask->range.start); + const long *m = (const long *)((const u8 *)&mask->key + + mask->range.start); + const long *s = (const long *)((const u8 *)src + + mask->range.start); long *d = (long *)((u8 *)dst + mask->range.start); int i; @@ -70,10 +73,11 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, *d++ = *s++ & *m++; } -struct sw_flow *ovs_flow_alloc(bool percpu_stats) +struct sw_flow *ovs_flow_alloc(void) { struct 
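[Editorial sketch] ovs_flow_mask_key(), constified in the flow_table.c hunk above, is the hot masked-copy: it ANDs key and mask one long at a time, which is safe because ovs_flow_init() (just below) BUILD_BUG_ONs that sw_flow_key's size and alignment are multiples of sizeof(long). The same loop in generic form:

	static void masked_copy(long *dst, const long *src, const long *mask,
				size_t bytes)	/* caller guarantees bytes % sizeof(long) == 0 */
	{
		size_t i;

		for (i = 0; i < bytes / sizeof(long); i++)
			dst[i] = src[i] & mask[i];
	}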
sw_flow *flow; - int cpu; + struct flow_stats *stats; + int node; flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); if (!flow) @@ -81,27 +85,22 @@ struct sw_flow *ovs_flow_alloc(bool percpu_stats) flow->sf_acts = NULL; flow->mask = NULL; + flow->stats_last_writer = NUMA_NO_NODE; - flow->stats.is_percpu = percpu_stats; + /* Initialize the default stat node. */ + stats = kmem_cache_alloc_node(flow_stats_cache, + GFP_KERNEL | __GFP_ZERO, 0); + if (!stats) + goto err; - if (!percpu_stats) { - flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL); - if (!flow->stats.stat) - goto err; + spin_lock_init(&stats->lock); - spin_lock_init(&flow->stats.stat->lock); - } else { - flow->stats.cpu_stats = alloc_percpu(struct flow_stats); - if (!flow->stats.cpu_stats) - goto err; + RCU_INIT_POINTER(flow->stats[0], stats); - for_each_possible_cpu(cpu) { - struct flow_stats *cpu_stats; + for_each_node(node) + if (node != 0) + RCU_INIT_POINTER(flow->stats[node], NULL); - cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); - spin_lock_init(&cpu_stats->lock); - } - } return flow; err: kmem_cache_free(flow_cache, flow); @@ -138,11 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { - kfree((struct sf_flow_acts __force *)flow->sf_acts); - if (flow->stats.is_percpu) - free_percpu(flow->stats.cpu_stats); - else - kfree(flow->stats.stat); + int node; + + kfree((struct sw_flow_actions __force *)flow->sf_acts); + for_each_node(node) + if (flow->stats[node]) + kmem_cache_free(flow_stats_cache, + (struct flow_stats __force *)flow->stats[node]); kmem_cache_free(flow_cache, flow); } @@ -158,25 +159,6 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred) if (!flow) return; - if (flow->mask) { - struct sw_flow_mask *mask = flow->mask; - - /* ovs-lock is required to protect mask-refcount and - * mask list. - */ - ASSERT_OVSL(); - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) { - list_del_rcu(&mask->list); - if (deferred) - kfree_rcu(mask, rcu); - else - kfree(mask); - } - } - if (deferred) call_rcu(&flow->rcu, rcu_free_flow_callback); else @@ -375,7 +357,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) static u32 flow_hash(const struct sw_flow_key *key, int key_start, int key_end) { - u32 *hash_key = (u32 *)((u8 *)key + key_start); + const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); int hash_u32s = (key_end - key_start) >> 2; /* Make sure number of hash bytes are multiple of u32. */ @@ -397,8 +379,8 @@ static bool cmp_key(const struct sw_flow_key *key1, const struct sw_flow_key *key2, int key_start, int key_end) { - const long *cp1 = (long *)((u8 *)key1 + key_start); - const long *cp2 = (long *)((u8 *)key2 + key_start); + const long *cp1 = (const long *)((const u8 *)key1 + key_start); + const long *cp2 = (const long *)((const u8 *)key2 + key_start); long diffs = 0; int i; @@ -490,6 +472,25 @@ static struct table_instance *table_instance_expand(struct table_instance *ti) return table_instance_rehash(ti, ti->n_buckets * 2); } +/* Remove 'mask' from the mask list, if it is not needed any more. */ +static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) +{ + if (mask) { + /* ovs-lock is required to protect mask-refcount and + * mask list. + */ + ASSERT_OVSL(); + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + kfree_rcu(mask, rcu); + } + } +} + +/* Must be called with OVS mutex held. 
*/ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); @@ -497,6 +498,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[ti->node_ver]); table->count--; + + /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be + * accessible as long as the RCU read lock is held. + */ + flow_mask_remove(table, flow->mask); } static struct sw_flow_mask *mask_alloc(void) @@ -513,8 +519,8 @@ static struct sw_flow_mask *mask_alloc(void) static bool mask_equal(const struct sw_flow_mask *a, const struct sw_flow_mask *b) { - u8 *a_ = (u8 *)&a->key + a->range.start; - u8 *b_ = (u8 *)&b->key + b->range.start; + const u8 *a_ = (const u8 *)&a->key + a->range.start; + const u8 *b_ = (const u8 *)&b->key + b->range.start; return (a->range.end == b->range.end) && (a->range.start == b->range.start) @@ -559,6 +565,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, return 0; } +/* Must be called with OVS mutex held. */ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, struct sw_flow_mask *mask) { @@ -597,16 +604,28 @@ int ovs_flow_init(void) BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, - 0, NULL); + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) + + (num_possible_nodes() + * sizeof(struct flow_stats *)), + 0, 0, NULL); if (flow_cache == NULL) return -ENOMEM; + flow_stats_cache + = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (flow_stats_cache == NULL) { + kmem_cache_destroy(flow_cache); + flow_cache = NULL; + return -ENOMEM; + } + return 0; } /* Uninitializes the flow module. */ void ovs_flow_exit(void) { + kmem_cache_destroy(flow_stats_cache); kmem_cache_destroy(flow_cache); } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index baaeb101924d..ca8a5820f615 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -52,10 +52,12 @@ struct flow_table { unsigned int count; }; +extern struct kmem_cache *flow_stats_cache; + int ovs_flow_init(void); void ovs_flow_exit(void); -struct sw_flow *ovs_flow_alloc(bool percpu_stats); +struct sw_flow *ovs_flow_alloc(void); void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index ebb6e2442554..35ec4fed09e2 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -172,7 +172,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? 
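[Editorial note] Moving mask teardown into flow_mask_remove(), called from ovs_flow_tbl_remove() above, also simplifies the free: the mask is now always released with kfree_rcu(), so a reader that grabbed the pointer before list_del_rcu() stays safe until a grace period elapses. The idiom:

	if (--mask->ref_count == 0) {
		list_del_rcu(&mask->list);
		kfree_rcu(mask, rcu);	/* 'rcu' names the struct rcu_head member */
	}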
htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, @@ -256,7 +256,7 @@ static void gre_tnl_destroy(struct vport *vport) ovs_net = net_generic(net, ovs_net_id); - rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); + RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL); ovs_vport_deferred_free(vport); gre_exit(); } diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 729c68763fe7..789af9280e77 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -130,7 +130,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; - SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); + netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index e797a50ac2be..0edbd95c60e7 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -122,7 +122,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false); + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -170,7 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; inet_get_local_port_range(net, &port_min, &port_max); src_port = vxlan_src_port(port_min, port_max, skb); @@ -180,7 +180,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8)); + htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8), + false); if (err < 0) ip_rt_put(rt); error: diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index d7e50a17396c..8d721e62f388 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -172,7 +172,7 @@ void ovs_vport_deferred_free(struct vport *vport); */ static inline void *vport_priv(const struct vport *vport) { - return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); + return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); } /** @@ -185,9 +185,9 @@ static inline void *vport_priv(const struct vport *vport) * the result of a hash table lookup. @priv must point to the start of the * private data area. 
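[Editorial note] The rcu_assign_pointer() to RCU_INIT_POINTER() switch in gre_tnl_destroy() above is a common cleanup: publishing NULL needs no write barrier, since there is no pointed-to data a reader could observe half-initialized, so the cheaper initializer form is correct there. Contrast:

	rcu_assign_pointer(slot, new_obj);	/* publishing real data: barrier needed */
	RCU_INIT_POINTER(slot, NULL);		/* NULL (or pre-publication init): no barrier */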
*/ -static inline struct vport *vport_from_priv(const void *priv) +static inline struct vport *vport_from_priv(void *priv) { - return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); + return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); } void ovs_vport_receive(struct vport *, struct sk_buff *, diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 37be6e226d1b..1dde91e3dc70 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -298,7 +298,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rds_ib_stats_inc(s_ib_tx_cq_event); if (wc.wr_id == RDS_IB_ACK_WR_ID) { - if (ic->i_ack_queued + HZ/2 < jiffies) + if (time_after(jiffies, ic->i_ack_queued + HZ/2)) rds_ib_stats_inc(s_ib_tx_stalled); rds_ib_ack_send_complete(ic); continue; @@ -315,7 +315,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rm = rds_ib_send_unmap_op(ic, send, wc.status); - if (send->s_queued + HZ/2 < jiffies) + if (time_after(jiffies, send->s_queued + HZ/2)) rds_ib_stats_inc(s_ib_tx_stalled); if (send->s_op) { diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index e40c3c5db2c4..9105ea03aec5 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -232,7 +232,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) } if (wc.wr_id == RDS_IW_ACK_WR_ID) { - if (ic->i_ack_queued + HZ/2 < jiffies) + if (time_after(jiffies, ic->i_ack_queued + HZ/2)) rds_iw_stats_inc(s_iw_tx_stalled); rds_iw_ack_send_complete(ic); continue; @@ -267,7 +267,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) send->s_wr.opcode = 0xdead; send->s_wr.num_sge = 1; - if (send->s_queued + HZ/2 < jiffies) + if (time_after(jiffies, send->s_queued + HZ/2)) rds_iw_stats_inc(s_iw_tx_stalled); /* If a RDMA operation produced an error, signal this right diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c index 89c91515ed0c..139239d2cb22 100644 --- a/net/rds/iw_sysctl.c +++ b/net/rds/iw_sysctl.c @@ -111,8 +111,7 @@ static struct ctl_table rds_iw_sysctl_table[] = { void rds_iw_sysctl_exit(void) { - if (rds_iw_sysctl_hdr) - unregister_net_sysctl_table(rds_iw_sysctl_hdr); + unregister_net_sysctl_table(rds_iw_sysctl_hdr); } int rds_iw_sysctl_init(void) diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index c2be901d19ee..6cd9d1deafc3 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -168,7 +168,7 @@ static int rds_rdma_listen_init(void) return ret; } - sin.sin_family = AF_INET, + sin.sin_family = AF_INET; sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); sin.sin_port = (__force u16)htons(RDS_PORT); diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index b5cb2aa08f33..c3b0cd43eb56 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -94,8 +94,7 @@ static struct ctl_table rds_sysctl_rds_table[] = { void rds_sysctl_exit(void) { - if (rds_sysctl_reg_table) - unregister_net_sysctl_table(rds_sysctl_reg_table); + unregister_net_sysctl_table(rds_sysctl_reg_table); } int rds_sysctl_init(void) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 4e638f851185..23ab4dcd1d9f 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -153,7 +153,7 @@ int rds_tcp_listen_init(void) sock->sk->sk_data_ready = rds_tcp_listen_data_ready; write_unlock_bh(&sock->sk->sk_callback_lock); - sin.sin_family = PF_INET, + sin.sin_family = PF_INET; sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); sin.sin_port = (__force u16)htons(RDS_TCP_PORT); diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c 
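[Editorial note] The RDS hunks below fix jiffies comparisons for wraparound: a raw "queued + HZ/2 < jiffies" goes wrong when jiffies wraps, while time_after() compares via signed subtraction and stays correct across the wrap. (The same series also fixes two "sin.sin_family = AF_INET," comma-for-semicolon typos and drops NULL checks that unregister_net_sysctl_table() already performs internally.)

	#include <linux/jiffies.h>

	if (time_after(jiffies, send->s_queued + HZ / 2))	/* > 500 ms in flight */
		rds_iw_stats_inc(s_iw_tx_stalled);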
index bd2a5b90400c..14c98e48f261 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -36,8 +36,6 @@ struct rfkill_gpio_data { struct gpio_desc *shutdown_gpio; struct rfkill *rfkill_dev; - char *reset_name; - char *shutdown_name; struct clk *clk; bool clk_enabled; @@ -47,17 +45,14 @@ static int rfkill_gpio_set_power(void *data, bool blocked) { struct rfkill_gpio_data *rfkill = data; - if (blocked) { - gpiod_set_value(rfkill->shutdown_gpio, 0); - gpiod_set_value(rfkill->reset_gpio, 0); - if (!IS_ERR(rfkill->clk) && rfkill->clk_enabled) - clk_disable(rfkill->clk); - } else { - if (!IS_ERR(rfkill->clk) && !rfkill->clk_enabled) - clk_enable(rfkill->clk); - gpiod_set_value(rfkill->reset_gpio, 1); - gpiod_set_value(rfkill->shutdown_gpio, 1); - } + if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled) + clk_enable(rfkill->clk); + + gpiod_set_value_cansleep(rfkill->shutdown_gpio, !blocked); + gpiod_set_value_cansleep(rfkill->reset_gpio, !blocked); + + if (blocked && !IS_ERR(rfkill->clk) && rfkill->clk_enabled) + clk_disable(rfkill->clk); rfkill->clk_enabled = blocked; @@ -87,10 +82,8 @@ static int rfkill_gpio_probe(struct platform_device *pdev) { struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; struct rfkill_gpio_data *rfkill; - const char *clk_name = NULL; struct gpio_desc *gpio; int ret; - int len; rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL); if (!rfkill) @@ -101,28 +94,15 @@ static int rfkill_gpio_probe(struct platform_device *pdev) if (ret) return ret; } else if (pdata) { - clk_name = pdata->power_clk_name; rfkill->name = pdata->name; rfkill->type = pdata->type; } else { return -ENODEV; } - len = strlen(rfkill->name); - rfkill->reset_name = devm_kzalloc(&pdev->dev, len + 7, GFP_KERNEL); - if (!rfkill->reset_name) - return -ENOMEM; - - rfkill->shutdown_name = devm_kzalloc(&pdev->dev, len + 10, GFP_KERNEL); - if (!rfkill->shutdown_name) - return -ENOMEM; + rfkill->clk = devm_clk_get(&pdev->dev, NULL); - snprintf(rfkill->reset_name, len + 6 , "%s_reset", rfkill->name); - snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", rfkill->name); - - rfkill->clk = devm_clk_get(&pdev->dev, clk_name); - - gpio = devm_gpiod_get_index(&pdev->dev, rfkill->reset_name, 0); + gpio = devm_gpiod_get_index(&pdev->dev, "reset", 0); if (!IS_ERR(gpio)) { ret = gpiod_direction_output(gpio, 0); if (ret) @@ -130,7 +110,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->reset_gpio = gpio; } - gpio = devm_gpiod_get_index(&pdev->dev, rfkill->shutdown_name, 1); + gpio = devm_gpiod_get_index(&pdev->dev, "shutdown", 1); if (!IS_ERR(gpio)) { ret = gpiod_direction_output(gpio, 0); if (ret) @@ -146,14 +126,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } - if (pdata && pdata->gpio_runtime_setup) { - ret = pdata->gpio_runtime_setup(pdev); - if (ret) { - dev_err(&pdev->dev, "can't set up gpio\n"); - return ret; - } - } - rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); @@ -174,20 +146,23 @@ static int rfkill_gpio_probe(struct platform_device *pdev) static int rfkill_gpio_remove(struct platform_device *pdev) { struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev); - struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; - if (pdata && pdata->gpio_runtime_close) - pdata->gpio_runtime_close(pdev); rfkill_unregister(rfkill->rfkill_dev); rfkill_destroy(rfkill->rfkill_dev); return 0; } +#ifdef CONFIG_ACPI static const struct acpi_device_id 
rfkill_acpi_match[] = { + { "BCM2E1A", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E39", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E3D", RFKILL_TYPE_BLUETOOTH }, { "BCM4752", RFKILL_TYPE_GPS }, + { "LNV4752", RFKILL_TYPE_GPS }, { }, }; +#endif static struct platform_driver rfkill_gpio_driver = { .probe = rfkill_gpio_probe, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index bdbdb1a7920a..45527e6b52db 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -134,7 +134,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) int err; int tp_created = 0; - if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_capable(skb, CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTFILTER) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: @@ -317,7 +318,8 @@ replay: } } - err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh); + err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, + n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE); if (err == 0) { if (tp_created) { spin_lock_bh(root_lock); @@ -504,7 +506,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) { #ifdef CONFIG_NET_CLS_ACT { @@ -513,7 +515,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, INIT_LIST_HEAD(&exts->actions); if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", TCA_ACT_NOREPLACE, + "police", ovr, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -523,7 +525,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } else if (exts->action && tb[exts->action]) { int err; err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, TCA_ACT_NOREPLACE, + NULL, ovr, TCA_ACT_BIND, &exts->actions); if (err) return err; @@ -543,14 +545,12 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT - if (!list_empty(&src->actions)) { - LIST_HEAD(tmp); - tcf_tree_lock(tp); - list_splice_init(&dst->actions, &tmp); - list_splice(&src->actions, &dst->actions); - tcf_tree_unlock(tp); - tcf_action_destroy(&tmp, TCA_ACT_UNBIND); - } + LIST_HEAD(tmp); + tcf_tree_lock(tp); + list_splice_init(&dst->actions, &tmp); + list_splice(&src->actions, &dst->actions); + tcf_tree_unlock(tp); + tcf_action_destroy(&tmp, TCA_ACT_UNBIND); #endif } EXPORT_SYMBOL(tcf_exts_change); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index e98ca99c202b..0ae1813e3e90 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -130,14 +130,14 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { int err; struct tcf_exts e; struct tcf_ematch_tree t; tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -161,7 +161,7 @@ errout: static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, bool 
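The cls_api.c change above is the interesting one: the netlink entry point now derives a replace/no-replace decision from NLM_F_CREATE and threads it as the new `bool ovr` argument through every classifier's ->change() into tcf_exts_validate(), instead of hard-coding TCA_ACT_NOREPLACE at the leaves (it also tightens the permission check from netlink_capable() to netlink_ns_capable() against the namespace's user_ns). A sketch of the plumbing pattern, assuming the kernel's TCA_ACT_NOREPLACE/TCA_ACT_REPLACE values of 0/1; all names ending in _example are hypothetical:

#include <linux/types.h>
#include <linux/netlink.h>

static int leaf_validate_example(bool ovr)
{
        /* where TCA_ACT_NOREPLACE used to be hard-coded, consult ovr */
        return 0;
}

static int entry_point_example(struct nlmsghdr *n)
{
        /* NLM_F_CREATE asks not to clobber an existing entry; its
         * absence means the request may replace. */
        bool ovr = !(n->nlmsg_flags & NLM_F_CREATE);

        return leaf_validate_example(ovr);
}
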
ovr) { int err; struct basic_head *head = tp->root; @@ -179,7 +179,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (f != NULL) { if (handle && f->handle != handle) return -EINVAL; - return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); + return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); } err = -ENOBUFS; @@ -206,7 +206,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, f->handle = head->hgenerator; } - err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); + err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); if (err < 0) goto errout; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 8e3cf49118e3..13f64df2c710 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -156,11 +156,11 @@ static void cls_bpf_put(struct tcf_proto *tp, unsigned long f) static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct cls_bpf_prog *prog, unsigned long base, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { struct sock_filter *bpf_ops, *bpf_old; struct tcf_exts exts; - struct sock_fprog tmp; + struct sock_fprog_kern tmp; struct sk_filter *fp, *fp_old; u16 bpf_size, bpf_len; u32 classid; @@ -170,7 +170,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, return -EINVAL; tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts); + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); if (ret < 0) return ret; @@ -191,7 +191,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); tmp.len = bpf_len; - tmp.filter = (struct sock_filter __user *) bpf_ops; + tmp.filter = bpf_ops; ret = sk_unattached_filter_create(&fp, &tmp); if (ret) @@ -242,7 +242,7 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct cls_bpf_head *head = tp->root; struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg; @@ -260,7 +260,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (handle && prog->handle != handle) return -EINVAL; return cls_bpf_modify_existing(net, tp, prog, base, tb, - tca[TCA_RATE]); + tca[TCA_RATE], ovr); } prog = kzalloc(sizeof(*prog), GFP_KERNEL); @@ -277,7 +277,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]); + ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr); if (ret < 0) goto errout; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 8e2158ab551c..cacf01bd04f0 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -83,7 +83,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = tp->root; @@ -119,7 +119,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, return err; tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, 
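Besides the `ovr` plumbing, the cls_bpf hunks above switch from struct sock_fprog to struct sock_fprog_kern, the kernel-internal variant whose filter member is a plain kernel pointer, so the bogus `(struct sock_filter __user *)` cast disappears. A hedged sketch of building an unattached classic-BPF filter with that API; build_filter_example() is hypothetical:

#include <linux/kernel.h>
#include <linux/filter.h>

static struct sk_filter *build_filter_example(void)
{
        static struct sock_filter insns[] = {
                BPF_STMT(BPF_RET | BPF_K, 0xffff),  /* accept whole packet */
        };
        struct sock_fprog_kern fprog = {
                .len    = ARRAY_SIZE(insns),
                .filter = insns,        /* plain kernel pointer, no cast */
        };
        struct sk_filter *fp;

        if (sk_unattached_filter_create(&fp, &fprog))
                return NULL;            /* error code discarded for brevity */
        return fp;
}
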
tca[TCA_RATE], &e, ovr); if (err < 0) return err; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 257029c54332..35be16f7c192 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -349,7 +349,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct flow_head *head = tp->root; struct flow_filter *f; @@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, } tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 63a3ce75c02e..861b03ccfed0 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -169,7 +169,7 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base) + struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr) { struct fw_head *head = tp->root; struct tcf_exts e; @@ -177,7 +177,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, int err; tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; @@ -218,7 +218,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct fw_head *head = tp->root; struct fw_filter *f = (struct fw_filter *) *arg; @@ -236,7 +236,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (f != NULL) { if (f->id != handle && handle) return -EINVAL; - return fw_change_attrs(net, tp, f, tb, tca, base); + return fw_change_attrs(net, tp, f, tb, tca, base, ovr); } if (!handle) @@ -264,7 +264,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); f->id = handle; - err = fw_change_attrs(net, tp, f, tb, tca, base); + err = fw_change_attrs(net, tp, f, tb, tca, base, ovr); if (err < 0) goto errout; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1ad3068f2ce1..dd9fc2523c76 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -333,7 +333,8 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { static int route4_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct route4_filter *f, u32 handle, struct route4_head *head, - struct nlattr **tb, struct nlattr *est, int new) + struct nlattr **tb, struct nlattr *est, int new, + bool ovr) { int err; u32 id = 0, to = 0, nhandle = 0x8000; @@ -343,7 +344,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -428,7 +429,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct 
route4_head *head = tp->root; struct route4_filter *f, *f1, **fp; @@ -455,7 +456,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, old_handle = f->handle; err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], 0); + tca[TCA_RATE], 0, ovr); if (err < 0) return err; @@ -479,7 +480,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], 1); + tca[TCA_RATE], 1, ovr); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 19f8e5dfa8bd..1020e233a5d6 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -415,7 +415,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct rsvp_head *data = tp->root; struct rsvp_filter *f, **fp; @@ -436,7 +436,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, return err; tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index f435a88d899a..c721cd4a469f 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -198,7 +198,7 @@ static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; @@ -208,7 +208,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcf_exts e; tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -341,7 +341,7 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, bool ovr) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -361,7 +361,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, return err; return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE]); + tca[TCA_RATE], ovr); } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 84c28daff848..c39b583ace32 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -486,13 +486,13 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_hnode *ht, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { int err; struct tcf_exts e; tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -545,7 +545,7 @@ errout: static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -569,7 
+569,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; return u32_set_parms(net, tp, base, n->ht_up, n, tb, - tca[TCA_RATE]); + tca[TCA_RATE], ovr); } if (tb[TCA_U32_DIVISOR]) { @@ -656,7 +656,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]); + err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr); if (err == 0) { struct tc_u_knode **ins; for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 400769014bbd..58bed7599db7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -563,7 +563,7 @@ out: } EXPORT_SYMBOL(__qdisc_calculate_pkt_len); -void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) +void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) { if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", @@ -1084,7 +1084,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *p = NULL; int err; - if ((n->nlmsg_type != RTM_GETQDISC) && !netlink_capable(skb, CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETQDISC) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1151,7 +1152,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *q, *p; int err; - if (!netlink_capable(skb, CAP_NET_ADMIN)) + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: @@ -1490,7 +1491,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) u32 qid; int err; - if ((n->nlmsg_type != RTM_GETTCLASS) && !netlink_capable(skb, CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTCLASS) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 2aee02802c27..ed30e436128b 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -391,12 +391,7 @@ static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = { static void choke_free(void *addr) { - if (addr) { - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); - } + kvfree(addr); } static int choke_change(struct Qdisc *sch, struct nlattr *opt) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 8302717ea303..7bbbfe112192 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -391,8 +391,10 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) while (1) { cl = list_first_entry(&q->active, struct drr_class, alist); skb = cl->qdisc->ops->peek(cl->qdisc); - if (skb == NULL) + if (skb == NULL) { + qdisc_warn_nonwc(__func__, cl->qdisc); goto out; + } len = qdisc_pkt_len(skb); if (len <= cl->deficit) { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 23c682b42f99..ba32c2b005d0 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -591,10 +591,7 @@ static void *fq_alloc_node(size_t sz, int node) static void fq_free(void *addr) { - if (addr && is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); + kvfree(addr); } static int fq_resize(struct Qdisc *sch, u32 log) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 0bf432c782c1..063b726bf1f8 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -365,12 +365,7 @@ static void *fq_codel_zalloc(size_t sz) static void fq_codel_free(void *addr) { - if (addr) { - if 
(is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); - } + kvfree(addr); } static void fq_codel_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 6e957c3b9854..d85b6812a7d4 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -414,7 +414,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) } bucket->deficit = weight * q->quantum; } - if (++sch->q.qlen < sch->limit) + if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; q->drop_overlimit++; @@ -494,12 +494,7 @@ static void *hhf_zalloc(size_t sz) static void hhf_free(void *addr) { - if (addr) { - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); - } + kvfree(addr); } static void hhf_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index f1669a00f571..111d70fddaea 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -648,12 +648,7 @@ static void netem_reset(struct Qdisc *sch) static void dist_free(struct disttable *d) { - if (d) { - if (is_vmalloc_addr(d)) - vfree(d); - else - kfree(d); - } + kvfree(d); } /* diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 87317ff0b4ec..1af2f73906d0 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -716,12 +716,7 @@ static void *sfq_alloc(size_t sz) static void sfq_free(void *addr) { - if (addr) { - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); - } + kvfree(addr); } static void sfq_destroy(struct Qdisc *sch) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 39579c3e0d14..9de23a222d3f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -55,6 +55,7 @@ #include <net/sctp/sm.h> /* Forward declarations for internal functions. */ +static void sctp_select_active_and_retran_path(struct sctp_association *asoc); static void sctp_assoc_bh_rcv(struct work_struct *work); static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc); static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc); @@ -330,7 +331,7 @@ void sctp_association_free(struct sctp_association *asoc) /* Only real associations count against the endpoint, so * don't bother for if this is a temporary association. */ - if (!asoc->temp) { + if (!list_empty(&asoc->asocs)) { list_del(&asoc->asocs); /* Decrement the backlog value for a TCP-style listening @@ -774,9 +775,6 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, sctp_transport_cmd_t command, sctp_sn_error_t error) { - struct sctp_transport *t = NULL; - struct sctp_transport *first; - struct sctp_transport *second; struct sctp_ulpevent *event; struct sockaddr_storage addr; int spc_state = 0; @@ -829,13 +827,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, return; } - /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the - * user. + /* Generate and send a SCTP_PEER_ADDR_CHANGE notification + * to the user. */ if (ulp_notify) { memset(&addr, 0, sizeof(struct sockaddr_storage)); memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); + event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, spc_state, error, GFP_ATOMIC); if (event) @@ -843,60 +842,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, } /* Select new active and retran paths. */ - - /* Look for the two most recently used active transports. - * - * This code produces the wrong ordering whenever jiffies - * rolls over, but we still get usable transports, so we don't - * worry about it. 
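The string of sch_choke/sch_fq/sch_fq_codel/sch_hhf/sch_netem/sch_sfq hunks above all collapse the open-coded "is_vmalloc_addr() ? vfree() : kfree()" dance into kvfree(), which is NULL-safe and frees memory from either allocator (the hhf hunk also fixes an off-by-one so the queue can actually reach its configured limit, and sch_drr now warns when a child qdisc is not work-conserving). A hedged sketch of the usual pairing; the _example helpers are hypothetical:

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static void *alloc_table_example(size_t sz)
{
        /* try the slab first, fall back to vmalloc for large sizes */
        void *p = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);

        return p ? p : vzalloc(sz);
}

static void free_table_example(void *p)
{
        kvfree(p);      /* NULL-safe; handles kmalloc and vmalloc memory */
}
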
- */ - first = NULL; second = NULL; - - list_for_each_entry(t, &asoc->peer.transport_addr_list, - transports) { - - if ((t->state == SCTP_INACTIVE) || - (t->state == SCTP_UNCONFIRMED) || - (t->state == SCTP_PF)) - continue; - if (!first || t->last_time_heard > first->last_time_heard) { - second = first; - first = t; - } else if (!second || - t->last_time_heard > second->last_time_heard) - second = t; - } - - /* RFC 2960 6.4 Multi-Homed SCTP Endpoints - * - * By default, an endpoint should always transmit to the - * primary path, unless the SCTP user explicitly specifies the - * destination transport address (and possibly source - * transport address) to use. - * - * [If the primary is active but not most recent, bump the most - * recently used transport.] - */ - if (((asoc->peer.primary_path->state == SCTP_ACTIVE) || - (asoc->peer.primary_path->state == SCTP_UNKNOWN)) && - first != asoc->peer.primary_path) { - second = first; - first = asoc->peer.primary_path; - } - - if (!second) - second = first; - /* If we failed to find a usable transport, just camp on the - * primary, even if it is inactive. - */ - if (!first) { - first = asoc->peer.primary_path; - second = asoc->peer.primary_path; - } - - /* Set the active and retran transports. */ - asoc->peer.active_path = first; - asoc->peer.retran_path = second; + sctp_select_active_and_retran_path(asoc); } /* Hold a reference to an association. */ @@ -1090,7 +1036,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) } if (chunk->transport) - chunk->transport->last_time_heard = jiffies; + chunk->transport->last_time_heard = ktime_get(); /* Run through the state machine. */ error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, @@ -1278,13 +1224,41 @@ static u8 sctp_trans_score(const struct sctp_transport *trans) return sctp_trans_state_to_prio_map[trans->state]; } +static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1, + struct sctp_transport *trans2) +{ + if (trans1->error_count > trans2->error_count) { + return trans2; + } else if (trans1->error_count == trans2->error_count && + ktime_after(trans2->last_time_heard, + trans1->last_time_heard)) { + return trans2; + } else { + return trans1; + } +} + static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr, struct sctp_transport *best) { + u8 score_curr, score_best; + if (best == NULL) return curr; - return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best; + score_curr = sctp_trans_score(curr); + score_best = sctp_trans_score(best); + + /* First, try a score-based selection if both transport states + * differ. If we're in a tie, lets try to make a more clever + * decision here based on error counts and last time heard. + */ + if (score_curr > score_best) + return curr; + else if (score_curr == score_best) + return sctp_trans_elect_tie(curr, best); + else + return best; } void sctp_assoc_update_retran_path(struct sctp_association *asoc) @@ -1325,6 +1299,76 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) __func__, asoc, &asoc->peer.retran_path->ipaddr.sa); } +static void sctp_select_active_and_retran_path(struct sctp_association *asoc) +{ + struct sctp_transport *trans, *trans_pri = NULL, *trans_sec = NULL; + struct sctp_transport *trans_pf = NULL; + + /* Look for the two most recently used active transports. */ + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) { + /* Skip uninteresting transports. 
*/ + if (trans->state == SCTP_INACTIVE || + trans->state == SCTP_UNCONFIRMED) + continue; + /* Keep track of the best PF transport from our + * list in case we don't find an active one. + */ + if (trans->state == SCTP_PF) { + trans_pf = sctp_trans_elect_best(trans, trans_pf); + continue; + } + /* For active transports, pick the most recent ones. */ + if (trans_pri == NULL || + ktime_after(trans->last_time_heard, + trans_pri->last_time_heard)) { + trans_sec = trans_pri; + trans_pri = trans; + } else if (trans_sec == NULL || + ktime_after(trans->last_time_heard, + trans_sec->last_time_heard)) { + trans_sec = trans; + } + } + + /* RFC 2960 6.4 Multi-Homed SCTP Endpoints + * + * By default, an endpoint should always transmit to the primary + * path, unless the SCTP user explicitly specifies the + * destination transport address (and possibly source transport + * address) to use. [If the primary is active but not most recent, + * bump the most recently used transport.] + */ + if ((asoc->peer.primary_path->state == SCTP_ACTIVE || + asoc->peer.primary_path->state == SCTP_UNKNOWN) && + asoc->peer.primary_path != trans_pri) { + trans_sec = trans_pri; + trans_pri = asoc->peer.primary_path; + } + + /* We did not find anything useful for a possible retransmission + * path; either primary path that we found is the same as + * the current one, or we didn't generally find an active one. + */ + if (trans_sec == NULL) + trans_sec = trans_pri; + + /* If we failed to find a usable transport, just camp on the + * primary or retran, even if they are inactive, if possible + * pick a PF iff it's the better choice. + */ + if (trans_pri == NULL) { + trans_pri = sctp_trans_elect_best(asoc->peer.primary_path, + asoc->peer.retran_path); + trans_pri = sctp_trans_elect_best(trans_pri, trans_pf); + trans_sec = asoc->peer.primary_path; + } + + /* Set the active and retran transports. */ + asoc->peer.active_path = trans_pri; + asoc->peer.retran_path = trans_sec; +} + struct sctp_transport * sctp_assoc_choose_alter_transport(struct sctp_association *asoc, struct sctp_transport *last_sent_to) @@ -1547,7 +1591,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, /* Set an association id for a given association */ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) { - bool preload = gfp & __GFP_WAIT; + bool preload = !!(gfp & __GFP_WAIT); int ret; /* If the id is already assigned, keep it.
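Alongside the path-selection rewrite above, the series moves transport->last_time_heard from jiffies to ktime_t (see the ktime_get() conversions below), so recency comparisons use ktime_after() on a 64-bit monotonic clock; the removed comment even admitted that the old jiffies ordering went wrong at rollover. A minimal sketch, assuming the post-series struct sctp_transport layout:

#include <linux/ktime.h>
#include <net/sctp/structs.h>

static bool heard_more_recently_example(const struct sctp_transport *a,
                                        const struct sctp_transport *b)
{
        /* ktime_t counts 64-bit nanoseconds on a monotonic clock, so
         * unlike jiffies there is no wraparound to special-case. */
        return ktime_after(a->last_time_heard, b->last_time_heard);
}
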
*/ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 3d9f429858dc..9da76ba4d10f 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -481,7 +481,7 @@ normal: } if (chunk->transport) - chunk->transport->last_time_heard = jiffies; + chunk->transport->last_time_heard = ktime_get(); error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2b1738ef9394..1999592ba88c 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -216,7 +216,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) IP6_ECN_flow_xmit(sk, fl6->flowlabel); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) - skb->local_df = 1; + skb->ignore_df = 1; SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); @@ -943,7 +943,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; static struct inet_protosw sctpv6_stream_protosw = { @@ -951,7 +950,6 @@ static struct inet_protosw sctpv6_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG, }; diff --git a/net/sctp/output.c b/net/sctp/output.c index 0f4d15fc2627..01ab8e0723f0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -591,7 +591,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); - nskb->local_df = packet->ipfragok; + nskb->ignore_df = packet->ipfragok; tp->af_specific->sctp_xmit(nskb, tp); out: diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0947f1e15eb8..34229ee7f379 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -78,7 +78,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field((void __percpu **)net->sctp.sctp_statistics, + snmp_fold_field(net->sctp.sctp_statistics, sctp_snmp_list[i].entry)); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 44cbb54c8574..6789d785e698 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1017,7 +1017,6 @@ static struct inet_protosw sctp_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; static struct inet_protosw sctp_stream_protosw = { @@ -1025,7 +1024,6 @@ static struct inet_protosw sctp_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -1105,14 +1103,15 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) static inline int init_sctp_mibs(struct net *net) { - return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics, - sizeof(struct sctp_mib), - __alignof__(struct sctp_mib)); + net->sctp.sctp_statistics = alloc_percpu(struct sctp_mib); + if (!net->sctp.sctp_statistics) + return -ENOMEM; + return 0; } static inline void cleanup_sctp_mibs(struct net *net) { - snmp_mib_free((void __percpu **)net->sctp.sctp_statistics); + free_percpu(net->sctp.sctp_statistics); } static void sctp_v4_pf_init(void) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fee5552ddf92..ae0e616a7ca5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1782,7 +1782,7 @@ no_hmac: else kt = ktime_get(); - if (!asoc && 
ktime_compare(bear_cookie->expiration, kt) < 0) { + if (!asoc && ktime_before(bear_cookie->expiration, kt)) { /* * Section 3.3.10.3 Stale Cookie Error (3) * diff --git a/net/sctp/socket.c b/net/sctp/socket.c index fee06b99a4da..429899689408 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -71,6 +71,7 @@ #include <net/route.h> #include <net/ipv6.h> #include <net/inet_common.h> +#include <net/busy_poll.h> #include <linux/socket.h> /* for sa_family_t */ #include <linux/export.h> @@ -5945,8 +5946,9 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) /* Search for an available port. */ int low, high, remaining, index; unsigned int rover; + struct net *net = sock_net(sk); - inet_get_local_port_range(sock_net(sk), &low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; @@ -5954,7 +5956,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) rover++; if ((rover < low) || (rover > high)) rover = low; - if (inet_is_reserved_local_port(rover)) + if (inet_is_local_reserved_port(net, rover)) continue; index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; @@ -6557,6 +6559,10 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk->sk_shutdown & RCV_SHUTDOWN) break; + if (sk_can_busy_loop(sk) && + sk_busy_loop(sk, noblock)) + continue; + /* User doesn't want to wait. */ error = -EAGAIN; if (!timeo) @@ -6940,7 +6946,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; newsk->sk_flags = sk->sk_flags; - newsk->sk_no_check = sk->sk_no_check; + newsk->sk_no_check_tx = sk->sk_no_check_tx; + newsk->sk_no_check_rx = sk->sk_no_check_rx; newsk->sk_reuse = sk->sk_reuse; newsk->sk_shutdown = sk->sk_shutdown; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index c82fdc1eab7c..7e5eb7554990 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -436,20 +436,21 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write, int sctp_sysctl_net_register(struct net *net) { - struct ctl_table *table = sctp_net_table; - - if (!net_eq(net, &init_net)) { - int i; + struct ctl_table *table; + int i; - table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); - if (!table) - return -ENOMEM; + table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); + if (!table) + return -ENOMEM; - for (i = 0; table[i].data; i++) - table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; - } + for (i = 0; table[i].data; i++) + table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); + if (net->sctp.sysctl_header == NULL) { + kfree(table); + return -ENOMEM; + } return 0; } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1d348d15b33d..7dd672fa651f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net, */ peer->rto = msecs_to_jiffies(net->sctp.rto_initial); - peer->last_time_heard = jiffies; + peer->last_time_heard = ktime_get(); peer->last_time_ecne_reduced = jiffies; peer->param_flags = SPP_HB_DISABLE | diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 7144eb6a1b95..d49dc2ed30ad 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -38,6 +38,7 @@ #include <linux/types.h> #include <linux/skbuff.h> #include <net/sock.h> +#include <net/busy_poll.h> 
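The sctp_skb_recv_datagram() hunk above wires SCTP into socket busy-polling: before sleeping, the receive loop spins briefly on the NAPI context that last delivered data, and (just below, in ulpqueue.c) the delivery path tags the socket with that NAPI id via sk_mark_napi_id(). A hedged sketch of the two halves; the _example wrappers are hypothetical:

#include <net/busy_poll.h>

/* delivery path: remember which NAPI context fed this socket */
static void rx_mark_example(struct sock *sk, struct sk_buff *skb)
{
        sk_mark_napi_id(sk, skb);
}

/* blocking receive: poll that NAPI context briefly before sleeping */
static bool try_busy_poll_example(struct sock *sk, int noblock)
{
        return sk_can_busy_loop(sk) && sk_busy_loop(sk, noblock);
}
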
#include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> @@ -204,6 +205,9 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN)) goto out_free; + if (!sctp_ulpevent_is_notification(event)) + sk_mark_napi_id(sk, skb); + /* Check if the user wishes to receive this event. */ if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) goto out_free; diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 0a648c502fc3..2df87f78e518 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -173,7 +173,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if (csum_fold(desc.csum)) return -1; - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 402a7e9a16b7..be8bbd5d65ec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -866,8 +866,6 @@ static void xs_reset_transport(struct sock_xprt *transport) xs_restore_old_callbacks(transport, sk); write_unlock_bh(&sk->sk_callback_lock); - sk->sk_no_check = 0; - trace_rpc_socket_close(&transport->xprt, sock); sock_release(sock); } @@ -2046,7 +2044,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; - sk->sk_no_check = UDP_CSUM_NORCV; sk->sk_allocation = GFP_ATOMIC; xprt_set_connected(xprt); diff --git a/net/tipc/Makefile b/net/tipc/Makefile index b282f7130d2b..a080c66d819a 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_TIPC) := tipc.o tipc-y += addr.o bcast.o bearer.o config.o \ - core.o handler.o link.o discover.o msg.o \ + core.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ socket.o log.o eth_media.o server.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 95ab5ef92920..26631679a1fa 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -71,7 +71,7 @@ struct tipc_bcbearer_pair { * Note: The fields labelled "temporary" are incorporated into the bearer * to avoid consuming potentially limited stack space through the use of * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bc_lock". + * prevented through use of the spinlock "bclink_lock". */ struct tipc_bcbearer { struct tipc_bearer bearer; @@ -84,34 +84,64 @@ struct tipc_bcbearer { /** * struct tipc_bclink - link used for broadcast messages + * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node + * @flags: represent bclink states * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * * Handles sequence numbering, fragmentation, bundling, etc. 
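Two smaller threads run through the hunks above: the sunrpc change stops flagging a hardware checksum fault when the checksum was actually completed in software (skb->csum_complete_sw), and the old single sk->sk_no_check flag is split into independent sk_no_check_tx/sk_no_check_rx controls (which is why xprtsock simply drops its UDP_CSUM_NORCV assignment). A sketch of what the split buys, using a hypothetical helper:

#include <net/sock.h>

static void tune_udp_checksums_example(struct sock *sk)
{
        sk->sk_no_check_tx = 1;   /* skip checksum generation on send   */
        sk->sk_no_check_rx = 0;   /* still verify checksums on receive  */
}
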
*/ struct tipc_bclink { + spinlock_t lock; struct tipc_link link; struct tipc_node node; + unsigned int flags; struct tipc_node_map bcast_nodes; struct tipc_node *retransmit_to; }; -static struct tipc_bcbearer bcast_bearer; -static struct tipc_bclink bcast_link; - -static struct tipc_bcbearer *bcbearer = &bcast_bearer; -static struct tipc_bclink *bclink = &bcast_link; -static struct tipc_link *bcl = &bcast_link.link; - -static DEFINE_SPINLOCK(bc_lock); +static struct tipc_bcbearer *bcbearer; +static struct tipc_bclink *bclink; +static struct tipc_link *bcl; const char tipc_bclink_name[] = "broadcast-link"; static void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, struct tipc_node_map *nm_diff); +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); + +static void tipc_bclink_lock(void) +{ + spin_lock_bh(&bclink->lock); +} + +static void tipc_bclink_unlock(void) +{ + struct tipc_node *node = NULL; + + if (likely(!bclink->flags)) { + spin_unlock_bh(&bclink->lock); + return; + } + + if (bclink->flags & TIPC_BCLINK_RESET) { + bclink->flags &= ~TIPC_BCLINK_RESET; + node = tipc_bclink_retransmit_to(); + } + spin_unlock_bh(&bclink->lock); + + if (node) + tipc_link_reset_all(node); +} + +void tipc_bclink_set_flags(unsigned int flags) +{ + bclink->flags |= flags; +} static u32 bcbuf_acks(struct sk_buff *buf) { @@ -130,16 +160,16 @@ static void bcbuf_decr_acks(struct sk_buff *buf) void tipc_bclink_add_node(u32 addr) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_nmap_add(&bclink->bcast_nodes, addr); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } void tipc_bclink_remove_node(u32 addr) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_nmap_remove(&bclink->bcast_nodes, addr); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } static void bclink_set_last_sent(void) @@ -165,7 +195,7 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) /** * tipc_bclink_retransmit_to - get most recent node to request retransmission * - * Called with bc_lock locked + * Called with bclink_lock locked */ struct tipc_node *tipc_bclink_retransmit_to(void) { @@ -177,7 +207,7 @@ struct tipc_node *tipc_bclink_retransmit_to(void) * @after: sequence number of last packet to *not* retransmit * @to: sequence number of last packet to retransmit * - * Called with bc_lock locked + * Called with bclink_lock locked */ static void bclink_retransmit_pkt(u32 after, u32 to) { @@ -194,7 +224,7 @@ static void bclink_retransmit_pkt(u32 after, u32 to) * @n_ptr: node that sent acknowledgement info * @acked: broadcast sequence # that has been acknowledged * - * Node is locked, bc_lock unlocked. + * Node is locked, bclink_lock unlocked. 
*/ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) { @@ -202,8 +232,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct sk_buff *next; unsigned int released = 0; - spin_lock_bh(&bc_lock); - + tipc_bclink_lock(); /* Bail out if tx queue is empty (no clean up is required) */ crs = bcl->first_out; if (!crs) @@ -267,13 +296,13 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) if (unlikely(released && !list_empty(&bcl->waiting_ports))) tipc_link_wakeup_ports(bcl, 0); exit: - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } /** * tipc_bclink_update_link_state - update broadcast link state * - * tipc_net_lock and node lock set + * RCU and node lock set */ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) { @@ -320,10 +349,10 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) ? buf_seqno(n_ptr->bclink.deferred_head) - 1 : n_ptr->bclink.last_sent); - spin_lock_bh(&bc_lock); - tipc_bearer_send(&bcbearer->bearer, buf, NULL); + tipc_bclink_lock(); + tipc_bearer_send(MAX_BEARERS, buf, NULL); bcl->stats.sent_nacks++; - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); kfree_skb(buf); n_ptr->bclink.oos_state++; @@ -335,8 +364,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) * * Delay any upcoming NACK by this node if another node has already * requested the first message this node is going to ask for. - * - * Only tipc_net_lock set. */ static void bclink_peek_nack(struct tipc_msg *msg) { @@ -362,7 +389,7 @@ int tipc_bclink_xmit(struct sk_buff *buf) { int res; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); if (!bclink->bcast_nodes.count) { res = msg_data_sz(buf_msg(buf)); @@ -377,14 +404,14 @@ int tipc_bclink_xmit(struct sk_buff *buf) bcl->stats.accu_queue_sz += bcl->out_queue_size; } exit: - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return res; } /** * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet * - * Called with both sending node's lock and bc_lock taken. + * Called with both sending node's lock and bclink_lock taken. 
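tipc_bclink_unlock() above is a nice example of deferring work to unlock time: tipc_link_reset_all() must not run under the broadcast-link spinlock, so a RESET flag is recorded while locked and acted on only after the lock is dropped. A generic sketch of the pattern, with hypothetical names; as in the TIPC code, the caller is assumed to hold the lock on entry:

#include <linux/spinlock.h>
#include <linux/types.h>

#define RESET_PENDING_EXAMPLE 0x1

struct guarded_example {
        spinlock_t lock;        /* held by the caller on entry */
        unsigned int flags;
};

static void heavy_reset_example(void)
{
        /* stands in for tipc_link_reset_all(), which must not run
         * under the spinlock */
}

static void unlock_example(struct guarded_example *g)
{
        bool do_reset = false;

        if (g->flags & RESET_PENDING_EXAMPLE) {
                g->flags &= ~RESET_PENDING_EXAMPLE;
                do_reset = true;                /* note it, act later */
        }
        spin_unlock_bh(&g->lock);

        if (do_reset)
                heavy_reset_example();          /* lock already dropped */
}
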
*/ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) { @@ -408,7 +435,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) /** * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards * - * tipc_net_lock is read_locked, no other locks set + * RCU is locked, no other locks set */ void tipc_bclink_rcv(struct sk_buff *buf) { @@ -439,12 +466,12 @@ void tipc_bclink_rcv(struct sk_buff *buf) if (msg_destnode(msg) == tipc_own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bcl->stats.recv_nacks++; bclink->retransmit_to = node; bclink_retransmit_pkt(msg_bcgap_after(msg), msg_bcgap_to(msg)); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } else { tipc_node_unlock(node); bclink_peek_nack(msg); @@ -462,51 +489,47 @@ receive: /* Deliver message to destination */ if (likely(msg_isdata(msg))) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); if (likely(msg_mcast(msg))) tipc_port_mcast_rcv(buf, NULL); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); tipc_link_bundle_rcv(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { - int ret; - ret = tipc_link_frag_rcv(&node->bclink.reasm_head, - &node->bclink.reasm_tail, - &buf); - if (ret == LINK_REASM_ERROR) + tipc_buf_append(&node->bclink.reasm_buf, &buf); + if (unlikely(!buf && !node->bclink.reasm_buf)) goto unlock; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; - if (ret == LINK_REASM_COMPLETE) { + if (buf) { bcl->stats.recv_fragmented++; - /* Point msg to inner header */ msg = buf_msg(buf); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); goto receive; } - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); tipc_named_rcv(buf); } else { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); kfree_skb(buf); } @@ -552,14 +575,14 @@ receive: } else deferred = 0; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); if (deferred) bcl->stats.deferred_recv++; else bcl->stats.duplicates++; - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); unlock: tipc_node_unlock(node); @@ -627,13 +650,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (bp_index == 0) { /* Use original buffer for first bearer */ - tipc_bearer_send(b, buf, &b->bcast_addr); + tipc_bearer_send(b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ - tbuf = pskb_copy(buf, GFP_ATOMIC); + tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); if (!tbuf) break; - tipc_bearer_send(b, tbuf, &b->bcast_addr); + tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } @@ -655,20 +678,27 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, /** * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer */ -void 
tipc_bcbearer_sort(void) +void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) { struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; + struct tipc_bearer *b; int b_index; int pri; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); + + if (action) + tipc_nmap_add(nm_ptr, node); + else + tipc_nmap_remove(nm_ptr, node); /* Group bearers by priority (can assume max of two per priority) */ memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); + rcu_read_lock(); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - struct tipc_bearer *b = bearer_list[b_index]; + b = rcu_dereference_rtnl(bearer_list[b_index]); if (!b || !b->nodes.count) continue; @@ -677,6 +707,7 @@ void tipc_bcbearer_sort(void) else bp_temp[b->priority].secondary = b; } + rcu_read_unlock(); /* Create array of bearer pairs for broadcasting */ bp_curr = bcbearer->bpairs; @@ -702,7 +733,7 @@ void tipc_bcbearer_sort(void) bp_curr++; } - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } @@ -714,7 +745,7 @@ int tipc_bclink_stats(char *buf, const u32 buf_size) if (!bcl) return 0; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); s = &bcl->stats; @@ -743,7 +774,7 @@ int tipc_bclink_stats(char *buf, const u32 buf_size) s->queue_sz_counts ? (s->accu_queue_sz / s->queue_sz_counts) : 0); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return ret; } @@ -752,9 +783,9 @@ int tipc_bclink_reset_stats(void) if (!bcl) return -ENOPROTOOPT; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); memset(&bcl->stats, 0, sizeof(bcl->stats)); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return 0; } @@ -765,46 +796,59 @@ int tipc_bclink_set_queue_limits(u32 limit) if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) return -EINVAL; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_link_set_queue_limits(bcl, limit); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return 0; } -void tipc_bclink_init(void) +int tipc_bclink_init(void) { + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); + if (!bcbearer) + return -ENOMEM; + + bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); + if (!bclink) { + kfree(bcbearer); + return -ENOMEM; + } + + bcl = &bclink->link; bcbearer->bearer.media = &bcbearer->media; bcbearer->media.send_msg = tipc_bcbearer_send; sprintf(bcbearer->media.name, "tipc-broadcast"); + spin_lock_init(&bclink->lock); INIT_LIST_HEAD(&bcl->waiting_ports); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - bcl->b_ptr = &bcbearer->bearer; - bearer_list[BCBEARER] = &bcbearer->bearer; + bcl->bearer_id = MAX_BEARERS; + rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); + return 0; } void tipc_bclink_stop(void) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_link_purge_queues(bcl); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); - bearer_list[BCBEARER] = NULL; - memset(bclink, 0, sizeof(*bclink)); - memset(bcbearer, 0, sizeof(*bcbearer)); + RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); + synchronize_net(); + kfree(bcbearer); + kfree(bclink); } - /** * tipc_nmap_add - add a node to a node map */ -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; @@ -819,7 +863,7 @@ void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 
node) /** * tipc_nmap_remove - remove a node from a node map */ -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a80ef54b818e..00330c45df3e 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -39,6 +39,7 @@ #define MAX_NODES 4096 #define WSIZE 32 +#define TIPC_BCLINK_RESET 1 /** * struct tipc_node_map - set of node identifiers @@ -69,9 +70,6 @@ struct tipc_node; extern const char tipc_bclink_name[]; -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); - /** * tipc_nmap_equal - test for equality of node maps */ @@ -84,8 +82,9 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); void tipc_port_list_free(struct tipc_port_list *pl_ptr); -void tipc_bclink_init(void); +int tipc_bclink_init(void); void tipc_bclink_stop(void); +void tipc_bclink_set_flags(unsigned int flags); void tipc_bclink_add_node(u32 addr); void tipc_bclink_remove_node(u32 addr); struct tipc_node *tipc_bclink_retransmit_to(void); @@ -98,6 +97,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); -void tipc_bcbearer_sort(void); +void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3fef7eb776dc..264474394f9f 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -49,7 +49,7 @@ static struct tipc_media * const media_info_array[] = { NULL }; -struct tipc_bearer *bearer_list[MAX_BEARERS + 1]; +struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); @@ -178,7 +178,7 @@ struct tipc_bearer *tipc_bearer_find(const char *name) u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (b_ptr && (!strcmp(b_ptr->name, name))) return b_ptr; } @@ -198,10 +198,9 @@ struct sk_buff *tipc_bearer_get_names(void) if (!buf) return NULL; - read_lock_bh(&tipc_net_lock); for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { - b = bearer_list[j]; + b = rtnl_dereference(bearer_list[j]); if (!b) continue; if (b->media == media_info_array[i]) { @@ -211,22 +210,33 @@ struct sk_buff *tipc_bearer_get_names(void) } } } - read_unlock_bh(&tipc_net_lock); return buf; } -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest) +void tipc_bearer_add_dest(u32 bearer_id, u32 dest) { - tipc_nmap_add(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - tipc_disc_add_dest(b_ptr->link_req); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (b_ptr) { + tipc_bcbearer_sort(&b_ptr->nodes, dest, true); + tipc_disc_add_dest(b_ptr->link_req); + } + rcu_read_unlock(); } -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) +void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) { - tipc_nmap_remove(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - tipc_disc_remove_dest(b_ptr->link_req); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (b_ptr) { + tipc_bcbearer_sort(&b_ptr->nodes, dest, false); + 
tipc_disc_remove_dest(b_ptr->link_req); + } + rcu_read_unlock(); } /** @@ -271,13 +281,11 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) return -EINVAL; } - write_lock_bh(&tipc_net_lock); - m_ptr = tipc_media_find(b_names.media_name); if (!m_ptr) { pr_warn("Bearer <%s> rejected, media <%s> not registered\n", name, b_names.media_name); - goto exit; + return -EINVAL; } if (priority == TIPC_MEDIA_LINK_PRI) @@ -287,7 +295,7 @@ restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (!b_ptr) { bearer_id = i; continue; @@ -295,14 +303,14 @@ restart: if (!strcmp(name, b_ptr->name)) { pr_warn("Bearer <%s> rejected, already enabled\n", name); - goto exit; + return -EINVAL; } if ((b_ptr->priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { pr_warn("Bearer <%s> rejected, duplicate priority\n", name); - goto exit; + return -EINVAL; } pr_warn("Bearer <%s> priority adjustment required %u->%u\n", name, priority + 1, priority); @@ -312,21 +320,20 @@ restart: if (bearer_id >= MAX_BEARERS) { pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n", name, MAX_BEARERS); - goto exit; + return -EINVAL; } b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); - if (!b_ptr) { - res = -ENOMEM; - goto exit; - } + if (!b_ptr) + return -ENOMEM; + strcpy(b_ptr->name, name); b_ptr->media = m_ptr; res = m_ptr->enable_media(b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); - goto exit; + return -EINVAL; } b_ptr->identity = bearer_id; @@ -341,16 +348,14 @@ restart: bearer_disable(b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); - goto exit; + return -EINVAL; } - bearer_list[bearer_id] = b_ptr; + rcu_assign_pointer(bearer_list[bearer_id], b_ptr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, tipc_addr_string_fill(addr_string, disc_domain), priority); -exit: - write_unlock_bh(&tipc_net_lock); return res; } @@ -359,19 +364,16 @@ exit: */ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { - read_lock_bh(&tipc_net_lock); pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_disc_delete(b_ptr->link_req); tipc_link_reset_list(b_ptr->identity); - tipc_disc_create(b_ptr, &b_ptr->bcast_addr); - read_unlock_bh(&tipc_net_lock); + tipc_disc_reset(b_ptr); return 0; } /** * bearer_disable * - * Note: This routine assumes caller holds tipc_net_lock. + * Note: This routine assumes caller holds RTNL lock. 
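The bearer API above changes shape: callers now pass a bearer id instead of a pointer, and the pointer is fetched from the RCU-protected bearer_list[] only inside an RCU read-side section, so a concurrent bearer_disable() can safely free the structure. A sketch of the lookup discipline, assuming TIPC's bearer.h declarations; use_bearer_example() is hypothetical:

#include <linux/rtnetlink.h>

static void use_bearer_example(u32 bearer_id)
{
        struct tipc_bearer *b;

        rcu_read_lock();
        b = rcu_dereference_rtnl(bearer_list[bearer_id]);
        if (b) {
                /* b cannot be freed before rcu_read_unlock(), because
                 * the disable path defers the free with kfree_rcu() */
                pr_debug("bearer %s is live\n", b->name);
        }
        rcu_read_unlock();
}
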
*/ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) { @@ -385,12 +387,12 @@ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) tipc_disc_delete(b_ptr->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == bearer_list[i]) { - bearer_list[i] = NULL; + if (b_ptr == rtnl_dereference(bearer_list[i])) { + RCU_INIT_POINTER(bearer_list[i], NULL); break; } } - kfree(b_ptr); + kfree_rcu(b_ptr, rcu); } int tipc_disable_bearer(const char *name) @@ -398,7 +400,6 @@ int tipc_disable_bearer(const char *name) struct tipc_bearer *b_ptr; int res; - write_lock_bh(&tipc_net_lock); b_ptr = tipc_bearer_find(name); if (b_ptr == NULL) { pr_warn("Attempt to disable unknown bearer <%s>\n", name); @@ -407,32 +408,9 @@ int tipc_disable_bearer(const char *name) bearer_disable(b_ptr, false); res = 0; } - write_unlock_bh(&tipc_net_lock); return res; } - -/* tipc_l2_media_addr_set - initialize Ethernet media address structure - * - * Media-dependent "value" field stores MAC address in first 6 bytes - * and zeroes out the remaining bytes. - */ -void tipc_l2_media_addr_set(const struct tipc_bearer *b, - struct tipc_media_addr *a, char *mac) -{ - int len = b->media->hwaddr_len; - - if (unlikely(sizeof(a->value) < len)) { - WARN_ONCE(1, "Media length invalid\n"); - return; - } - - memcpy(a->value, mac, len); - memset(a->value + len, 0, sizeof(a->value) - len); - a->media_id = b->media->type_id; - a->broadcast = !memcmp(mac, b->bcast_addr.value, len); -} - int tipc_enable_l2_media(struct tipc_bearer *b) { struct net_device *dev; @@ -443,33 +421,37 @@ int tipc_enable_l2_media(struct tipc_bearer *b) if (!dev) return -ENODEV; - /* Associate TIPC bearer with Ethernet bearer */ - b->media_ptr = dev; - memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value)); + /* Associate TIPC bearer with L2 bearer */ + rcu_assign_pointer(b->media_ptr, dev); + memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = 1; b->mtu = dev->mtu; - tipc_l2_media_addr_set(b, &b->addr, (char *)dev->dev_addr); + b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } -/* tipc_disable_l2_media - detach TIPC bearer from an Ethernet interface +/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface * - * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, + * Mark L2 bearer as inactive so that incoming buffers are thrown away, * then get worker thread to complete bearer cleanup. (Can't do cleanup * here because cleanup code needs to sleep and caller holds spinlocks.) 
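The freeing side of that scheme is in bearer_disable() above: the entry is unpublished with RCU_INIT_POINTER() and released with kfree_rcu(), which defers the actual kfree() past a grace period without making the caller block. A generic sketch under the same assumptions (caller holds RTNL, the struct embeds an rcu_head); names are hypothetical:

#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct item_example {
        int data;
        struct rcu_head rcu;            /* storage for the deferred free */
};

static struct item_example __rcu *slot_example;

static void retire_example(void)
{
        struct item_example *p = rtnl_dereference(slot_example);

        RCU_INIT_POINTER(slot_example, NULL);   /* new readers see NULL */
        if (p)
                kfree_rcu(p, rcu);      /* freed after a grace period,
                                         * without blocking the caller */
}
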
*/ void tipc_disable_l2_media(struct tipc_bearer *b) { - struct net_device *dev = (struct net_device *)b->media_ptr; + struct net_device *dev; + + dev = (struct net_device *)rtnl_dereference(b->media_ptr); + RCU_INIT_POINTER(b->media_ptr, NULL); RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); dev_put(dev); } /** - * tipc_l2_send_msg - send a TIPC packet out over an Ethernet interface + * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @buf: the packet to be sent * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address @@ -478,8 +460,12 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct sk_buff *clone; + struct net_device *dev; int delta; - struct net_device *dev = (struct net_device *)b->media_ptr; + + dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); + if (!dev) + return 0; clone = skb_clone(buf, GFP_ATOMIC); if (!clone) @@ -507,10 +493,16 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, * The media send routine must not alter the buffer being passed in * as it may be needed for later retransmission! */ -void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, +void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest) { - b->media->send_msg(buf, b, dest); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (likely(b_ptr)) + b_ptr->media->send_msg(buf, b_ptr, dest); + rcu_read_unlock(); } /** @@ -535,7 +527,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, } rcu_read_lock(); - b_ptr = rcu_dereference(dev->tipc_ptr); + b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; @@ -568,12 +560,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - rcu_read_lock(); - b_ptr = rcu_dereference(dev->tipc_ptr); - if (!b_ptr) { - rcu_read_unlock(); + b_ptr = rtnl_dereference(dev->tipc_ptr); + if (!b_ptr) return NOTIFY_DONE; - } b_ptr->mtu = dev->mtu; @@ -586,17 +575,15 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, tipc_reset_bearer(b_ptr); break; case NETDEV_CHANGEADDR: - tipc_l2_media_addr_set(b_ptr, &b_ptr->addr, + b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, (char *)dev->dev_addr); tipc_reset_bearer(b_ptr); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - tipc_disable_bearer(b_ptr->name); + bearer_disable(b_ptr, false); break; } - rcu_read_unlock(); - return NOTIFY_OK; } @@ -633,7 +620,7 @@ void tipc_bearer_stop(void) u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (b_ptr) { bearer_disable(b_ptr, true); bearer_list[i] = NULL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index ba48145e871d..78fccc49de23 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -42,14 +42,12 @@ #define MAX_BEARERS 2 #define MAX_MEDIA 2 -/* - * Identifiers associated with TIPC message header media address info - * - * - address info field is 20 bytes long - * - media type identifier located at offset 3 - * - remaining bytes vary according to media type +/* Identifiers associated with TIPC message header media address info + * - address info field is 32 bytes long + * - the field's actual content and length is defined per media + * - remaining unused bytes in the field 
are set to zero */ -#define TIPC_MEDIA_ADDR_SIZE 20 +#define TIPC_MEDIA_ADDR_SIZE 32 #define TIPC_MEDIA_TYPE_OFFSET 3 /* @@ -77,9 +75,10 @@ struct tipc_bearer; * @send_msg: routine which handles buffer transmission * @enable_media: routine which enables a media * @disable_media: routine which disables a media - * @addr2str: routine which converts media address to string - * @addr2msg: routine which converts media address to protocol message area - * @msg2addr: routine which converts media address from protocol message area + * @addr2str: convert media address format to string + * @addr2msg: convert from media addr format to discovery msg addr format + * @msg2addr: convert from discovery msg addr format to media addr format + * @raw2addr: convert from raw addr format to media addr format * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion @@ -93,10 +92,16 @@ struct tipc_media { struct tipc_media_addr *dest); int (*enable_media)(struct tipc_bearer *b_ptr); void (*disable_media)(struct tipc_bearer *b_ptr); - int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); - int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); - int (*msg2addr)(const struct tipc_bearer *b_ptr, - struct tipc_media_addr *a, char *msg_area); + int (*addr2str)(struct tipc_media_addr *addr, + char *strbuf, + int bufsz); + int (*addr2msg)(char *msg, struct tipc_media_addr *addr); + int (*msg2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg); + int (*raw2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *raw); u32 priority; u32 tolerance; u32 window; @@ -113,6 +118,7 @@ struct tipc_media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @bcast_addr: media address used in broadcasting + * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer * @window: default window size for bearer * @tolerance: default link tolerance for bearer @@ -127,12 +133,13 @@ struct tipc_media { * care of initializing all other fields. 
*/ struct tipc_bearer { - void *media_ptr; /* initialized by media */ + void __rcu *media_ptr; /* initialized by media */ u32 mtu; /* initialized by media */ struct tipc_media_addr addr; /* initialized by media */ char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; + struct rcu_head rcu; u32 priority; u32 window; u32 tolerance; @@ -150,7 +157,7 @@ struct tipc_bearer_names { struct tipc_link; -extern struct tipc_bearer *bearer_list[]; +extern struct tipc_bearer __rcu *bearer_list[]; /* * TIPC routines available to supported media types */ @@ -173,22 +180,20 @@ int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); -void tipc_l2_media_addr_set(const struct tipc_bearer *b, - struct tipc_media_addr *a, char *mac); int tipc_enable_l2_media(struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); +void tipc_bearer_add_dest(u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(const char *name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(void); -void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, +void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/config.c b/net/tipc/config.c index 4b981c053823..2b42403ad33a 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -42,8 +42,6 @@ #define REPLY_TRUNCATED "<truncated>\n" -static DEFINE_MUTEX(config_mutex); - static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ static int rep_headroom; /* reply message headroom to use */ @@ -179,8 +177,10 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - tipc_net_start(addr); - return tipc_cfg_reply_none(); + if (!tipc_net_start(addr)) + return tipc_cfg_reply_none(); + + return tipc_cfg_reply_error_string("cannot change to network mode"); } static struct sk_buff *cfg_set_max_ports(void) @@ -223,7 +223,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area { struct sk_buff *rep_tlv_buf; - mutex_lock(&config_mutex); + rtnl_lock(); /* Save request and reply details in a well-known location */ req_tlv_area = request_area; @@ -337,6 +337,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area /* Return reply buffer */ exit: - mutex_unlock(&config_mutex); + rtnl_unlock(); return rep_tlv_buf; } diff --git a/net/tipc/core.c b/net/tipc/core.c index 50d57429ebca..676d18015dd8 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -80,7 +80,6 @@ struct sk_buff *tipc_buf_acquire(u32 size) */ static void tipc_core_stop(void) { - tipc_handler_stop(); tipc_net_stop(); tipc_bearer_cleanup(); tipc_netlink_stop(); @@ -100,10 +99,6 @@ static int tipc_core_start(void) get_random_bytes(&tipc_random, sizeof(tipc_random)); -
err = tipc_handler_start(); - if (err) - goto out_handler; - err = tipc_ref_table_init(tipc_max_ports, tipc_random); if (err) goto out_reftbl; @@ -146,8 +141,6 @@ out_netlink: out_nametbl: tipc_ref_table_stop(); out_reftbl: - tipc_handler_stop(); -out_handler: return err; } @@ -161,10 +154,11 @@ static int __init tipc_init(void) tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; - sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; - sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 << + sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << TIPC_CRITICAL_IMPORTANCE; - sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT; + sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; res = tipc_core_start(); if (res) diff --git a/net/tipc/core.h b/net/tipc/core.h index 8985bbcb942b..bb26ed1ee966 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -56,7 +56,8 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/vmalloc.h> - +#include <linux/rtnetlink.h> +#include <linux/etherdevice.h> #define TIPC_MOD_VER "2.0.0" @@ -89,8 +90,6 @@ extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems */ -int tipc_handler_start(void); -void tipc_handler_stop(void); int tipc_netlink_start(void); void tipc_netlink_stop(void); int tipc_socket_init(void); @@ -109,12 +108,10 @@ void tipc_unregister_sysctl(void); #endif /* - * TIPC timer and signal code + * TIPC timer code */ typedef void (*Handler) (unsigned long); -u32 tipc_k_signal(Handler routine, unsigned long argument); - /** * k_init_timer - initialize a timer * @timer: pointer to timer structure @@ -191,6 +188,7 @@ static inline void k_term_timer(struct timer_list *timer) struct tipc_skb_cb { void *handle; bool deferred; + struct sk_buff *tail; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 542fe3413dc4..aa722a42ef8b 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -1,7 +1,7 @@ /* * net/tipc/discover.c * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2014, Ericsson AB * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. * @@ -46,8 +46,9 @@ /** * struct tipc_link_req - information about an ongoing link setup request - * @bearer: bearer issuing requests + * @bearer_id: identity of bearer issuing requests * @dest: destination address for request messages + * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. 
with an active link) * @lock: spinlock for controlling access to requests * @buf: request message to be (repeatedly) sent @@ -55,8 +56,9 @@ * @timer_intv: current interval between requests (in ms) */ struct tipc_link_req { - struct tipc_bearer *bearer; + u32 bearer_id; struct tipc_media_addr dest; + u32 domain; int num_nodes; spinlock_t lock; struct sk_buff *buf; @@ -69,22 +71,19 @@ struct tipc_link_req { * @type: message type (request or response) * @b_ptr: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, struct tipc_bearer *b_ptr) +static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, + struct tipc_bearer *b_ptr) { - struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); struct tipc_msg *msg; u32 dest_domain = b_ptr->domain; - if (buf) { - msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); - msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tipc_random); - msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tipc_net_id); - b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg)); - } - return buf; + msg = buf_msg(buf); + tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); + msg_set_non_seq(msg, 1); + msg_set_node_sig(msg, tipc_random); + msg_set_dest_domain(msg, dest_domain); + msg_set_bc_netid(msg, tipc_net_id); + b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); } /** @@ -107,146 +106,150 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, } /** - * tipc_disc_rcv - handle incoming link setup message (request or response) + * tipc_disc_rcv - handle incoming discovery message (request or response) * @buf: buffer containing message - * @b_ptr: bearer that message arrived on + * @bearer: bearer that message arrived on */ -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr) +void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) { - struct tipc_node *n_ptr; + struct tipc_node *node; struct tipc_link *link; - struct tipc_media_addr media_addr; + struct tipc_media_addr maddr; struct sk_buff *rbuf; struct tipc_msg *msg = buf_msg(buf); - u32 dest = msg_dest_domain(msg); - u32 orig = msg_prevnode(msg); + u32 ddom = msg_dest_domain(msg); + u32 onode = msg_prevnode(msg); u32 net_id = msg_bc_netid(msg); - u32 type = msg_type(msg); + u32 mtyp = msg_type(msg); u32 signature = msg_node_sig(msg); - int addr_mismatch; - int link_fully_up; - - media_addr.broadcast = 1; - b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg)); + bool addr_match = false; + bool sign_match = false; + bool link_up = false; + bool accept_addr = false; + bool accept_sign = false; + bool respond = false; + + bearer->media->msg2addr(bearer, &maddr, msg_media_addr(msg)); kfree_skb(buf); /* Ensure message from node is valid and communication is permitted */ if (net_id != tipc_net_id) return; - if (media_addr.broadcast) + if (maddr.broadcast) return; - if (!tipc_addr_domain_valid(dest)) + if (!tipc_addr_domain_valid(ddom)) return; - if (!tipc_addr_node_valid(orig)) + if (!tipc_addr_node_valid(onode)) return; - if (orig == tipc_own_addr) { - if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr))) - disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr); + + if (in_own_node(onode)) { + if (memcmp(&maddr, &bearer->addr, sizeof(maddr))) + disc_dupl_alert(bearer, tipc_own_addr, &maddr); return; } - if (!tipc_in_scope(dest, tipc_own_addr)) + if (!tipc_in_scope(ddom, tipc_own_addr)) return; - if (!tipc_in_scope(b_ptr->domain, orig)) + if (!tipc_in_scope(bearer->domain, 
onode)) return; - /* Locate structure corresponding to requesting node */ - n_ptr = tipc_node_find(orig); - if (!n_ptr) { - n_ptr = tipc_node_create(orig); - if (!n_ptr) - return; - } - tipc_node_lock(n_ptr); + /* Locate, or if necessary, create, node: */ + node = tipc_node_find(onode); + if (!node) + node = tipc_node_create(onode); + if (!node) + return; - /* Prepare to validate requesting node's signature and media address */ - link = n_ptr->links[b_ptr->identity]; - addr_mismatch = (link != NULL) && - memcmp(&link->media_addr, &media_addr, sizeof(media_addr)); + tipc_node_lock(node); + link = node->links[bearer->identity]; - /* - * Ensure discovery message's signature is correct - * - * If signature is incorrect and there is no working link to the node, - * accept the new signature but invalidate all existing links to the - * node so they won't re-activate without a new discovery message. - * - * If signature is incorrect and the requested link to the node is - * working, accept the new signature. (This is an instance of delayed - * rediscovery, where a link endpoint was able to re-establish contact - * with its peer endpoint on a node that rebooted before receiving a - * discovery message from that node.) - * - * If signature is incorrect and there is a working link to the node - * that is not the requested link, reject the request (must be from - * a duplicate node). - */ - if (signature != n_ptr->signature) { - if (n_ptr->working_links == 0) { - struct tipc_link *curr_link; - int i; - - for (i = 0; i < MAX_BEARERS; i++) { - curr_link = n_ptr->links[i]; - if (curr_link) { - memset(&curr_link->media_addr, 0, - sizeof(media_addr)); - tipc_link_reset(curr_link); - } - } - addr_mismatch = (link != NULL); - } else if (tipc_link_is_up(link) && !addr_mismatch) { - /* delayed rediscovery */ - } else { - disc_dupl_alert(b_ptr, orig, &media_addr); - tipc_node_unlock(n_ptr); - return; - } - n_ptr->signature = signature; + /* Prepare to validate requesting node's signature and media address */ + sign_match = (signature == node->signature); + addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr)); + link_up = link && tipc_link_is_up(link); + + + /* These three flags give us eight permutations: */ + + if (sign_match && addr_match && link_up) { + /* All is fine. Do nothing. */ + } else if (sign_match && addr_match && !link_up) { + /* Respond. The link will come up in due time */ + respond = true; + } else if (sign_match && !addr_match && link_up) { + /* Peer has changed i/f address without rebooting. + * If so, the link will reset soon, and the next + * discovery will be accepted. So we can ignore it. + * It may also be a cloned or malicious peer having + * chosen the same node address and signature as an + * existing one. + * Ignore requests until the link goes down, if ever. + */ + disc_dupl_alert(bearer, onode, &maddr); + } else if (sign_match && !addr_match && !link_up) { + /* Peer link has changed i/f address without rebooting. + * It may also be a cloned or malicious peer; we can't + * distinguish between the two. + * The signature is correct, so we must accept. + */ + accept_addr = true; + respond = true; + } else if (!sign_match && addr_match && link_up) { + /* Peer node rebooted. Two possibilities: + * - Delayed re-discovery; this link endpoint has already + * reset and re-established contact with the peer, before + * receiving a discovery message from that node. + * (The peer happened to receive one from this node first).
+ * - The peer came back so fast that our side has not + * discovered it yet. Probing from this side will soon + * reset the link, since there can be no working link + * endpoint at the peer end, and the link will re-establish. + * Accept the signature, since it comes from a known peer. + */ + accept_sign = true; + } else if (!sign_match && addr_match && !link_up) { + /* The peer node has rebooted. + * Accept signature, since it is a known peer. + */ + accept_sign = true; + respond = true; + } else if (!sign_match && !addr_match && link_up) { + /* Peer rebooted with new address, or a new/duplicate peer. + * Ignore until the link goes down, if ever. + */ + disc_dupl_alert(bearer, onode, &maddr); + } else if (!sign_match && !addr_match && !link_up) { + /* Peer rebooted with new address, or it is a new peer. + * Accept signature and address. + */ + accept_sign = true; + accept_addr = true; + respond = true; } - /* - * Ensure requesting node's media address is correct - * - * If media address doesn't match and the link is working, reject the - * request (must be from a duplicate node). - * - * If media address doesn't match and the link is not working, accept - * the new media address and reset the link to ensure it starts up - * cleanly. - */ - if (addr_mismatch) { - if (tipc_link_is_up(link)) { - disc_dupl_alert(b_ptr, orig, &media_addr); - tipc_node_unlock(n_ptr); - return; - } else { - memcpy(&link->media_addr, &media_addr, - sizeof(media_addr)); - tipc_link_reset(link); - } - } + if (accept_sign) + node->signature = signature; - /* Create a link endpoint for this bearer, if necessary */ - if (!link) { - link = tipc_link_create(n_ptr, b_ptr, &media_addr); - if (!link) { - tipc_node_unlock(n_ptr); - return; + if (accept_addr) { + if (!link) + link = tipc_link_create(node, bearer, &maddr); + if (link) { + memcpy(&link->media_addr, &maddr, sizeof(maddr)); + tipc_link_reset(link); + } else { + respond = false; } } - /* Accept discovery message & send response, if necessary */ - link_fully_up = link_working_working(link); - - if ((type == DSC_REQ_MSG) && !link_fully_up) { - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, b_ptr); + /* Send response, if necessary */ + if (respond && (mtyp == DSC_REQ_MSG)) { + rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { - tipc_bearer_send(b_ptr, rbuf, &media_addr); + tipc_disc_init_msg(rbuf, DSC_RESP_MSG, bearer); + tipc_bearer_send(bearer->identity, rbuf, &maddr); kfree_skb(rbuf); } } - - tipc_node_unlock(n_ptr); + tipc_node_unlock(node); } /** @@ -303,7 +306,7 @@ static void disc_timeout(struct tipc_link_req *req) spin_lock_bh(&req->lock); /* Stop searching if only desired node has been found */ - if (tipc_node(req->bearer->domain) && req->num_nodes) { + if (tipc_node(req->domain) && req->num_nodes) { req->timer_intv = TIPC_LINK_REQ_INACTIVE; goto exit; } @@ -315,7 +318,7 @@ static void disc_timeout(struct tipc_link_req *req) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->bearer, req->buf, &req->dest); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); req->timer_intv *= 2; @@ -347,21 +350,23 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) if (!req) return -ENOMEM; - req->buf = tipc_disc_init_msg(DSC_REQ_MSG, b_ptr); + req->buf = tipc_buf_acquire(INT_H_SIZE); if (!req->buf) { kfree(req); - return -ENOMSG; + return -ENOMEM; } + tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); - req->bearer = b_ptr; + 
req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); k_start_timer(&req->timer, req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(req->bearer, req->buf, &req->dest); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); return 0; } @@ -376,3 +381,23 @@ void tipc_disc_delete(struct tipc_link_req *req) kfree_skb(req->buf); kfree(req); } + +/** + * tipc_disc_reset - reset object to send periodic link setup requests + * @b_ptr: ptr to bearer issuing requests + * (the destination domain is taken from b_ptr->domain) + */ +void tipc_disc_reset(struct tipc_bearer *b_ptr) +{ + struct tipc_link_req *req = b_ptr->link_req; + + spin_lock_bh(&req->lock); + tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; + req->num_nodes = 0; + req->timer_intv = TIPC_LINK_REQ_INIT; + k_start_timer(&req->timer, req->timer_intv); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + spin_unlock_bh(&req->lock); +} diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 07f34729459d..515b57392f4d 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -41,6 +41,7 @@ struct tipc_link_req; int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); void tipc_disc_delete(struct tipc_link_req *req); +void tipc_disc_reset(struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); void tipc_disc_remove_dest(struct tipc_link_req *req); void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr); diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 67cf3f935dba..5e1426f1751f 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -1,7 +1,7 @@ /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * - * Copyright (c) 2001-2007, 2013, Ericsson AB + * Copyright (c) 2001-2007, 2013-2014, Ericsson AB * Copyright (c) 2005-2008, 2011-2013, Wind River Systems * All rights reserved.
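A point worth noting about the rewritten tipc_disc_rcv() above: the old nested signature/address checks are replaced by one explicit decision over three booleans (sign_match, addr_match, link_up), so all eight permutations are spelled out and auditable. The standalone model below reproduces that decision table so it can be printed and checked in isolation; struct action, decide() and dupl_alert are illustrative names for this sketch, not kernel code.

#include <stdbool.h>
#include <stdio.h>

struct action {
	bool accept_sign;
	bool accept_addr;
	bool respond;
	bool dupl_alert;	/* log a duplicate-node warning */
};

static struct action decide(bool sign_match, bool addr_match, bool link_up)
{
	struct action a = { false, false, false, false };

	if (sign_match && addr_match && link_up) {
		/* all is fine, do nothing */
	} else if (sign_match && addr_match && !link_up) {
		a.respond = true;		/* link will come up in due time */
	} else if (sign_match && !addr_match && link_up) {
		a.dupl_alert = true;		/* possible duplicate node */
	} else if (sign_match && !addr_match && !link_up) {
		a.accept_addr = a.respond = true; /* peer changed i/f address */
	} else if (!sign_match && addr_match && link_up) {
		a.accept_sign = true;		/* peer rebooted, known address */
	} else if (!sign_match && addr_match && !link_up) {
		a.accept_sign = a.respond = true; /* peer rebooted */
	} else if (!sign_match && !addr_match && link_up) {
		a.dupl_alert = true;		/* new address while link is up */
	} else {				/* new or rebooted peer */
		a.accept_sign = a.accept_addr = a.respond = true;
	}
	return a;
}

int main(void)
{
	/* print the full eight-row table for inspection */
	for (int i = 0; i < 8; i++) {
		bool s = i & 4, m = i & 2, u = i & 1;
		struct action a = decide(s, m, u);

		printf("sign=%d addr=%d up=%d -> sign:%d addr:%d resp:%d alert:%d\n",
		       s, m, u, a.accept_sign, a.accept_addr, a.respond,
		       a.dupl_alert);
	}
	return 0;
}

Printing the table makes one invariant easy to verify: a response is only ever generated while the link is down; an established link never answers discovery requests.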
* @@ -37,39 +37,52 @@ #include "core.h" #include "bearer.h" -#define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */ +#define ETH_ADDR_OFFSET 4 /* MAC addr position inside address field */ -/* convert Ethernet address to string */ -static int tipc_eth_addr2str(struct tipc_media_addr *a, char *str_buf, - int str_size) +/* Convert Ethernet address (media address format) to string */ +static int tipc_eth_addr2str(struct tipc_media_addr *addr, + char *strbuf, int bufsz) { - if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ + if (bufsz < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ return 1; - sprintf(str_buf, "%pM", a->value); + sprintf(strbuf, "%pM", addr->value); return 0; } -/* convert Ethernet address format to message header format */ -static int tipc_eth_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* Convert from media address format to discovery message addr format */ +static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); - msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; - memcpy(msg_area + ETH_ADDR_OFFSET, a->value, ETH_ALEN); + memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; + memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN); return 0; } -/* convert message header address format to Ethernet format */ -static int tipc_eth_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* Convert raw mac address format to media addr format */ +static int tipc_eth_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) { - if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) - return 1; + char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - tipc_l2_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); + memset(addr, 0, sizeof(*addr)); + ether_addr_copy(addr->value, msg); + addr->media_id = TIPC_MEDIA_TYPE_ETH; + addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN); return 0; } +/* Convert discovery msg addr format to Ethernet media addr format */ +static int tipc_eth_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + /* Skip past preamble: */ + msg += ETH_ADDR_OFFSET; + return tipc_eth_raw2addr(b, addr, msg); +} + /* Ethernet media registration info */ struct tipc_media eth_media_info = { .send_msg = tipc_l2_send_msg, @@ -78,6 +91,7 @@ struct tipc_media eth_media_info = { .addr2str = tipc_eth_addr2str, .addr2msg = tipc_eth_addr2msg, .msg2addr = tipc_eth_msg2addr, + .raw2addr = tipc_eth_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, @@ -85,4 +99,3 @@ struct tipc_media eth_media_info = { .hwaddr_len = ETH_ALEN, .name = "eth" }; - diff --git a/net/tipc/handler.c b/net/tipc/handler.c deleted file mode 100644 index 1fabf160501f..000000000000 --- a/net/tipc/handler.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * net/tipc/handler.c: TIPC signal handling - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" - -struct queue_item { - struct list_head next_signal; - void (*handler) (unsigned long); - unsigned long data; -}; - -static struct kmem_cache *tipc_queue_item_cache; -static struct list_head signal_queue_head; -static DEFINE_SPINLOCK(qitem_lock); -static int handler_enabled __read_mostly; - -static void process_signal_queue(unsigned long dummy); - -static DECLARE_TASKLET_DISABLED(tipc_tasklet, process_signal_queue, 0); - - -unsigned int tipc_k_signal(Handler routine, unsigned long argument) -{ - struct queue_item *item; - - spin_lock_bh(&qitem_lock); - if (!handler_enabled) { - spin_unlock_bh(&qitem_lock); - return -ENOPROTOOPT; - } - - item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC); - if (!item) { - pr_err("Signal queue out of memory\n"); - spin_unlock_bh(&qitem_lock); - return -ENOMEM; - } - item->handler = routine; - item->data = argument; - list_add_tail(&item->next_signal, &signal_queue_head); - spin_unlock_bh(&qitem_lock); - tasklet_schedule(&tipc_tasklet); - return 0; -} - -static void process_signal_queue(unsigned long dummy) -{ - struct queue_item *__volatile__ item; - struct list_head *l, *n; - - spin_lock_bh(&qitem_lock); - list_for_each_safe(l, n, &signal_queue_head) { - item = list_entry(l, struct queue_item, next_signal); - list_del(&item->next_signal); - spin_unlock_bh(&qitem_lock); - item->handler(item->data); - spin_lock_bh(&qitem_lock); - kmem_cache_free(tipc_queue_item_cache, item); - } - spin_unlock_bh(&qitem_lock); -} - -int tipc_handler_start(void) -{ - tipc_queue_item_cache = - kmem_cache_create("tipc_queue_items", sizeof(struct queue_item), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!tipc_queue_item_cache) - return -ENOMEM; - - INIT_LIST_HEAD(&signal_queue_head); - tasklet_enable(&tipc_tasklet); - handler_enabled = 1; - return 0; -} - -void tipc_handler_stop(void) -{ - struct list_head *l, *n; - struct queue_item *item; - - spin_lock_bh(&qitem_lock); - if (!handler_enabled) { - spin_unlock_bh(&qitem_lock); - return; - } - handler_enabled = 0; - spin_unlock_bh(&qitem_lock); - - tasklet_kill(&tipc_tasklet); - - spin_lock_bh(&qitem_lock); - list_for_each_safe(l, n, 
&signal_queue_head) { - item = list_entry(l, struct queue_item, next_signal); - list_del(&item->next_signal); - kmem_cache_free(tipc_queue_item_cache, item); - } - spin_unlock_bh(&qitem_lock); - - kmem_cache_destroy(tipc_queue_item_cache); -} diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 844a77e25828..8522eef9c136 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -42,7 +42,7 @@ #include "core.h" #include "bearer.h" -/* convert InfiniBand address to string */ +/* convert InfiniBand address (media address format) to string */ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) { @@ -54,23 +54,35 @@ return 0; } -/* convert InfiniBand address format to message header format */ -static int tipc_ib_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* Convert from media address format to discovery message addr format */ +static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); - msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB; - memcpy(msg_area, a->value, INFINIBAND_ALEN); + memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + memcpy(msg, addr->value, INFINIBAND_ALEN); return 0; } -/* convert message header address format to InfiniBand format */ -static int tipc_ib_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* Convert raw InfiniBand address format to media addr format */ +static int tipc_ib_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) { - tipc_l2_media_addr_set(tb_ptr, a, msg_area); + memset(addr, 0, sizeof(*addr)); + memcpy(addr->value, msg, INFINIBAND_ALEN); + addr->media_id = TIPC_MEDIA_TYPE_IB; + addr->broadcast = !memcmp(msg, b->bcast_addr.value, + INFINIBAND_ALEN); return 0; } +/* Convert discovery msg addr format to InfiniBand media addr format */ +static int tipc_ib_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + return tipc_ib_raw2addr(b, addr, msg); +} + /* InfiniBand media registration info */ struct tipc_media ib_media_info = { .send_msg = tipc_l2_send_msg, @@ -79,6 +91,7 @@ struct tipc_media ib_media_info = { .addr2str = tipc_ib_addr2str, .addr2msg = tipc_ib_addr2msg, .msg2addr = tipc_ib_msg2addr, + .raw2addr = tipc_ib_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, @@ -86,4 +99,3 @@ .hwaddr_len = INFINIBAND_ALEN, .name = "ib" }; - diff --git a/net/tipc/link.c b/net/tipc/link.c index c5190ab75290..ad2c57f5868d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -37,6 +37,7 @@ #include "core.h" #include "link.h" #include "port.h" +#include "socket.h" #include "name_distr.h" #include "discover.h" #include "config.h" @@ -101,9 +102,18 @@ static unsigned int align(unsigned int i) static void link_init_max_pkt(struct tipc_link *l_ptr) { + struct tipc_bearer *b_ptr; u32 max_pkt; - max_pkt = (l_ptr->b_ptr->mtu & ~3); + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + if (!b_ptr) { + rcu_read_unlock(); + return; + } + max_pkt = (b_ptr->mtu & ~3); + rcu_read_unlock(); + if (max_pkt > MAX_MSG_SIZE) max_pkt = MAX_MSG_SIZE; @@ -248,7 +258,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->owner = n_ptr; l_ptr->checkpoint = 1; l_ptr->peer_session = INVALID_SESSION; - l_ptr->b_ptr = b_ptr; + l_ptr->bearer_id = b_ptr->identity;
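The eth_media.c and ib_media.c hunks above split address handling into three explicit conversions over the enlarged 32-byte address field: addr2msg (media address to discovery-message format), msg2addr (the inverse) and the new raw2addr (raw interface address to media address). The self-contained sketch below mirrors the Ethernet variant; the constants copy the TIPC defines, while the helper names and the main() harness are illustrative only.

#include <stdio.h>
#include <string.h>

#define MEDIA_ADDR_SIZE   32	/* was 20 before this series */
#define MEDIA_TYPE_OFFSET 3
#define ETH_OFFSET        4	/* MAC position inside the field */
#define ETH_TYPE          1
#define ETH_LEN           6

struct media_addr {
	unsigned char value[MEDIA_ADDR_SIZE];
	unsigned char media_id;
	int broadcast;
};

/* cf. tipc_eth_addr2msg(): media addr -> discovery message field */
static void eth_addr2msg(char *msg, const struct media_addr *addr)
{
	memset(msg, 0, MEDIA_ADDR_SIZE);
	msg[MEDIA_TYPE_OFFSET] = ETH_TYPE;
	memcpy(msg + ETH_OFFSET, addr->value, ETH_LEN);
}

/* cf. tipc_eth_raw2addr(): raw MAC -> media addr */
static void eth_raw2addr(struct media_addr *addr, const char *mac)
{
	static const unsigned char bcast[ETH_LEN] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

	memset(addr, 0, sizeof(*addr));
	memcpy(addr->value, mac, ETH_LEN);
	addr->media_id = ETH_TYPE;
	addr->broadcast = !memcmp(addr->value, bcast, ETH_LEN);
}

/* cf. tipc_eth_msg2addr(): skip the preamble, then reuse raw2addr */
static void eth_msg2addr(struct media_addr *addr, const char *msg)
{
	eth_raw2addr(addr, msg + ETH_OFFSET);
}

int main(void)
{
	const char mac[ETH_LEN] = { 0x02, 0x00, 0x00, 0xaa, 0xbb, 0xcc };
	struct media_addr a, b;
	char msg[MEDIA_ADDR_SIZE];

	eth_raw2addr(&a, mac);
	eth_addr2msg(msg, &a);
	eth_msg2addr(&b, msg);
	printf("round trip %s\n",
	       memcmp(a.value, b.value, ETH_LEN) ? "failed" : "ok");
	return 0;
}

For Ethernet the discovery-message format is simply the media-type byte at offset 3 plus the MAC at offset 4, which is why tipc_eth_msg2addr() above can be expressed as raw2addr applied past the preamble.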
link_set_supervision_props(l_ptr, b_ptr->tolerance); l_ptr->state = RESET_UNKNOWN; @@ -263,6 +273,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); + l_ptr->net_plane = b_ptr->net_plane; link_init_max_pkt(l_ptr); l_ptr->next_out_no = 1; @@ -287,14 +298,14 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - spin_lock_bh(&n_ptr->lock); + tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) { tipc_link_reset(l_ptr); if (shutting_down || !tipc_node_is_up(n_ptr)) { tipc_node_detach_link(l_ptr->owner, l_ptr); tipc_link_reset_fragments(l_ptr); - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); /* Nobody else can access this link now: */ del_timer_sync(&l_ptr->timer); @@ -302,12 +313,12 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) } else { /* Detach/delete when failover is finished: */ l_ptr->flags |= LINK_STOPPED; - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); del_timer_sync(&l_ptr->timer); } continue; } - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); } rcu_read_unlock(); } @@ -388,9 +399,8 @@ static void link_release_outqueue(struct tipc_link *l_ptr) */ void tipc_link_reset_fragments(struct tipc_link *l_ptr) { - kfree_skb(l_ptr->reasm_head); - l_ptr->reasm_head = NULL; - l_ptr->reasm_tail = NULL; + kfree_skb(l_ptr->reasm_buf); + l_ptr->reasm_buf = NULL; } /** @@ -426,7 +436,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) return; tipc_node_link_down(l_ptr->owner, l_ptr); - tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); + tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; @@ -464,11 +474,11 @@ void tipc_link_reset_list(unsigned int bearer_id) rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - spin_lock_bh(&n_ptr->lock); + tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) tipc_link_reset(l_ptr); - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); } rcu_read_unlock(); } @@ -477,7 +487,7 @@ static void link_activate(struct tipc_link *l_ptr) { l_ptr->next_in_no = l_ptr->stats.recv_info = 1; tipc_node_link_up(l_ptr->owner, l_ptr); - tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); + tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr); } /** @@ -777,7 +787,7 @@ int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) if (likely(!link_congested(l_ptr))) { link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; return dsz; } @@ -825,7 +835,6 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) struct tipc_node *n_ptr; int res = -ELINKCONG; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -838,7 +847,6 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) } else { kfree_skb(buf); } - read_unlock_bh(&tipc_net_lock); return res; } @@ -902,7 +910,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest) if (list_empty(message_list)) return; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -917,7 +924,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest) } tipc_node_unlock(n_ptr); } - 
read_unlock_bh(&tipc_net_lock); /* discard the messages if they couldn't be sent */ list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) { @@ -941,7 +947,7 @@ static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf, if (likely(!link_congested(l_ptr))) { if (likely(msg_size(msg) <= l_ptr->max_pkt)) { link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; return res; @@ -979,7 +985,6 @@ again: if (unlikely(res < 0)) return res; - read_lock_bh(&tipc_net_lock); node = tipc_node_find(destaddr); if (likely(node)) { tipc_node_lock(node); @@ -990,7 +995,6 @@ again: &sender->max_pkt); exit: tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return res; } @@ -1007,7 +1011,6 @@ exit: */ sender->max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) @@ -1018,7 +1021,6 @@ exit: } tipc_node_unlock(node); } - read_unlock_bh(&tipc_net_lock); /* Couldn't find a link to the destination node */ kfree_skb(buf); @@ -1204,7 +1206,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (r_q_size && buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->retransm_queue_head = mod(++r_q_head); l_ptr->retransm_queue_size = --r_q_size; l_ptr->stats.retransmitted++; @@ -1216,7 +1218,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); l_ptr->proto_msg_queue = NULL; @@ -1233,7 +1235,8 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (mod(next - first) < l_ptr->queue_limit[0]) { msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, + &l_ptr->media_addr); if (msg_user(msg) == MSG_BUNDLER) msg_set_type(msg, CLOSED_MSG); l_ptr->next_out = buf->next; @@ -1256,33 +1259,24 @@ void tipc_link_push_queue(struct tipc_link *l_ptr) } while (!res); } -static void link_reset_all(unsigned long addr) +void tipc_link_reset_all(struct tipc_node *node) { - struct tipc_node *n_ptr; char addr_string[16]; u32 i; - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find((u32)addr); - if (!n_ptr) { - read_unlock_bh(&tipc_net_lock); - return; /* node no longer exists */ - } - - tipc_node_lock(n_ptr); + tipc_node_lock(node); pr_warn("Resetting all links to %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + tipc_addr_string_fill(addr_string, node->addr)); for (i = 0; i < MAX_BEARERS; i++) { - if (n_ptr->links[i]) { - link_print(n_ptr->links[i], "Resetting link\n"); - tipc_link_reset(n_ptr->links[i]); + if (node->links[i]) { + link_print(node->links[i], "Resetting link\n"); + tipc_link_reset(node->links[i]); } } - tipc_node_unlock(n_ptr); - read_unlock_bh(&tipc_net_lock); + tipc_node_unlock(node); } static void link_retransmit_failure(struct tipc_link *l_ptr, @@ -1319,10 +1313,9 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, 
n_ptr->bclink.oos_state, n_ptr->bclink.last_sent); - tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr); - tipc_node_unlock(n_ptr); + tipc_bclink_set_flags(TIPC_BCLINK_RESET); l_ptr->stale_count = 0; } } @@ -1352,7 +1345,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf, msg = buf_msg(buf); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); buf = buf->next; retransmits--; l_ptr->stats.retransmitted++; @@ -1440,14 +1433,13 @@ static int link_recv_buf_validate(struct sk_buff *buf) /** * tipc_rcv - process TIPC packets/messages arriving from off-node * @head: pointer to message buffer chain - * @tb_ptr: pointer to bearer message arrived on + * @b_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. */ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) { - read_lock_bh(&tipc_net_lock); while (head) { struct tipc_node *n_ptr; struct tipc_link *l_ptr; @@ -1497,14 +1489,14 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) goto unlock_discard; /* Verify that communication with node is currently allowed */ - if ((n_ptr->block_setup & WAIT_PEER_DOWN) && - msg_user(msg) == LINK_PROTOCOL && - (msg_type(msg) == RESET_MSG || - msg_type(msg) == ACTIVATE_MSG) && - !msg_redundant_link(msg)) - n_ptr->block_setup &= ~WAIT_PEER_DOWN; - - if (n_ptr->block_setup) + if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) && + msg_user(msg) == LINK_PROTOCOL && + (msg_type(msg) == RESET_MSG || + msg_type(msg) == ACTIVATE_MSG) && + !msg_redundant_link(msg)) + n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN; + + if (tipc_node_blocked(n_ptr)) goto unlock_discard; /* Validate message sequence number info */ @@ -1581,17 +1573,12 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) } msg = buf_msg(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { - int rc; - l_ptr->stats.recv_fragments++; - rc = tipc_link_frag_rcv(&l_ptr->reasm_head, - &l_ptr->reasm_tail, - &buf); - if (rc == LINK_REASM_COMPLETE) { + if (tipc_buf_append(&l_ptr->reasm_buf, &buf)) { l_ptr->stats.recv_fragmented++; msg = buf_msg(buf); } else { - if (rc == LINK_REASM_ERROR) + if (!l_ptr->reasm_buf) tipc_link_reset(l_ptr); tipc_node_unlock(n_ptr); continue; @@ -1604,7 +1591,7 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: tipc_node_unlock(n_ptr); - tipc_port_rcv(buf); + tipc_sk_rcv(buf); continue; case MSG_BUNDLER: l_ptr->stats.recv_bundles++; @@ -1635,7 +1622,6 @@ unlock_discard: discard: kfree_skb(buf); } - read_unlock_bh(&tipc_net_lock); } /** @@ -1747,12 +1733,12 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, return; /* Abort non-RESET send if communication with node is prohibited */ - if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG)) + if ((tipc_node_blocked(l_ptr->owner)) && (msg_typ != RESET_MSG)) return; /* Create protocol message with "out-of-sequence" sequence number */ msg_set_type(msg, msg_typ); - msg_set_net_plane(msg, l_ptr->b_ptr->net_plane); + msg_set_net_plane(msg, l_ptr->net_plane); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); msg_set_last_bcast(msg, tipc_bclink_get_last_sent()); @@ -1818,7 +1804,7 @@ void tipc_link_proto_xmit(struct 
tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); } @@ -1840,12 +1826,9 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) if (l_ptr->exp_msg_count) goto exit; - /* record unnumbered packet arrival (force mismatch on next timeout) */ - l_ptr->checkpoint--; - - if (l_ptr->b_ptr->net_plane != msg_net_plane(msg)) + if (l_ptr->net_plane != msg_net_plane(msg)) if (tipc_own_addr > msg_prevnode(msg)) - l_ptr->b_ptr->net_plane = msg_net_plane(msg); + l_ptr->net_plane = msg_net_plane(msg); switch (msg_type(msg)) { @@ -1862,7 +1845,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) * peer has lost contact -- don't allow peer's links * to reactivate before we recognize loss & clean up */ - l_ptr->owner->block_setup = WAIT_NODE_DOWN; + l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN; } link_state_event(l_ptr, RESET_MSG); @@ -1918,6 +1901,10 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) tipc_link_reset(l_ptr); /* Enforce change to take effect */ break; } + + /* Record reception; force mismatch at next timeout: */ + l_ptr->checkpoint--; + link_state_event(l_ptr, TRAFFIC_MSG_EVT); l_ptr->stats.recv_states++; if (link_reset_unknown(l_ptr)) @@ -2177,9 +2164,7 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, } if (msg_user(msg) == MSG_FRAGMENTER) { l_ptr->stats.recv_fragments++; - tipc_link_frag_rcv(&l_ptr->reasm_head, - &l_ptr->reasm_tail, - &buf); + tipc_buf_append(&l_ptr->reasm_buf, &buf); } } exit: @@ -2317,53 +2302,6 @@ static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) return dsz; } -/* tipc_link_frag_rcv(): Called with node lock on. Returns - * the reassembled buffer if message is complete. - */ -int tipc_link_frag_rcv(struct sk_buff **head, struct sk_buff **tail, - struct sk_buff **fbuf) -{ - struct sk_buff *frag = *fbuf; - struct tipc_msg *msg = buf_msg(frag); - u32 fragid = msg_type(msg); - bool headstolen; - int delta; - - skb_pull(frag, msg_hdr_sz(msg)); - if (fragid == FIRST_FRAGMENT) { - if (*head || skb_unclone(frag, GFP_ATOMIC)) - goto out_free; - *head = frag; - skb_frag_list_init(*head); - *fbuf = NULL; - return 0; - } else if (*head && - skb_try_coalesce(*head, frag, &headstolen, &delta)) { - kfree_skb_partial(frag, headstolen); - } else { - if (!*head) - goto out_free; - if (!skb_has_frag_list(*head)) - skb_shinfo(*head)->frag_list = frag; - else - (*tail)->next = frag; - *tail = frag; - (*head)->truesize += frag->truesize; - } - if (fragid == LAST_FRAGMENT) { - *fbuf = *head; - *tail = *head = NULL; - return LINK_REASM_COMPLETE; - } - *fbuf = NULL; - return 0; -out_free: - pr_warn_ratelimited("Link unable to reassemble fragmented message\n"); - kfree_skb(*fbuf); - *fbuf = NULL; - return LINK_REASM_ERROR; -} - static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) { if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) @@ -2397,8 +2335,6 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) /* tipc_link_find_owner - locate owner node of link by link's name * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. 
- * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted; - * this also prevents link deletion. * * Returns pointer to node owning the link, or 0 if no matching link is found. */ @@ -2460,7 +2396,7 @@ static int link_value_is_valid(u16 cmd, u32 new_value) * @new_value: new value of link, bearer, or media setting * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) * - * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted. + * Caller must hold RTNL lock to ensure link/bearer/media is not deleted. * * Returns 0 if value updated and negative value on error. */ @@ -2566,9 +2502,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space " (cannot change setting on broadcast link)"); } - read_lock_bh(&tipc_net_lock); res = link_cmd_set_value(args->name, new_value, cmd); - read_unlock_bh(&tipc_net_lock); if (res) return tipc_cfg_reply_error_string("cannot change link setting"); @@ -2602,22 +2536,18 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ return tipc_cfg_reply_error_string("link not found"); return tipc_cfg_reply_none(); } - read_lock_bh(&tipc_net_lock); node = tipc_link_find_owner(link_name, &bearer_id); - if (!node) { - read_unlock_bh(&tipc_net_lock); + if (!node) return tipc_cfg_reply_error_string("link not found"); - } + tipc_node_lock(node); l_ptr = node->links[bearer_id]; if (!l_ptr) { tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string("link not found"); } link_reset_statistics(l_ptr); tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); } @@ -2650,18 +2580,15 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) if (!strcmp(name, tipc_bclink_name)) return tipc_bclink_stats(buf, buf_size); - read_lock_bh(&tipc_net_lock); node = tipc_link_find_owner(name, &bearer_id); - if (!node) { - read_unlock_bh(&tipc_net_lock); + if (!node) return 0; - } + tipc_node_lock(node); l = node->links[bearer_id]; if (!l) { tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return 0; } @@ -2727,7 +2654,6 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) (s->accu_queue_sz / s->queue_sz_counts) : 0); tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return ret; } @@ -2778,7 +2704,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) if (dest == tipc_own_addr) return MAX_MSG_SIZE; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -2787,13 +2712,18 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) res = l_ptr->max_pkt; tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); return res; } static void link_print(struct tipc_link *l_ptr, const char *str) { - pr_info("%s Link %x<%s>:", str, l_ptr->addr, l_ptr->b_ptr->name); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + if (b_ptr) + pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); + rcu_read_unlock(); if (link_working_unknown(l_ptr)) pr_cont(":WU\n"); diff --git a/net/tipc/link.h b/net/tipc/link.h index 8c0b49b5b2ee..200d518b218e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -40,11 +40,6 @@ #include "msg.h" #include "node.h" -/* Link reassembly status codes - */ -#define LINK_REASM_ERROR -1 -#define LINK_REASM_COMPLETE 1 - /* Out-of-range value for link sequence numbers */ #define INVALID_LINK_SEQ 0x10000 @@ -107,7 +102,7 @@ struct tipc_stats { * 
@checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint - * @b_ptr: pointer to bearer used by link + * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] * @continuity_interval: link continuity testing interval [in ms] * @abort_limit: # of unacknowledged continuity probes needed to reset link @@ -116,6 +111,7 @@ struct tipc_stats { * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') * @queue_limit: outbound message queue congestion thresholds (indexed by user) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset @@ -139,8 +135,7 @@ struct tipc_stats { * @next_out: ptr to first unsent outbound message in queue * @waiting_ports: linked list of ports waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages - * @reasm_head: list head of partially reassembled inbound message fragments - * @reasm_tail: last fragment received + * @reasm_buf: head of partially reassembled inbound message fragments * @stats: collects statistics regarding link activity */ struct tipc_link { @@ -155,7 +150,7 @@ struct tipc_link { u32 checkpoint; u32 peer_session; u32 peer_bearer_id; - struct tipc_bearer *b_ptr; + u32 bearer_id; u32 tolerance; u32 continuity_interval; u32 abort_limit; @@ -167,6 +162,7 @@ struct tipc_link { } proto_msg; struct tipc_msg *pmsg; u32 priority; + char net_plane; u32 queue_limit[15]; /* queue_limit[0]==window limit */ /* Changeover */ @@ -202,8 +198,7 @@ struct tipc_link { /* Fragmentation/reassembly */ u32 long_msg_seq_no; - struct sk_buff *reasm_head; - struct sk_buff *reasm_tail; + struct sk_buff *reasm_buf; /* Statistics */ struct tipc_stats stats; @@ -228,6 +223,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); +void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(unsigned int bearer_id); int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); @@ -239,9 +235,6 @@ int tipc_link_iovec_xmit_fast(struct tipc_port *sender, struct iovec const *msg_sect, unsigned int len, u32 destnode); void tipc_link_bundle_rcv(struct sk_buff *buf); -int tipc_link_frag_rcv(struct sk_buff **reasm_head, - struct sk_buff **reasm_tail, - struct sk_buff **fbuf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); void tipc_link_push_queue(struct tipc_link *l_ptr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e525f8ce1dee..8be6e94a1ca9 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -1,7 +1,7 @@ /* * net/tipc/msg.c: TIPC message header routines * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. 
* @@ -99,3 +99,56 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, } return dsz; } + +/* tipc_buf_append(): Append a buffer to the fragment list of another buffer + * Let first buffer become head buffer + * Returns 1 and sets *buf to headbuf if chain is complete, otherwise 0 + * Leaves headbuf pointer at NULL if failure + */ +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) +{ + struct sk_buff *head = *headbuf; + struct sk_buff *frag = *buf; + struct sk_buff *tail; + struct tipc_msg *msg = buf_msg(frag); + u32 fragid = msg_type(msg); + bool headstolen; + int delta; + + skb_pull(frag, msg_hdr_sz(msg)); + + if (fragid == FIRST_FRAGMENT) { + if (head || skb_unclone(frag, GFP_ATOMIC)) + goto out_free; + head = *headbuf = frag; + skb_frag_list_init(head); + return 0; + } + if (!head) + goto out_free; + tail = TIPC_SKB_CB(head)->tail; + if (skb_try_coalesce(head, frag, &headstolen, &delta)) { + kfree_skb_partial(frag, headstolen); + } else { + if (!skb_has_frag_list(head)) + skb_shinfo(head)->frag_list = frag; + else + tail->next = frag; + head->truesize += frag->truesize; + head->data_len += frag->len; + head->len += frag->len; + TIPC_SKB_CB(head)->tail = frag; + } + if (fragid == LAST_FRAGMENT) { + *buf = head; + TIPC_SKB_CB(head)->tail = NULL; + *headbuf = NULL; + return 1; + } + *buf = NULL; + return 0; +out_free: + pr_warn_ratelimited("Unable to build fragment list\n"); + kfree_skb(*buf); + return 0; +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 76d1269b9443..503511903d1d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, Ericsson AB + * Copyright (c) 2000-2007, 2014, Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -711,4 +711,7 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, unsigned int len, int max_size, struct sk_buff **buf); + +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); + #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index aff8041dc157..8ce730984aa1 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -38,34 +38,6 @@ #include "link.h" #include "name_distr.h" -#define ITEM_SIZE sizeof(struct distr_item) - -/** - * struct distr_item - publication info distributed to other nodes - * @type: name sequence type - * @lower: name sequence lower bound - * @upper: name sequence upper bound - * @ref: publishing port reference - * @key: publication key - * - * ===> All fields are stored in network byte order. <=== - * - * First 3 fields identify (name or) name sequence being published. - * Reference field uniquely identifies port that published name sequence. - * Key field uniquely identifies publication, in the event a port has - * multiple publications of the same name sequence. - * - * Note: There is no field that identifies the publishing node because it is - * the same for all items contained within a publication message. 
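tipc_buf_append() above replaces the old three-pointer reassembly interface (reasm_head/reasm_tail plus the LINK_REASM_* status codes) with a single head buffer whose running tail is cached in the skb control block. A simplified model of that bookkeeping, using plain structs instead of sk_buffs and ignoring the skb coalescing fast path:

#include <stdio.h>
#include <stdlib.h>

enum frag_id { FIRST_FRAGMENT, MIDDLE_FRAGMENT, LAST_FRAGMENT };

struct frag {
	struct frag *next;
	enum frag_id id;
	size_t len;
};

struct reasm {
	struct frag *head;	/* first fragment, owns the chain */
	struct frag *tail;	/* cached last fragment, O(1) append */
	size_t total;
};

/* Returns 1 when the message is complete, 0 to keep waiting,
 * -1 on a sequencing error (cf. the out_free path above). */
static int buf_append(struct reasm *r, struct frag *f)
{
	if (f->id == FIRST_FRAGMENT) {
		if (r->head)
			return -1;		/* duplicate head fragment */
		r->head = r->tail = f;
		r->total = f->len;
		return 0;
	}
	if (!r->head)
		return -1;			/* fragment without a head */
	r->tail->next = f;			/* append via cached tail */
	r->tail = f;
	r->total += f->len;
	if (f->id != LAST_FRAGMENT)
		return 0;
	r->tail = NULL;		/* cf. TIPC_SKB_CB(head)->tail = NULL */
	return 1;
}

int main(void)
{
	struct reasm r = { 0 };
	struct frag f[3] = {
		{ NULL, FIRST_FRAGMENT, 100 },
		{ NULL, MIDDLE_FRAGMENT, 100 },
		{ NULL, LAST_FRAGMENT, 40 },
	};

	for (int i = 0; i < 3; i++)
		if (buf_append(&r, &f[i]) == 1)
			printf("complete: %zu bytes\n", r.total);
	return 0;
}

Caching the tail keeps each append O(1) without walking the fragment list, which is exactly what storing it in TIPC_SKB_CB(head)->tail buys the kernel version.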
- */ -struct distr_item { - __be32 type; - __be32 lower; - __be32 upper; - __be32 ref; - __be32 key; -}; - /** * struct publ_list - list of publications made by this node * @list: circular list of publications @@ -127,7 +99,7 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) return buf; } -static void named_cluster_distribute(struct sk_buff *buf) +void named_cluster_distribute(struct sk_buff *buf) { struct sk_buff *buf_copy; struct tipc_node *n_ptr; @@ -135,18 +107,18 @@ static void named_cluster_distribute(struct sk_buff *buf) rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - spin_lock_bh(&n_ptr->lock); + tipc_node_lock(n_ptr); l_ptr = n_ptr->active_links[n_ptr->addr & 1]; if (l_ptr) { buf_copy = skb_copy(buf, GFP_ATOMIC); if (!buf_copy) { - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); break; } msg_set_destnode(buf_msg(buf_copy), n_ptr->addr); __tipc_link_xmit(l_ptr, buf_copy); } - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); } rcu_read_unlock(); @@ -156,7 +128,7 @@ static void named_cluster_distribute(struct sk_buff *buf) /** * tipc_named_publish - tell other nodes about a new publication by this node */ -void tipc_named_publish(struct publication *publ) +struct sk_buff *tipc_named_publish(struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -165,23 +137,23 @@ void tipc_named_publish(struct publication *publ) publ_lists[publ->scope]->size++; if (publ->scope == TIPC_NODE_SCOPE) - return; + return NULL; buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); if (!buf) { pr_warn("Publication distribution failure\n"); - return; + return NULL; } item = (struct distr_item *)msg_data(buf_msg(buf)); publ_to_item(item, publ); - named_cluster_distribute(buf); + return buf; } /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node */ -void tipc_named_withdraw(struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -190,17 +162,17 @@ void tipc_named_withdraw(struct publication *publ) publ_lists[publ->scope]->size--; if (publ->scope == TIPC_NODE_SCOPE) - return; + return NULL; buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { pr_warn("Withdrawal distribution failure\n"); - return; + return NULL; } item = (struct distr_item *)msg_data(buf_msg(buf)); publ_to_item(item, publ); - named_cluster_distribute(buf); + return buf; } /* @@ -239,31 +211,9 @@ static void named_distribute(struct list_head *message_list, u32 node, /** * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(unsigned long nodearg) +void tipc_named_node_up(u32 max_item_buf, u32 node) { - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - struct list_head message_list; - u32 node = (u32)nodearg; - u32 max_item_buf = 0; - - /* compute maximum amount of publication data to send per message */ - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(node); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[0]; - if (l_ptr) - max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) / - ITEM_SIZE) * ITEM_SIZE; - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - if (!max_item_buf) - return; - - /* create list of publication messages, then send them as a unit */ - INIT_LIST_HEAD(&message_list); + LIST_HEAD(message_list); read_lock_bh(&tipc_nametbl_lock); named_distribute(&message_list, node, &publ_cluster, max_item_buf); diff --git 
a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 9b312ccfd43e..b2eed4ec1526 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -39,9 +39,38 @@ #include "name_table.h" -void tipc_named_publish(struct publication *publ); -void tipc_named_withdraw(struct publication *publ); -void tipc_named_node_up(unsigned long node); +#define ITEM_SIZE sizeof(struct distr_item) + +/** + * struct distr_item - publication info distributed to other nodes + * @type: name sequence type + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @ref: publishing port reference + * @key: publication key + * + * ===> All fields are stored in network byte order. <=== + * + * First 3 fields identify (name or) name sequence being published. + * Reference field uniquely identifies port that published name sequence. + * Key field uniquely identifies publication, in the event a port has + * multiple publications of the same name sequence. + * + * Note: There is no field that identifies the publishing node because it is + * the same for all items contained within a publication message. + */ +struct distr_item { + __be32 type; + __be32 lower; + __be32 upper; + __be32 ref; + __be32 key; +}; + +struct sk_buff *tipc_named_publish(struct publication *publ); +struct sk_buff *tipc_named_withdraw(struct publication *publ); +void named_cluster_distribute(struct sk_buff *buf); +void tipc_named_node_up(u32 max_item_buf, u32 node); void tipc_named_rcv(struct sk_buff *buf); void tipc_named_reinit(void); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 042e8e3cabc0..9d7d37d95187 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -664,6 +664,7 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, u32 scope, u32 port_ref, u32 key) { struct publication *publ; + struct sk_buff *buf = NULL; if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", @@ -676,9 +677,12 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, tipc_own_addr, port_ref, key); if (likely(publ)) { table.local_publ_count++; - tipc_named_publish(publ); + buf = tipc_named_publish(publ); } write_unlock_bh(&tipc_nametbl_lock); + + if (buf) + named_cluster_distribute(buf); return publ; } @@ -688,15 +692,19 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) { struct publication *publ; + struct sk_buff *buf; write_lock_bh(&tipc_nametbl_lock); publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); if (likely(publ)) { table.local_publ_count--; - tipc_named_withdraw(publ); + buf = tipc_named_withdraw(publ); write_unlock_bh(&tipc_nametbl_lock); list_del_init(&publ->pport_list); kfree(publ); + + if (buf) + named_cluster_distribute(buf); return 1; } write_unlock_bh(&tipc_nametbl_lock); @@ -961,6 +969,7 @@ static void tipc_purge_publications(struct name_seq *seq) list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); + kfree(publ); } } @@ -982,7 +991,6 @@ void tipc_nametbl_stop(void) hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) { tipc_purge_publications(seq); } - continue; } kfree(table.types); table.types = NULL; diff --git a/net/tipc/net.c b/net/tipc/net.c index 4c564eb69e1a..f64375e7f99f 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -39,45 +39,41 @@ #include "name_distr.h" #include 
"subscr.h" #include "port.h" +#include "socket.h" #include "node.h" #include "config.h" /* * The TIPC locking policy is designed to ensure a very fine locking * granularity, permitting complete parallel access to individual - * port and node/link instances. The code consists of three major + * port and node/link instances. The code consists of four major * locking domains, each protected with their own disjunct set of locks. * - * 1: The routing hierarchy. - * Comprises the structures 'zone', 'cluster', 'node', 'link' - * and 'bearer'. The whole hierarchy is protected by a big - * read/write lock, tipc_net_lock, to enssure that nothing is added - * or removed while code is accessing any of these structures. - * This layer must not be called from the two others while they - * hold any of their own locks. - * Neither must it itself do any upcalls to the other two before - * it has released tipc_net_lock and other protective locks. + * 1: The bearer level. + * RTNL lock is used to serialize the process of configuring bearer + * on update side, and RCU lock is applied on read side to make + * bearer instance valid on both paths of message transmission and + * reception. * - * Within the tipc_net_lock domain there are two sub-domains;'node' and - * 'bearer', where local write operations are permitted, - * provided that those are protected by individual spin_locks - * per instance. Code holding tipc_net_lock(read) and a node spin_lock - * is permitted to poke around in both the node itself and its - * subordinate links. I.e, it can update link counters and queues, - * change link state, send protocol messages, and alter the - * "active_links" array in the node; but it can _not_ remove a link - * or a node from the overall structure. - * Correspondingly, individual bearers may change status within a - * tipc_net_lock(read), protected by an individual spin_lock ber bearer - * instance, but it needs tipc_net_lock(write) to remove/add any bearers. + * 2: The node and link level. + * All node instances are saved into two tipc_node_list and node_htable + * lists. The two lists are protected by node_list_lock on write side, + * and they are guarded with RCU lock on read side. Especially node + * instance is destroyed only when TIPC module is removed, and we can + * confirm that there has no any user who is accessing the node at the + * moment. Therefore, Except for iterating the two lists within RCU + * protection, it's no needed to hold RCU that we access node instance + * in other places. * + * In addition, all members in node structure including link instances + * are protected by node spin lock. * - * 2: The transport level of the protocol. - * This consists of the structures port, (and its user level - * representations, such as user_port and tipc_sock), reference and - * tipc_user (port.c, reg.c, socket.c). + * 3: The transport level of the protocol. + * This consists of the structures port, (and its user level + * representations, such as user_port and tipc_sock), reference and + * tipc_user (port.c, reg.c, socket.c). * - * This layer has four different locks: + * This layer has four different locks: * - The tipc_port spin_lock. This is protecting each port instance * from parallel data access and removal. Since we can not place * this lock in the port itself, it has been placed in the @@ -96,7 +92,7 @@ * There are two such lists; 'port_list', which is used for management, * and 'wait_list', which is used to queue ports during congestion. 
* - * 3: The name table (name_table.c, name_distr.c, subscription.c) + * 4: The name table (name_table.c, name_distr.c, subscription.c) * - There is one big read/write-lock (tipc_nametbl_lock) protecting the * overall name table structure. Nothing must be added/removed to * this structure without holding write access to it. @@ -108,8 +104,6 @@ * - A local spin_lock protecting the queue of subscriber events. */ -DEFINE_RWLOCK(tipc_net_lock); - static void net_route_named_msg(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); @@ -148,7 +142,7 @@ void tipc_net_route_msg(struct sk_buff *buf) if (msg_mcast(msg)) tipc_port_mcast_rcv(buf, NULL); else if (msg_destport(msg)) - tipc_port_rcv(buf); + tipc_sk_rcv(buf); else net_route_named_msg(buf); return; @@ -171,22 +165,25 @@ void tipc_net_route_msg(struct sk_buff *buf) tipc_link_xmit(buf, dnode, msg_link_selector(msg)); } -void tipc_net_start(u32 addr) +int tipc_net_start(u32 addr) { char addr_string[16]; + int res; - write_lock_bh(&tipc_net_lock); tipc_own_addr = addr; tipc_named_reinit(); tipc_port_reinit(); - tipc_bclink_init(); - write_unlock_bh(&tipc_net_lock); + res = tipc_bclink_init(); + if (res) + return res; tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, TIPC_ZONE_SCOPE, 0, tipc_own_addr); + pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); + return 0; } void tipc_net_stop(void) @@ -195,11 +192,11 @@ void tipc_net_stop(void) return; tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); - write_lock_bh(&tipc_net_lock); + rtnl_lock(); tipc_bearer_stop(); tipc_bclink_stop(); tipc_node_stop(); - write_unlock_bh(&tipc_net_lock); + rtnl_unlock(); pr_info("Left network mode\n"); } diff --git a/net/tipc/net.h b/net/tipc/net.h index 079daadb3f72..c6c2b46f7c28 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,11 +37,9 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -extern rwlock_t tipc_net_lock; - void tipc_net_route_msg(struct sk_buff *buf); -void tipc_net_start(u32 addr); +int tipc_net_start(u32 addr); void tipc_net_stop(void); #endif diff --git a/net/tipc/node.c b/net/tipc/node.c index 1d3a4999a70f..5b44c3041be4 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -108,7 +108,7 @@ struct tipc_node *tipc_node_create(u32 addr) break; } list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->block_setup = WAIT_PEER_DOWN; + n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; tipc_num_nodes++; @@ -144,11 +144,13 @@ void tipc_node_stop(void) void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active = &n_ptr->active_links[0]; + u32 addr = n_ptr->addr; n_ptr->working_links++; - + tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, + l_ptr->bearer_id, addr); pr_info("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); if (!active[0]) { active[0] = active[1] = l_ptr; @@ -203,16 +205,18 @@ static void node_select_active_links(struct tipc_node *n_ptr) void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active; + u32 addr = n_ptr->addr; n_ptr->working_links--; + tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr); if (!tipc_link_is_active(l_ptr)) { pr_info("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); return; 
} pr_info("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); active = &n_ptr->active_links[0]; if (active[0] == l_ptr) @@ -239,7 +243,7 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; + n_ptr->links[l_ptr->bearer_id] = l_ptr; spin_lock_bh(&node_list_lock); tipc_num_links++; spin_unlock_bh(&node_list_lock); @@ -263,26 +267,12 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) static void node_established_contact(struct tipc_node *n_ptr) { - tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr); + n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; n_ptr->bclink.oos_state = 0; n_ptr->bclink.acked = tipc_bclink_get_last_sent(); tipc_bclink_add_node(n_ptr->addr); } -static void node_name_purge_complete(unsigned long node_addr) -{ - struct tipc_node *n_ptr; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(node_addr); - if (n_ptr) { - tipc_node_lock(n_ptr); - n_ptr->block_setup &= ~WAIT_NAMES_GONE; - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); -} - static void node_lost_contact(struct tipc_node *n_ptr) { char addr_string[16]; @@ -296,10 +286,9 @@ static void node_lost_contact(struct tipc_node *n_ptr) kfree_skb_list(n_ptr->bclink.deferred_head); n_ptr->bclink.deferred_size = 0; - if (n_ptr->bclink.reasm_head) { - kfree_skb(n_ptr->bclink.reasm_head); - n_ptr->bclink.reasm_head = NULL; - n_ptr->bclink.reasm_tail = NULL; + if (n_ptr->bclink.reasm_buf) { + kfree_skb(n_ptr->bclink.reasm_buf); + n_ptr->bclink.reasm_buf = NULL; } tipc_bclink_remove_node(n_ptr->addr); @@ -318,12 +307,13 @@ static void node_lost_contact(struct tipc_node *n_ptr) tipc_link_reset_fragments(l_ptr); } - /* Notify subscribers */ - tipc_nodesub_notify(n_ptr); + n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; - /* Prevent re-contact with node until cleanup is done */ - n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE; - tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr); + /* Notify subscribers and prevent re-contact with node until + * cleanup is done. 
+ */ + n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN | + TIPC_NOTIFY_NODE_DOWN; } struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) @@ -436,3 +426,63 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) rcu_read_unlock(); return buf; } + +/** + * tipc_node_get_linkname - get the name of a link + * + * @bearer_id: id of the bearer + * @node: peer node address + * @linkname: link name output buffer + * + * Returns 0 on success + */ +int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) +{ + struct tipc_link *link; + struct tipc_node *node = tipc_node_find(addr); + + if ((bearer_id >= MAX_BEARERS) || !node) + return -EINVAL; + tipc_node_lock(node); + link = node->links[bearer_id]; + if (link) { + strncpy(linkname, link->name, len); + tipc_node_unlock(node); + return 0; + } + tipc_node_unlock(node); + return -EINVAL; +} + +void tipc_node_unlock(struct tipc_node *node) +{ + LIST_HEAD(nsub_list); + struct tipc_link *link; + int pkt_sz = 0; + u32 addr = 0; + + if (likely(!node->action_flags)) { + spin_unlock_bh(&node->lock); + return; + } + + if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) { + list_replace_init(&node->nsub, &nsub_list); + node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN; + } + if (node->action_flags & TIPC_NOTIFY_NODE_UP) { + link = node->active_links[0]; + node->action_flags &= ~TIPC_NOTIFY_NODE_UP; + if (link) { + pkt_sz = ((link->max_pkt - INT_H_SIZE) / ITEM_SIZE) * + ITEM_SIZE; + addr = node->addr; + } + } + spin_unlock_bh(&node->lock); + + if (!list_empty(&nsub_list)) + tipc_nodesub_notify(&nsub_list); + if (pkt_sz) + tipc_named_node_up(pkt_sz, addr); +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 7cbb8cec1a93..9087063793f2 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -47,62 +47,73 @@ */ #define INVALID_NODE_SIG 0x10000 -/* Flags used to block (re)establishment of contact with a neighboring node */ -#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */ -#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */ -#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */ +/* Flags used to take different actions according to flag type + * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down + * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up + */ +enum { + TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), + TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2), + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4) +}; + +/** + * struct tipc_node_bclink - TIPC node bclink structure + * @acked: sequence # of last outbound b'cast message acknowledged by node + * @last_in: sequence # of last in-sequence b'cast message received from node + * @last_sent: sequence # of last b'cast message sent by node + * @oos_state: state tracker for handling OOS b'cast messages + * @deferred_size: number of OOS b'cast messages in deferred queue + * @deferred_head: oldest OOS b'cast message received from node + * @deferred_tail: newest OOS b'cast message received from node + * @reasm_buf: broadcast reassembly queue head from node + * @recv_permitted: true if node is allowed to receive b'cast messages + */ +struct tipc_node_bclink { + u32 acked; + u32 last_in; + u32 last_sent; + u32 oos_state; + u32 deferred_size; + struct sk_buff *deferred_head; + struct sk_buff *deferred_tail; + struct sk_buff *reasm_buf; + bool recv_permitted; +}; /** * 
struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure * @hash: links to adjacent nodes in unsorted hash chain - * @list: links to adjacent nodes in sorted list of cluster's nodes - * @nsub: list of "node down" subscriptions monitoring node * @active_links: pointers to active links to node * @links: pointers to all links to node + * @action_flags: bit mask of different types of node actions + * @bclink: broadcast-related info + * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) - * @block_setup: bit mask of conditions preventing link establishment to node * @link_cnt: number of links to node * @signature: node instance identifier - * @bclink: broadcast-related info + * @nsub: list of "node down" subscriptions monitoring node * @rcu: rcu struct for tipc_node - * @acked: sequence # of last outbound b'cast message acknowledged by node - * @last_in: sequence # of last in-sequence b'cast message received from node - * @last_sent: sequence # of last b'cast message sent by node - * @oos_state: state tracker for handling OOS b'cast messages - * @deferred_size: number of OOS b'cast messages in deferred queue - * @deferred_head: oldest OOS b'cast message received from node - * @deferred_tail: newest OOS b'cast message received from node - * @reasm_head: broadcast reassembly queue head from node - * @reasm_tail: last broadcast fragment received from node - * @recv_permitted: true if node is allowed to receive b'cast messages */ struct tipc_node { u32 addr; spinlock_t lock; struct hlist_node hash; - struct list_head list; - struct list_head nsub; struct tipc_link *active_links[2]; struct tipc_link *links[MAX_BEARERS]; + unsigned int action_flags; + struct tipc_node_bclink bclink; + struct list_head list; int link_cnt; int working_links; - int block_setup; u32 signature; + struct list_head nsub; struct rcu_head rcu; - struct { - u32 acked; - u32 last_in; - u32 last_sent; - u32 oos_state; - u32 deferred_size; - struct sk_buff *deferred_head; - struct sk_buff *deferred_tail; - struct sk_buff *reasm_head; - struct sk_buff *reasm_tail; - bool recv_permitted; - } bclink; }; extern struct list_head tipc_node_list; @@ -118,15 +129,18 @@ int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); +int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); +void tipc_node_unlock(struct tipc_node *node); -static inline void tipc_node_lock(struct tipc_node *n_ptr) +static inline void tipc_node_lock(struct tipc_node *node) { - spin_lock_bh(&n_ptr->lock); + spin_lock_bh(&node->lock); } -static inline void tipc_node_unlock(struct tipc_node *n_ptr) +static inline bool tipc_node_blocked(struct tipc_node *node) { - spin_unlock_bh(&n_ptr->lock); + return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN | + TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } #endif diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 8a7384c04add..7c59ab1d6ecb 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -81,14 +81,13 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) * * Note: node is locked by caller */ -void tipc_nodesub_notify(struct tipc_node *node) +void tipc_nodesub_notify(struct list_head *nsub_list) 
{ - struct tipc_node_subscr *ns; + struct tipc_node_subscr *ns, *safe; - list_for_each_entry(ns, &node->nsub, nodesub_list) { + list_for_each_entry_safe(ns, safe, nsub_list, nodesub_list) { if (ns->handle_node_down) { - tipc_k_signal((Handler)ns->handle_node_down, - (unsigned long)ns->usr_handle); + ns->handle_node_down(ns->usr_handle); ns->handle_node_down = NULL; } } diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h index c95d20727ded..d91b8cc81e3d 100644 --- a/net/tipc/node_subscr.h +++ b/net/tipc/node_subscr.h @@ -58,6 +58,6 @@ struct tipc_node_subscr { void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down); void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub); -void tipc_nodesub_notify(struct tipc_node *node); +void tipc_nodesub_notify(struct list_head *nsub_list); #endif diff --git a/net/tipc/port.c b/net/tipc/port.c index 5c14c7801ee6..5fd7acce01ea 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -165,7 +165,7 @@ void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp) msg_set_destnode(msg, tipc_own_addr); if (dp->count == 1) { msg_set_destport(msg, dp->ports[0]); - tipc_port_rcv(buf); + tipc_sk_rcv(buf); tipc_port_list_free(dp); return; } @@ -180,7 +180,7 @@ void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp) if ((index == 0) && (cnt != 0)) item = item->next; msg_set_destport(buf_msg(b), item->ports[index]); - tipc_port_rcv(b); + tipc_sk_rcv(b); } } exit: @@ -343,7 +343,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) /* send returned message & dispose of rejected message */ src_node = msg_prevnode(msg); if (in_own_node(src_node)) - tipc_port_rcv(rbuf); + tipc_sk_rcv(rbuf); else tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg)); exit: @@ -754,37 +754,6 @@ int tipc_port_shutdown(u32 ref) return tipc_port_disconnect(ref); } -/** - * tipc_port_rcv - receive message from lower layer and deliver to port user - */ -int tipc_port_rcv(struct sk_buff *buf) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg = buf_msg(buf); - u32 destport = msg_destport(msg); - u32 dsz = msg_data_sz(msg); - u32 err; - - /* forward unresolved named message */ - if (unlikely(!destport)) { - tipc_net_route_msg(buf); - return dsz; - } - - /* validate destination & pass to port, otherwise reject message */ - p_ptr = tipc_port_lock(destport); - if (likely(p_ptr)) { - err = tipc_sk_rcv(&tipc_port_to_sock(p_ptr)->sk, buf); - tipc_port_unlock(p_ptr); - if (likely(!err)) - return dsz; - } else { - err = TIPC_ERR_NO_PORT; - } - - return tipc_reject_msg(buf, err); -} - /* * tipc_port_iovec_rcv: Concatenate and deliver sectioned * message for this node. 
@@ -798,7 +767,7 @@ static int tipc_port_iovec_rcv(struct tipc_port *sender, res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf); if (likely(buf)) - tipc_port_rcv(buf); + tipc_sk_rcv(buf); return res; } diff --git a/net/tipc/port.h b/net/tipc/port.h index a00397393bd1..cf4ca5b1d9a4 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -42,9 +42,10 @@ #include "msg.h" #include "node_subscr.h" -#define TIPC_FLOW_CONTROL_WIN 512 -#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) +#define TIPC_CONNACK_INTV 256 +#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) +#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) /** * struct tipc_port - TIPC port structure @@ -134,7 +135,6 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); /* * TIPC messaging routines */ -int tipc_port_rcv(struct sk_buff *buf); int tipc_send(struct tipc_port *port, struct iovec const *msg_sect, @@ -187,7 +187,7 @@ static inline void tipc_port_unlock(struct tipc_port *p_ptr) static inline int tipc_port_congested(struct tipc_port *p_ptr) { - return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); + return ((p_ptr->sent - p_ptr->acked) >= TIPC_FLOWCTRL_WIN); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3c0256962f7d..ef0475568f9e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -36,6 +36,7 @@ #include "core.h" #include "port.h" +#include "node.h" #include <linux/export.h> @@ -44,7 +45,7 @@ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -static int backlog_rcv(struct sock *sk, struct sk_buff *skb); +static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); @@ -195,11 +196,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock->state = state; sock_init_data(sock, sk); - sk->sk_backlog_rcv = backlog_rcv; + sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; - tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + atomic_set(&tsk->dupl_rcvcnt, 0); tipc_port_unlock(port); if (sock->state == SS_READY) { @@ -983,10 +985,11 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, return 0; } -static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) +static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) { struct sock *sk = sock->sk; DEFINE_WAIT(wait); + long timeo = *timeop; int err; for (;;) { @@ -1011,6 +1014,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) break; } finish_wait(sk_sleep(sk), &wait); + *timeop = timeo; return err; } @@ -1054,7 +1058,7 @@ static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, restart: /* Look for a message in receive queue; wait if necessary */ - res = tipc_wait_for_rcvmsg(sock, timeo); + res = tipc_wait_for_rcvmsg(sock, &timeo); if (res) goto exit; @@ -1100,7 +1104,7 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { if ((sock->state != SS_READY) && - (++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) + (++port->conn_unacked >= TIPC_CONNACK_INTV)) tipc_acknowledge(port->ref, port->conn_unacked); advance_rx_queue(sk); } @@ -1152,7 +1156,7 @@ static int tipc_recv_stream(struct kiocb *iocb, struct socket 
*sock, restart: /* Look for a message in receive queue; wait if necessary */ - res = tipc_wait_for_rcvmsg(sock, timeo); + res = tipc_wait_for_rcvmsg(sock, &timeo); if (res) goto exit; @@ -1209,7 +1213,7 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) + if (unlikely(++port->conn_unacked >= TIPC_CONNACK_INTV)) tipc_acknowledge(port->ref, port->conn_unacked); advance_rx_queue(sk); } @@ -1415,7 +1419,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) } /** - * backlog_rcv - handle incoming message from backlog queue + * tipc_backlog_rcv - handle incoming message from backlog queue * @sk: socket * @buf: message * @@ -1423,47 +1427,74 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) * * Returns 0 */ -static int backlog_rcv(struct sock *sk, struct sk_buff *buf) +static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) { u32 res; + struct tipc_sock *tsk = tipc_sk(sk); + uint truesize = buf->truesize; res = filter_rcv(sk, buf); - if (res) + if (unlikely(res)) tipc_reject_msg(buf, res); + + if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, &tsk->dupl_rcvcnt); + return 0; } /** * tipc_sk_rcv - handle incoming message - * @sk: socket receiving message - * @buf: message - * - * Called with port lock already taken. - * - * Returns TIPC error status code (TIPC_OK if message is not to be rejected) + * @buf: buffer containing arriving message + * Consumes buffer + * Returns 0 if success, or errno: -EHOSTUNREACH */ -u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf) +int tipc_sk_rcv(struct sk_buff *buf) { - u32 res; + struct tipc_sock *tsk; + struct tipc_port *port; + struct sock *sk; + u32 dport = msg_destport(buf_msg(buf)); + int err = TIPC_OK; + uint limit; - /* - * Process message if socket is unlocked; otherwise add to backlog queue - * - * This code is based on sk_receive_skb(), but must be distinct from it - * since a TIPC-specific filter/reject mechanism is utilized - */ + /* Forward unresolved named message */ + if (unlikely(!dport)) { + tipc_net_route_msg(buf); + return 0; + } + + /* Validate destination */ + port = tipc_port_lock(dport); + if (unlikely(!port)) { + err = TIPC_ERR_NO_PORT; + goto exit; + } + + tsk = tipc_port_to_sock(port); + sk = &tsk->sk; + + /* Queue message */ bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { - res = filter_rcv(sk, buf); + err = filter_rcv(sk, buf); } else { - if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf))) - res = TIPC_ERR_OVERLOAD; - else - res = TIPC_OK; + if (sk->sk_backlog.len == 0) + atomic_set(&tsk->dupl_rcvcnt, 0); + limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt); + if (sk_add_backlog(sk, buf, limit)) + err = TIPC_ERR_OVERLOAD; } + bh_unlock_sock(sk); + tipc_port_unlock(port); - return res; + if (likely(!err)) + return 0; +exit: + tipc_reject_msg(buf, err); + return -EHOSTUNREACH; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) @@ -1905,6 +1936,28 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } +int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +{ + struct tipc_sioc_ln_req lnr; + void __user *argp = (void __user *)arg; + + switch (cmd) { + case SIOCGETLINKNAME: + if (copy_from_user(&lnr, argp, sizeof(lnr))) + return -EFAULT; + if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer, + lnr.linkname, TIPC_MAX_LINK_NAME)) { + if (copy_to_user(argp, &lnr, 
sizeof(lnr))) + return -EFAULT; + return 0; + } + return -EADDRNOTAVAIL; + break; + default: + return -ENOIOCTLCMD; + } +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -1917,7 +1970,7 @@ static const struct proto_ops msg_ops = { .accept = sock_no_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = sock_no_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, @@ -1938,7 +1991,7 @@ static const struct proto_ops packet_ops = { .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, @@ -1959,7 +2012,7 @@ static const struct proto_ops stream_ops = { .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 74e5c7f195a6..3afcd2a70b31 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -44,12 +44,14 @@ * @port: port - interacts with 'sk' and with the rest of the TIPC stack * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request + * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue */ struct tipc_sock { struct sock sk; struct tipc_port port; unsigned int conn_timeout; + atomic_t dupl_rcvcnt; }; static inline struct tipc_sock *tipc_sk(const struct sock *sk) @@ -67,6 +69,6 @@ static inline void tipc_sock_wakeup(struct tipc_sock *tsk) tsk->sk.sk_write_space(&tsk->sk); } -u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf); +int tipc_sk_rcv(struct sk_buff *buf); #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 749f80c21e22..e96884380732 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1492,10 +1492,14 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; - if (len > SKB_MAX_ALLOC) + if (len > SKB_MAX_ALLOC) { data_len = min_t(size_t, len - SKB_MAX_ALLOC, MAX_SKB_FRAGS * PAGE_SIZE); + data_len = PAGE_ALIGN(data_len); + + BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); + } skb = sock_alloc_send_pskb(sk, len - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, @@ -1670,6 +1674,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); + data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); + skb = sock_alloc_send_pskb(sk, size - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, get_order(UNIX_SKB_FRAGS_SZ)); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 16d08b399210..405f3c4cf70c 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -95,6 +95,43 @@ config CFG80211_CERTIFICATION_ONUS you are a wireless researcher and are working in a controlled and approved environment by your local regulatory agency. +config CFG80211_REG_CELLULAR_HINTS + bool "cfg80211 regulatory support for cellular base station hints" + depends on CFG80211_CERTIFICATION_ONUS + ---help--- + This option enables support for parsing regulatory hints + from cellular base stations. If enabled and at least one driver + claims support for parsing cellular base station hints the + regulatory core will allow and parse these regulatory hints. 
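Stepping back to the tipc_ioctl() handler added above: from user space the new call is a plain ioctl on any TIPC socket. A hedged sketch, assuming the struct tipc_sioc_ln_req and SIOCGETLINKNAME definitions that this series adds to the linux/tipc.h UAPI; the peer address and bearer id below are placeholder values:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/sockios.h>	/* SIOCPROTOPRIVATE, which SIOCGETLINKNAME aliases */
#include <linux/tipc.h>

int main(void)
{
	struct tipc_sioc_ln_req lnr;
	int sd = socket(AF_TIPC, SOCK_RDM, 0);

	if (sd < 0)
		return 1;
	memset(&lnr, 0, sizeof(lnr));
	lnr.peer = 0x01001002;	/* placeholder: peer node address <1.1.2> */
	lnr.bearer_id = 0;	/* placeholder: first bearer */
	if (ioctl(sd, SIOCGETLINKNAME, &lnr) == 0)
		printf("link name: %s\n", lnr.linkname);
	close(sd);
	return 0;
}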
+ The regulatory core will only apply these regulatory hints on + drivers that support this feature. You should only enable this + feature if you have tested and validated it on your + systems. + +config CFG80211_REG_RELAX_NO_IR + bool "cfg80211 support for NO_IR relaxation" + depends on CFG80211_CERTIFICATION_ONUS + ---help--- + This option enables support for relaxation of the NO_IR flag for + situations where certain regulatory bodies have provided clarifications + on how relaxation can occur. This feature has an inherent dependency on + userspace features which must have been properly tested and as such is + not enabled by default. + + A relaxation feature example is allowing the operation of a P2P group + owner (GO) on channels marked with NO_IR if there is an additional BSS + interface which is associated to an AP which userspace assumes or confirms + to be an authorized master, i.e., with radar detection support and DFS + capabilities. However, note that in order not to create daisy-chain + scenarios, this relaxation is not allowed in cases where the BSS client + is associated to a P2P GO; in addition, a P2P GO instantiated on + a channel due to this relaxation should not allow connections from + non-P2P clients. + + The regulatory core will apply these relaxations only for drivers that + support this feature by declaring the appropriate channel flags and + capabilities in their registration flow. + config CFG80211_DEFAULT_PS bool "enable powersave by default" depends on CFG80211 diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 3e02ade508d8..bdad1f951561 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -6,8 +6,8 @@ #include "rdev-ops.h" -static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify) +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 9c9501a35fb5..992b34070bcb 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -326,28 +326,57 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, int cfg80211_chandef_dfs_required(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef) + const struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) { int width; - int r; + int ret; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return -EINVAL; - width = cfg80211_chandef_get_width(chandef); - if (width < 0) - return -EINVAL; + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; - r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, - width); - if (r) - return r; + ret = cfg80211_get_chans_dfs_required(wiphy, + chandef->center_freq1, + width); + if (ret < 0) + return ret; + else if (ret > 0) + return BIT(chandef->width); - if (!chandef->center_freq2) - return 0; + if (!chandef->center_freq2) + return 0; + + ret = cfg80211_get_chans_dfs_required(wiphy, + chandef->center_freq2, + width); + if (ret < 0) + return ret; + else if (ret > 0) + return BIT(chandef->width); - return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, - width); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_WDS: + case
NL80211_IFTYPE_P2P_DEVICE: + break; + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + WARN_ON(1); + } + + return 0; } EXPORT_SYMBOL(cfg80211_chandef_dfs_required); @@ -587,12 +616,14 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, width = 5; break; case NL80211_CHAN_WIDTH_10: + prohibited_flags |= IEEE80211_CHAN_NO_10MHZ; width = 10; break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported) return false; case NL80211_CHAN_WIDTH_20_NOHT: + prohibited_flags |= IEEE80211_CHAN_NO_20MHZ; width = 20; break; case NL80211_CHAN_WIDTH_40: @@ -661,17 +692,111 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_chandef_usable); +/* + * For GO only, check if the channel can be used under permissive conditions + * mandated by some regulatory bodies, i.e., the channel is marked with + * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface + * associated to an AP on the same channel or on the same UNII band + * (assuming that the AP is an authorized master). + * In addition, allow the GO to operate on a channel on which indoor operation is + * allowed, iff we are currently operating in an indoor environment. + */ +static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, + struct ieee80211_channel *chan) +{ + struct wireless_dev *wdev_iter; + struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx); + + ASSERT_RTNL(); + + if (!config_enabled(CONFIG_CFG80211_REG_RELAX_NO_IR) || + !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) + return false; + + if (regulatory_indoor_allowed() && + (chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) + return true; + + if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT)) + return false; + + /* + * Generally, it is possible to rely on another device/driver to allow + * the GO concurrent relaxation, however, since the device can further + * enforce the relaxation (by doing similar verifications to these), + * and thus fail the GO instantiation, consider only the interfaces of + * the current registered device. + */ + list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { + struct ieee80211_channel *other_chan = NULL; + int r1, r2; + + if (wdev_iter->iftype != NL80211_IFTYPE_STATION || + !netif_running(wdev_iter->netdev)) + continue; + + wdev_lock(wdev_iter); + if (wdev_iter->current_bss) + other_chan = wdev_iter->current_bss->pub.channel; + wdev_unlock(wdev_iter); + + if (!other_chan) + continue; + + if (chan == other_chan) + return true; + + if (chan->band != IEEE80211_BAND_5GHZ) + continue; + + r1 = cfg80211_get_unii(chan->center_freq); + r2 = cfg80211_get_unii(other_chan->center_freq); + + if (r1 != -EINVAL && r1 == r2) { + /* + * At some locations channels 149-165 are considered a + * bundle, but at other locations, e.g., Indonesia, + * channels 149-161 are considered a bundle while + * channel 165 is left out and considered to be in a + * different bundle. Thus, if there is a + * station interface connected to an AP on channel 165, + * it is assumed that channels 149-161 are allowed for + * GO operations. However, having a station interface + * connected to an AP on channels 149-161 does not + * allow GO operation on channel 165.
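The bundle check above leans on cfg80211_get_unii(), which maps a center frequency to a UNII band index so that two channels can be compared band-wise. A rough illustration of that classification follows; the authoritative ranges live in cfg80211_get_unii() in net/wireless/util.c, and the boundaries below are approximate:

/* Sketch: approximate 5 GHz UNII band classification (frequencies in MHz). */
static int example_get_unii(int freq)
{
	if (freq > 5150 && freq <= 5250)
		return 0;	/* UNII-1 */
	if (freq > 5250 && freq <= 5350)
		return 1;	/* UNII-2 */
	if (freq > 5470 && freq <= 5725)
		return 2;	/* UNII-2e */
	if (freq > 5725 && freq <= 5825)
		return 3;	/* UNII-3 */
	return -1;		/* not in a UNII band */
}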
+ */ + if (chan->center_freq == 5825 && + other_chan->center_freq != 5825) + continue; + return true; + } + } + + return false; +} + bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef) + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR; - trace_cfg80211_reg_can_beacon(wiphy, chandef); + trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); - if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 && + /* + * Under certain conditions suggested by some regulatory bodies + * a GO can operate on channels marked with IEEE80211_NO_IR, + * so set this flag only if such relaxations are not enabled and + * the conditions are not met. + */ + if (iftype != NL80211_IFTYPE_P2P_GO || + !cfg80211_go_permissive_chan(rdev, chandef->chan)) + prohibited_flags |= IEEE80211_CHAN_NO_IR; + + if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && cfg80211_chandef_dfs_available(wiphy, chandef)) { /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ prohibited_flags = IEEE80211_CHAN_DISABLED; @@ -701,6 +826,8 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, enum cfg80211_chan_mode *chanmode, u8 *radar_detect) { + int ret; + *chan = NULL; *chanmode = CHAN_MODE_UNDEFINED; @@ -743,8 +870,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, *chan = wdev->chandef.chan; *chanmode = CHAN_MODE_SHARED; - if (cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef)) + ret = cfg80211_chandef_dfs_required(wdev->wiphy, + &wdev->chandef, + wdev->iftype); + WARN_ON(ret < 0); + if (ret > 0) *radar_detect |= BIT(wdev->chandef.width); } return; @@ -753,8 +883,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, *chan = wdev->chandef.chan; *chanmode = CHAN_MODE_SHARED; - if (cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef)) + ret = cfg80211_chandef_dfs_required(wdev->wiphy, + &wdev->chandef, + wdev->iftype); + WARN_ON(ret < 0); + if (ret > 0) *radar_detect |= BIT(wdev->chandef.width); } return; diff --git a/net/wireless/core.c b/net/wireless/core.c index 086cddd03ba6..a1c40654dd9b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -69,7 +69,7 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) int get_wiphy_idx(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); return rdev->wiphy_idx; } @@ -130,7 +130,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, newname)) pr_err("failed to rename debugfs dir to %s!\n", newname); - nl80211_notify_dev_rename(rdev); + nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); return 0; } @@ -210,15 +210,12 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, } } -static int cfg80211_rfkill_set_block(void *data, bool blocked) +void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = data; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct wireless_dev *wdev; - if (!blocked) - return 0; - - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { @@ -234,7 +231,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) break; } } +} +EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces); + +static int cfg80211_rfkill_set_block(void
*data, bool blocked) +{ + struct cfg80211_registered_device *rdev = data; + + if (!blocked) + return 0; + rtnl_lock(); + cfg80211_shutdown_all_interfaces(&rdev->wiphy); rtnl_unlock(); return 0; @@ -260,6 +268,45 @@ static void cfg80211_event_work(struct work_struct *work) rtnl_unlock(); } +void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) +{ + struct cfg80211_iface_destroy *item; + + ASSERT_RTNL(); + + spin_lock_irq(&rdev->destroy_list_lock); + while ((item = list_first_entry_or_null(&rdev->destroy_list, + struct cfg80211_iface_destroy, + list))) { + struct wireless_dev *wdev, *tmp; + u32 nlportid = item->nlportid; + + list_del(&item->list); + kfree(item); + spin_unlock_irq(&rdev->destroy_list_lock); + + list_for_each_entry_safe(wdev, tmp, &rdev->wdev_list, list) { + if (nlportid == wdev->owner_nlportid) + rdev_del_virtual_intf(rdev, wdev); + } + + spin_lock_irq(&rdev->destroy_list_lock); + } + spin_unlock_irq(&rdev->destroy_list_lock); +} + +static void cfg80211_destroy_iface_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + destroy_work); + + rtnl_lock(); + cfg80211_destroy_ifaces(rdev); + rtnl_unlock(); +} + /* exported functions */ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) @@ -318,6 +365,10 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; + INIT_LIST_HEAD(&rdev->destroy_list); + spin_lock_init(&rdev->destroy_list_lock); + INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); + #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; #endif @@ -351,6 +402,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; + rdev->wiphy.max_num_csa_counters = 1; + return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new); @@ -396,10 +449,7 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) for (j = 0; j < c->n_limits; j++) { u16 types = c->limits[j].types; - /* - * interface types shouldn't overlap, this is - * used in cfg80211_can_change_interface() - */ + /* interface types shouldn't overlap */ if (WARN_ON(types & all_iftypes)) return -EINVAL; all_iftypes |= types; @@ -435,7 +485,7 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) int wiphy_register(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); int res; enum ieee80211_band band; struct ieee80211_supported_band *sband; @@ -610,13 +660,15 @@ int wiphy_register(struct wiphy *wiphy) return res; } + nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); + return 0; } EXPORT_SYMBOL(wiphy_register); void wiphy_rfkill_start_polling(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (!rdev->ops->rfkill_poll) return; @@ -627,7 +679,7 @@ EXPORT_SYMBOL(wiphy_rfkill_start_polling); void wiphy_rfkill_stop_polling(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); rfkill_pause_polling(rdev->rfkill); } @@ -635,7 +687,7 @@ EXPORT_SYMBOL(wiphy_rfkill_stop_polling); void wiphy_unregister(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + 
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); wait_event(rdev->dev_wait, ({ int __count; @@ -648,9 +700,10 @@ void wiphy_unregister(struct wiphy *wiphy) rfkill_unregister(rdev->rfkill); rtnl_lock(); + nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); rdev->wiphy.registered = false; - BUG_ON(!list_empty(&rdev->wdev_list)); + WARN_ON(!list_empty(&rdev->wdev_list)); /* * First remove the hardware from everywhere, this makes @@ -675,6 +728,7 @@ void wiphy_unregister(struct wiphy *wiphy) cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); + flush_work(&rdev->destroy_work); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -707,7 +761,7 @@ EXPORT_SYMBOL(wiphy_free); void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (rfkill_set_hw_state(rdev->rfkill, blocked)) schedule_work(&rdev->rfkill_sync); @@ -716,7 +770,7 @@ EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); void cfg80211_unregister_wdev(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); ASSERT_RTNL(); @@ -751,23 +805,23 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } -void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +void __cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; ASSERT_RTNL(); + ASSERT_WDEV_LOCK(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - cfg80211_leave_ibss(rdev, dev, true); + __cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) __cfg80211_stop_sched_scan(rdev, false); - wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); wdev->wext.ie = NULL; @@ -776,32 +830,60 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, #endif cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); - wdev_unlock(wdev); break; case NL80211_IFTYPE_MESH_POINT: - cfg80211_leave_mesh(rdev, dev); + __cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - cfg80211_stop_ap(rdev, dev, true); + __cfg80211_stop_ap(rdev, dev, true); break; default: break; } } +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + wdev_lock(wdev); + __cfg80211_leave(rdev, wdev); + wdev_unlock(wdev); +} + +void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + trace_cfg80211_stop_iface(wiphy, wdev); + + ev = kzalloc(sizeof(*ev), gfp); + if (!ev) + return; + + ev->type = EVENT_STOPPED; + + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + queue_work(cfg80211_wq, &rdev->event_work); +} +EXPORT_SYMBOL(cfg80211_stop_iface); + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct 
cfg80211_registered_device *rdev; - int ret; if (!wdev) return NOTIFY_DONE; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); WARN_ON(wdev->iftype == NL80211_IFTYPE_UNSPECIFIED); @@ -959,13 +1041,14 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) return notifier_from_errno(-EOPNOTSUPP); - ret = cfg80211_can_add_interface(rdev, wdev->iftype); - if (ret) - return notifier_from_errno(ret); + if (rfkill_blocked(rdev->rfkill)) + return notifier_from_errno(-ERFKILL); break; + default: + return NOTIFY_DONE; } - return NOTIFY_DONE; + return NOTIFY_OK; } static struct notifier_block cfg80211_netdev_notifier = { diff --git a/net/wireless/core.h b/net/wireless/core.h index 5b1fdcadd469..e9afbf10e756 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -80,13 +80,17 @@ struct cfg80211_registered_device { struct cfg80211_coalesce *coalesce; + spinlock_t destroy_list_lock; + struct list_head destroy_list; + struct work_struct destroy_work; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline -struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) +struct cfg80211_registered_device *wiphy_to_rdev(struct wiphy *wiphy) { BUG_ON(!wiphy); return container_of(wiphy, struct cfg80211_registered_device, wiphy); @@ -181,6 +185,7 @@ enum cfg80211_event_type { EVENT_ROAMED, EVENT_DISCONNECTED, EVENT_IBSS_JOINED, + EVENT_STOPPED, }; struct cfg80211_event { @@ -232,6 +237,13 @@ struct cfg80211_beacon_registration { u32 nlportid; }; +struct cfg80211_iface_destroy { + struct list_head list; + u32 nlportid; +}; + +void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); + /* free object */ void cfg80211_dev_free(struct cfg80211_registered_device *rdev); @@ -240,8 +252,8 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, void ieee80211_set_bitrate_flags(struct wiphy *wiphy); -void cfg80211_bss_expire(struct cfg80211_registered_device *dev); -void cfg80211_bss_age(struct cfg80211_registered_device *dev, +void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); +void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); /* IBSS */ @@ -270,6 +282,8 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); +int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, @@ -277,6 +291,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef); /* AP */ +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool notify); int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, bool notify); @@ -401,35 +417,6 @@ unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); -static inline int -cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - enum nl80211_iftype iftype) -{ - return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED, 0); -} - -static inline int 
-cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, - enum nl80211_iftype iftype) -{ - if (rfkill_blocked(rdev->rfkill)) - return -ERFKILL; - - return cfg80211_can_change_interface(rdev, NULL, iftype); -} - -static inline int -cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) -{ - return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode, 0); -} - static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { unsigned long end = jiffies; @@ -459,6 +446,8 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void __cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index e37862f1b127..d4860bfc020e 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -43,7 +43,7 @@ static void cfg80211_get_ringparam(struct net_device *dev, struct ethtool_ringparam *rp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); memset(rp, 0, sizeof(*rp)); @@ -56,7 +56,7 @@ static int cfg80211_set_ringparam(struct net_device *dev, struct ethtool_ringparam *rp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0) return -EINVAL; @@ -70,7 +70,7 @@ static int cfg80211_set_ringparam(struct net_device *dev, static int cfg80211_get_sset_count(struct net_device *dev, int sset) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (rdev->ops->get_et_sset_count) return rdev_get_et_sset_count(rdev, dev, sset); return -EOPNOTSUPP; @@ -80,7 +80,7 @@ static void cfg80211_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (rdev->ops->get_et_stats) rdev_get_et_stats(rdev, dev, stats, data); } @@ -88,7 +88,7 @@ static void cfg80211_get_stats(struct net_device *dev, static void cfg80211_get_strings(struct net_device *dev, u32 sset, u8 *data) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (rdev->ops->get_et_strings) rdev_get_et_strings(rdev, dev, sset, data); } diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index b35da8dc85de..40c37fc5b67c 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -68,17 +68,7 @@ function parse_reg_rule() sub(/,/, "", units) dfs_cac = $9 if (units == "mW") { - if (power == 100) { - power = 20 - } else if (power == 200) { - power = 23 - } else if (power == 500) { - power = 27 - } else if (power == 1000) { - power = 30 - } else { - print "Unknown power 
value in database!" - } + power = 10 * log(power)/log(10) } else { dfs_cac = $8 } @@ -117,7 +107,7 @@ function parse_reg_rule() } flags = flags "0" - printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags + printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags rules++ } diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index a6b5bdad039c..8f345da3ea5f 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -45,7 +45,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, cfg80211_upload_connect_keys(wdev); - nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, + nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid, GFP_KERNEL); #ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); @@ -58,7 +58,7 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; @@ -88,8 +88,6 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct ieee80211_channel *check_chan; - u8 radar_detect_width = 0; int err; ASSERT_WDEV_LOCK(wdev); @@ -126,28 +124,6 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.chandef = params->chandef; #endif - check_chan = params->chandef.chan; - if (params->userspace_handles_dfs) { - /* Check for radar even if the current channel is not - * a radar channel - it might decide to change to DFS - * channel later. - */ - radar_detect_width = BIT(params->chandef.width); - } - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - check_chan, - (params->channel_fixed && - !radar_detect_width) - ? 
CHAN_MODE_SHARED - : CHAN_MODE_EXCLUSIVE, - radar_detect_width); - - if (err) { - wdev->connect_keys = NULL; - return err; - } - err = rdev_join_ibss(rdev, dev, params); if (err) { wdev->connect_keys = NULL; @@ -180,7 +156,7 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; ASSERT_WDEV_LOCK(wdev); @@ -335,7 +311,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_channel *chan = NULL; int err, freq; @@ -346,7 +322,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (!rdev->ops->join_ibss) return -EOPNOTSUPP; - freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; @@ -420,7 +396,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); size_t len = data->length; int err; @@ -444,8 +420,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; + memcpy(wdev->ssid, ssid, len); wdev->wext.ibss.ssid = wdev->ssid; - memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; wdev_lock(wdev); @@ -487,7 +463,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *bssid = ap_addr->sa_data; int err; @@ -505,6 +481,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; + if (bssid && !is_valid_ether_addr(bssid)) + return -EINVAL; + /* both automatic */ if (!bssid && !wdev->wext.ibss.bssid) return 0; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 5af5cc6b2c4c..092300b30c37 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -99,7 +99,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; - u8 radar_detect_width = 0; int err; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -175,22 +174,10 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, scan_width); } - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef, + NL80211_IFTYPE_MESH_POINT)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef); - if (err < 0) - return err; - if (err) - radar_detect_width = BIT(setup->chandef.width); - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - setup->chandef.chan, - CHAN_MODE_SHARED, - radar_detect_width); - if (err) - return err; - err = rdev_join_mesh(rdev, dev, conf, setup); if (!err) { memcpy(wdev->ssid, setup->mesh_id, 
setup->mesh_id_len); @@ -236,17 +223,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return -ENETDOWN; - /* cfg80211_can_use_chan() calls - * cfg80211_can_use_iftype_chan() with no radar - * detection, so if we're trying to use a radar - * channel here, something is wrong. - */ - WARN_ON_ONCE(chandef->chan->flags & IEEE80211_CHAN_RADAR); - err = cfg80211_can_use_chan(rdev, wdev, chandef->chan, - CHAN_MODE_SHARED); - if (err) - return err; - err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev, chandef->chan); if (!err) @@ -262,8 +238,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, return 0; } -static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev) +int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c52ff59a3e96..266766b8d80b 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -23,7 +23,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u8 *ie = mgmt->u.assoc_resp.variable; int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); @@ -54,7 +54,7 @@ EXPORT_SYMBOL(cfg80211_rx_assoc_resp); static void cfg80211_process_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); nl80211_send_rx_auth(rdev, wdev->netdev, buf, len, GFP_KERNEL); cfg80211_sme_rx_auth(wdev, buf, len); @@ -63,7 +63,7 @@ static void cfg80211_process_auth(struct wireless_dev *wdev, static void cfg80211_process_deauth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); @@ -82,7 +82,7 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev, static void cfg80211_process_disassoc(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); @@ -123,7 +123,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_send_auth_timeout(dev, addr); @@ -136,7 +136,7 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = 
wiphy_to_rdev(wiphy); trace_cfg80211_send_assoc_timeout(dev, bss->bssid); @@ -172,7 +172,7 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, const u8 *tsc, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; char *buf = kmalloc(128, gfp); @@ -233,14 +233,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, if (!req.bss) return -ENOENT; - err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel, - CHAN_MODE_SHARED); - if (err) - goto out; - err = rdev_auth(rdev, dev, &req); -out: cfg80211_put_bss(&rdev->wiphy, req.bss); return err; } @@ -306,16 +300,10 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, if (!req->bss) return -ENOENT; - err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED); - if (err) - goto out; - err = rdev_assoc(rdev, dev, req); if (!err) cfg80211_hold_bss(bss_from_pub(req->bss)); - -out: - if (err) + else cfg80211_put_bss(&rdev->wiphy, req->bss); return err; @@ -414,7 +402,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, int match_len) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; u16 mgmt_type; @@ -473,7 +461,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *tmp; spin_lock_bh(&wdev->mgmt_registrations_lock); @@ -620,7 +608,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, const u8 *buf, size_t len, u32 flags, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg; const struct ieee80211_txrx_stypes *stypes = &wiphy->mgmt_stypes[wdev->iftype]; @@ -739,7 +727,7 @@ void cfg80211_radar_event(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long timeout; trace_cfg80211_radar_event(wiphy, chandef); @@ -764,7 +752,7 @@ void cfg80211_cac_event(struct net_device *netdev, { struct wireless_dev *wdev = netdev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long timeout; trace_cfg80211_cac_event(netdev, event); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 052c1bf8ffac..ba4f1723c83a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -168,8 +168,8 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) netdev = __dev_get_by_index(netns, ifindex); if (netdev) { if (netdev->ieee80211_ptr) - tmp = wiphy_to_dev( - netdev->ieee80211_ptr->wiphy); + tmp = wiphy_to_rdev( + netdev->ieee80211_ptr->wiphy); else tmp = NULL; @@ -371,8 +371,8 @@ static const struct nla_policy 
nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED }, - [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 }, - [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 }, + [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY }, + [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY }, [NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG }, @@ -385,6 +385,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 }, [NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 }, + [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG }, + [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, }; /* policy for the key attributes */ @@ -484,7 +486,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, err = PTR_ERR(*wdev); goto out_unlock; } - *rdev = wiphy_to_dev((*wdev)->wiphy); + *rdev = wiphy_to_rdev((*wdev)->wiphy); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; cb->args[1] = (*wdev)->identifier; @@ -497,7 +499,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, err = -ENODEV; goto out_unlock; } - *rdev = wiphy_to_dev(wiphy); + *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { @@ -566,6 +568,13 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct ieee80211_channel *chan, bool large) { + /* Some channels must be completely excluded from the + * list to protect old user-space tools from breaking + */ + if (!large && chan->flags & + (IEEE80211_CHAN_NO_10MHZ | IEEE80211_CHAN_NO_20MHZ)) + return 0; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ, chan->center_freq)) goto nla_put_failure; @@ -613,6 +622,18 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -950,8 +971,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, c->max_interfaces)) goto nla_put_failure; if (large && - nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, - c->radar_detect_widths)) + (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths) || + nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + c->radar_detect_regions))) goto nla_put_failure; nla_nest_end(msg, nl_combi); @@ -1006,42 +1029,42 @@ static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, } static int nl80211_send_wowlan(struct sk_buff *msg, - struct cfg80211_registered_device *dev, + struct cfg80211_registered_device *rdev, bool large) { struct nlattr *nl_wowlan; - if 
(!dev->wiphy.wowlan) + if (!rdev->wiphy.wowlan) return 0; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; - if (((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && + if (((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) return -ENOBUFS; - if (dev->wiphy.wowlan->n_patterns) { + if (rdev->wiphy.wowlan->n_patterns) { struct nl80211_pattern_support pat = { - .max_patterns = dev->wiphy.wowlan->n_patterns, - .min_pattern_len = dev->wiphy.wowlan->pattern_min_len, - .max_pattern_len = dev->wiphy.wowlan->pattern_max_len, - .max_pkt_offset = dev->wiphy.wowlan->max_pkt_offset, + .max_patterns = rdev->wiphy.wowlan->n_patterns, + .min_pattern_len = rdev->wiphy.wowlan->pattern_min_len, + .max_pattern_len = rdev->wiphy.wowlan->pattern_max_len, + .max_pkt_offset = rdev->wiphy.wowlan->max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, @@ -1049,7 +1072,7 @@ static int nl80211_send_wowlan(struct sk_buff *msg, return -ENOBUFS; } - if (large && nl80211_send_wowlan_tcp_caps(dev, msg)) + if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; nla_nest_end(msg, nl_wowlan); @@ -1059,19 +1082,19 @@ static int nl80211_send_wowlan(struct sk_buff *msg, #endif static int nl80211_send_coalesce(struct sk_buff *msg, - struct cfg80211_registered_device *dev) + struct cfg80211_registered_device *rdev) { struct nl80211_coalesce_rule_support rule; - if (!dev->wiphy.coalesce) + if (!rdev->wiphy.coalesce) return 0; - rule.max_rules = dev->wiphy.coalesce->n_rules; - rule.max_delay = dev->wiphy.coalesce->max_delay; - rule.pat.max_patterns = dev->wiphy.coalesce->n_patterns; - rule.pat.min_pattern_len = dev->wiphy.coalesce->pattern_min_len; - rule.pat.max_pattern_len = dev->wiphy.coalesce->pattern_max_len; - rule.pat.max_pkt_offset = dev->wiphy.coalesce->max_pkt_offset; + rule.max_rules = rdev->wiphy.coalesce->n_rules; + rule.max_delay = rdev->wiphy.coalesce->max_delay; + rule.pat.max_patterns = rdev->wiphy.coalesce->n_patterns; + rule.pat.min_pattern_len = rdev->wiphy.coalesce->pattern_min_len; + rule.pat.max_pattern_len = rdev->wiphy.coalesce->pattern_max_len; + rule.pat.max_pkt_offset = rdev->wiphy.coalesce->max_pkt_offset; 
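
The WoWLAN and coalesce hunks above are part of the same mechanical dev -> rdev rename as the rest of the file, but they also show the attribute-emission idiom all of these capability dumps rely on: every nla_put*() call is checked, and any failure means the message ran out of tailroom, so the helper unwinds and reports -ENOBUFS. A minimal kernel-context sketch of that idiom; put_pattern_caps() is a hypothetical helper, everything else is the standard netlink attribute API and the structures visible in the hunks:

#include <net/cfg80211.h>
#include <net/netlink.h>

static int put_pattern_caps(struct sk_buff *msg,
			    const struct wiphy_wowlan_support *wowlan)
{
	struct nlattr *nest;
	struct nl80211_pattern_support pat = {
		.max_patterns = wowlan->n_patterns,
		.min_pattern_len = wowlan->pattern_min_len,
		.max_pattern_len = wowlan->pattern_max_len,
		.max_pkt_offset = wowlan->max_pkt_offset,
	};

	/* open a nested attribute; NULL means the skb is out of room */
	nest = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
	if (!nest)
		return -ENOBUFS;

	/* fixed-size binary attribute, the same shape the real code emits */
	if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
		    sizeof(pat), &pat)) {
		nla_nest_cancel(msg, nest);	/* drop the partial nest */
		return -ENOBUFS;
	}

	nla_nest_end(msg, nest);
	return 0;
}
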
if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule)) return -ENOBUFS; @@ -1202,7 +1225,8 @@ struct nl80211_dump_wiphy_state { bool split; }; -static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, +static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, + enum nl80211_commands cmd, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct nl80211_dump_wiphy_state *state) { @@ -1214,63 +1238,66 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, struct ieee80211_channel *chan; int i; const struct ieee80211_txrx_stypes *mgmt_stypes = - dev->wiphy.mgmt_stypes; + rdev->wiphy.mgmt_stypes; u32 features; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -ENOBUFS; if (WARN_ON(!state)) return -EINVAL; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, - wiphy_name(&dev->wiphy)) || + wiphy_name(&rdev->wiphy)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, cfg80211_rdev_list_generation)) goto nla_put_failure; + if (cmd != NL80211_CMD_NEW_WIPHY) + goto finish; + switch (state->split_start) { case 0: if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, - dev->wiphy.retry_short) || + rdev->wiphy.retry_short) || nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, - dev->wiphy.retry_long) || + rdev->wiphy.retry_long) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, - dev->wiphy.frag_threshold) || + rdev->wiphy.frag_threshold) || nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, - dev->wiphy.rts_threshold) || + rdev->wiphy.rts_threshold) || nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, - dev->wiphy.coverage_class) || + rdev->wiphy.coverage_class) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, - dev->wiphy.max_scan_ssids) || + rdev->wiphy.max_scan_ssids) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, - dev->wiphy.max_sched_scan_ssids) || + rdev->wiphy.max_sched_scan_ssids) || nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, - dev->wiphy.max_scan_ie_len) || + rdev->wiphy.max_scan_ie_len) || nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, - dev->wiphy.max_sched_scan_ie_len) || + rdev->wiphy.max_sched_scan_ie_len) || nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, - dev->wiphy.max_match_sets)) + rdev->wiphy.max_match_sets)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && + if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && + if ((rdev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && + if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && + if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && + if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && + if ((rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; 
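
The long runs of nla_put_*() calls above all live inside nl80211_send_wiphy()'s split-dump state machine: state->split_start records which chunk to emit next, each numbered case advances it, and only split-aware dumps (state->split) stop between chunks, while legacy dumps fall through every case and emit the whole wiphy in one message. A self-contained toy model of that control flow, with emit_chunk() standing in for the real attribute runs:

#include <stdio.h>

/* toy model of the resumable dump: split_start remembers the next
 * chunk, split selects chunked mode for split-aware consumers */
struct dump_state {
	int split_start;
	int split;
};

static int emit_chunk(int n) { printf("chunk %d\n", n); return 0; }

static int send_wiphy(struct dump_state *state)
{
	switch (state->split_start) {
	case 0:
		if (emit_chunk(0))
			return -1;
		state->split_start++;
		if (state->split)
			break;
		/* fall through: legacy dump wants everything at once */
	case 1:
		if (emit_chunk(1))
			return -1;
		state->split_start++;
		if (state->split)
			break;
		/* fall through */
	case 2:
		if (emit_chunk(2))
			return -1;
		state->split_start = 0;	/* done, reset for the next wiphy */
		break;
	}
	return 0;
}

int main(void)
{
	struct dump_state legacy = { 0, 0 }, split = { 0, 1 };

	send_wiphy(&legacy);			/* chunks 0..2 in one pass */
	while (send_wiphy(&split) == 0 && split.split_start)
		;				/* one chunk per call */
	return 0;
}
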
state->split_start++; @@ -1278,35 +1305,35 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, break; case 1: if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, - sizeof(u32) * dev->wiphy.n_cipher_suites, - dev->wiphy.cipher_suites)) + sizeof(u32) * rdev->wiphy.n_cipher_suites, + rdev->wiphy.cipher_suites)) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, - dev->wiphy.max_num_pmkids)) + rdev->wiphy.max_num_pmkids)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + if ((rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, - dev->wiphy.available_antennas_tx) || + rdev->wiphy.available_antennas_tx) || nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, - dev->wiphy.available_antennas_rx)) + rdev->wiphy.available_antennas_rx)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && + if ((rdev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, - dev->wiphy.probe_resp_offload)) + rdev->wiphy.probe_resp_offload)) goto nla_put_failure; - if ((dev->wiphy.available_antennas_tx || - dev->wiphy.available_antennas_rx) && - dev->ops->get_antenna) { + if ((rdev->wiphy.available_antennas_tx || + rdev->wiphy.available_antennas_rx) && + rdev->ops->get_antenna) { u32 tx_ant = 0, rx_ant = 0; int res; - res = rdev_get_antenna(dev, &tx_ant, &rx_ant); + res = rdev_get_antenna(rdev, &tx_ant, &rx_ant); if (!res) { if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, @@ -1323,7 +1350,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, break; case 2: if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, - dev->wiphy.interface_modes)) + rdev->wiphy.interface_modes)) goto nla_put_failure; state->split_start++; if (state->split) @@ -1337,7 +1364,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; - sband = dev->wiphy.bands[band]; + sband = rdev->wiphy.bands[band]; if (!sband) continue; @@ -1414,7 +1441,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, i = 0; #define CMD(op, n) \ do { \ - if (dev->ops->op) { \ + if (rdev->ops->op) { \ i++; \ if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ goto nla_put_failure; \ @@ -1438,32 +1465,32 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); - if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); - if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { + if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } - if (dev->ops->set_monitor_channel || dev->ops->start_ap || - dev->ops->join_mesh) { + if (rdev->ops->set_monitor_channel || rdev->ops->start_ap || + rdev->ops->join_mesh) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) goto nla_put_failure; } CMD(set_wds_peer, SET_WDS_PEER); - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { CMD(tdls_mgmt, TDLS_MGMT); CMD(tdls_oper, TDLS_OPER); } - if 
(dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) CMD(sched_scan_start, START_SCHED_SCAN); CMD(probe_client, PROBE_CLIENT); CMD(set_noack_map, SET_NOACK_MAP); - if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { + if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { i++; if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) goto nla_put_failure; @@ -1473,7 +1500,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); - if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); } CMD(set_qos_map, SET_QOS_MAP); @@ -1484,13 +1511,13 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, #undef CMD - if (dev->ops->connect || dev->ops->auth) { + if (rdev->ops->connect || rdev->ops->auth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) goto nla_put_failure; } - if (dev->ops->disconnect || dev->ops->deauth) { + if (rdev->ops->disconnect || rdev->ops->deauth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) goto nla_put_failure; @@ -1501,14 +1528,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (state->split) break; case 5: - if (dev->ops->remain_on_channel && - (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && + if (rdev->ops->remain_on_channel && + (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, - dev->wiphy.max_remain_on_channel_duration)) + rdev->wiphy.max_remain_on_channel_duration)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && + if ((rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) goto nla_put_failure; @@ -1519,7 +1546,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, break; case 6: #ifdef CONFIG_PM - if (nl80211_send_wowlan(msg, dev, state->split)) + if (nl80211_send_wowlan(msg, rdev, state->split)) goto nla_put_failure; state->split_start++; if (state->split) @@ -1529,10 +1556,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, #endif case 7: if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, - dev->wiphy.software_iftypes)) + rdev->wiphy.software_iftypes)) goto nla_put_failure; - if (nl80211_put_iface_combinations(&dev->wiphy, msg, + if (nl80211_put_iface_combinations(&rdev->wiphy, msg, state->split)) goto nla_put_failure; @@ -1540,12 +1567,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (state->split) break; case 8: - if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && + if ((rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, - dev->wiphy.ap_sme_capa)) + rdev->wiphy.ap_sme_capa)) goto nla_put_failure; - features = dev->wiphy.features; + features = rdev->wiphy.features; /* * We can only add the per-channel limit information if the * dump is split, otherwise it makes it too big. 
Therefore @@ -1556,16 +1583,16 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) goto nla_put_failure; - if (dev->wiphy.ht_capa_mod_mask && + if (rdev->wiphy.ht_capa_mod_mask && nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, - sizeof(*dev->wiphy.ht_capa_mod_mask), - dev->wiphy.ht_capa_mod_mask)) + sizeof(*rdev->wiphy.ht_capa_mod_mask), + rdev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; - if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && - dev->wiphy.max_acl_mac_addrs && + if (rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + rdev->wiphy.max_acl_mac_addrs && nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, - dev->wiphy.max_acl_mac_addrs)) + rdev->wiphy.max_acl_mac_addrs)) goto nla_put_failure; /* @@ -1581,41 +1608,41 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, state->split_start++; break; case 9: - if (dev->wiphy.extended_capabilities && + if (rdev->wiphy.extended_capabilities && (nla_put(msg, NL80211_ATTR_EXT_CAPA, - dev->wiphy.extended_capabilities_len, - dev->wiphy.extended_capabilities) || + rdev->wiphy.extended_capabilities_len, + rdev->wiphy.extended_capabilities) || nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, - dev->wiphy.extended_capabilities_len, - dev->wiphy.extended_capabilities_mask))) + rdev->wiphy.extended_capabilities_len, + rdev->wiphy.extended_capabilities_mask))) goto nla_put_failure; - if (dev->wiphy.vht_capa_mod_mask && + if (rdev->wiphy.vht_capa_mod_mask && nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, - sizeof(*dev->wiphy.vht_capa_mod_mask), - dev->wiphy.vht_capa_mod_mask)) + sizeof(*rdev->wiphy.vht_capa_mod_mask), + rdev->wiphy.vht_capa_mod_mask)) goto nla_put_failure; state->split_start++; break; case 10: - if (nl80211_send_coalesce(msg, dev)) + if (nl80211_send_coalesce(msg, rdev)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && + if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) || nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ))) goto nla_put_failure; - if (dev->wiphy.max_ap_assoc_sta && + if (rdev->wiphy.max_ap_assoc_sta && nla_put_u32(msg, NL80211_ATTR_MAX_AP_ASSOC_STA, - dev->wiphy.max_ap_assoc_sta)) + rdev->wiphy.max_ap_assoc_sta)) goto nla_put_failure; state->split_start++; break; case 11: - if (dev->wiphy.n_vendor_commands) { + if (rdev->wiphy.n_vendor_commands) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; @@ -1623,15 +1650,15 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nested) goto nla_put_failure; - for (i = 0; i < dev->wiphy.n_vendor_commands; i++) { - info = &dev->wiphy.vendor_commands[i].info; + for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { + info = &rdev->wiphy.vendor_commands[i].info; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } - if (dev->wiphy.n_vendor_events) { + if (rdev->wiphy.n_vendor_events) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; @@ -1640,18 +1667,26 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nested) goto nla_put_failure; - for (i = 0; i < dev->wiphy.n_vendor_events; i++) { - info = &dev->wiphy.vendor_events[i]; + for (i = 0; i < rdev->wiphy.n_vendor_events; i++) { + info = &rdev->wiphy.vendor_events[i]; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } + state->split_start++; + break; + case 12: + if (rdev->wiphy.flags & 
WIPHY_FLAG_HAS_CHANNEL_SWITCH && + nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS, + rdev->wiphy.max_num_csa_counters)) + goto nla_put_failure; /* done */ state->split_start = 0; break; } + finish: return genlmsg_end(msg, hdr); nla_put_failure: @@ -1684,7 +1719,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, if (!netdev) return -ENODEV; if (netdev->ieee80211_ptr) { - rdev = wiphy_to_dev( + rdev = wiphy_to_rdev( netdev->ieee80211_ptr->wiphy); state->filter_wiphy = rdev->wiphy_idx; } @@ -1697,7 +1732,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, ret; struct nl80211_dump_wiphy_state *state = (void *)cb->args[0]; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; rtnl_lock(); if (!state) { @@ -1716,17 +1751,18 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = (long)state; } - list_for_each_entry(dev, &cfg80211_rdev_list, list) { - if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (++idx <= state->start) continue; if (state->filter_wiphy != -1 && - state->filter_wiphy != dev->wiphy_idx) + state->filter_wiphy != rdev->wiphy_idx) continue; /* attempt to fit multiple wiphy data chunks into the skb */ do { - ret = nl80211_send_wiphy(dev, skb, + ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, + skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, state); @@ -1774,14 +1810,15 @@ static int nl80211_dump_wiphy_done(struct netlink_callback *cb) static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; - struct cfg80211_registered_device *dev = info->user_ptr[0]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, + if (nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, msg, + info->snd_portid, info->snd_seq, 0, &state) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -1908,18 +1945,20 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, + struct net_device *dev, struct genl_info *info) { struct cfg80211_chan_def chandef; int result; enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR; + struct wireless_dev *wdev = NULL; - if (wdev) - iftype = wdev->iftype; - + if (dev) + wdev = dev->ieee80211_ptr; if (!nl80211_can_set_dev_channel(wdev)) return -EOPNOTSUPP; + if (wdev) + iftype = wdev->iftype; result = nl80211_parse_chandef(rdev, info, &chandef); if (result) @@ -1928,14 +1967,27 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { - result = -EBUSY; - break; - } - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) { result = -EINVAL; break; } + if (wdev->beacon_interval) { + if (!dev || !rdev->ops->set_ap_chanwidth || + !(rdev->wiphy.features & + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) { + result = -EBUSY; + break; + } + + /* Only allow dynamic channel width changes */ + if (chandef.chan != wdev->preset_chandef.chan) { + result = -EBUSY; + break; + } + result = 
rdev_set_ap_chanwidth(rdev, dev, &chandef); + if (result) + break; + } wdev->preset_chandef = chandef; result = 0; break; @@ -1957,7 +2009,7 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *netdev = info->user_ptr[1]; - return __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info); + return __nl80211_set_channel(rdev, netdev, info); } static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info) @@ -2013,7 +2065,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (netdev && netdev->ieee80211_ptr) - rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); + rdev = wiphy_to_rdev(netdev->ieee80211_ptr->wiphy); else netdev = NULL; } @@ -2079,9 +2131,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - result = __nl80211_set_channel(rdev, - nl80211_can_set_dev_channel(wdev) ? wdev : NULL, - info); + result = __nl80211_set_channel( + rdev, + nl80211_can_set_dev_channel(wdev) ? netdev : NULL, + info); if (result) return result; } @@ -2229,7 +2282,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) static inline u64 wdev_id(struct wireless_dev *wdev) { return (u64)wdev->identifier | - ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); + ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32); } static int nl80211_send_chandef(struct sk_buff *msg, @@ -2355,7 +2408,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; - struct cfg80211_registered_device *dev = info->user_ptr[0]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -2363,7 +2416,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, - dev, wdev) < 0) { + rdev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -2514,6 +2567,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; + /* to avoid failing a new interface creation due to pending removal */ + cfg80211_destroy_ifaces(rdev); + memset(&params, 0, sizeof(params)); if (!info->attrs[NL80211_ATTR_IFNAME]) @@ -2563,6 +2619,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(wdev); } + if (info->attrs[NL80211_ATTR_IFACE_SOCKET_OWNER]) + wdev->owner_nlportid = info->snd_portid; + switch (type) { case NL80211_IFTYPE_MESH_POINT: if (!info->attrs[NL80211_ATTR_MESH_ID]) @@ -3142,7 +3201,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; - u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -3258,24 +3316,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } else if (!nl80211_get_ap_channel(rdev, &params)) return -EINVAL; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef, + wdev->iftype)) return -EINVAL; - err = 
cfg80211_chandef_dfs_required(wdev->wiphy, &params.chandef); - if (err < 0) - return err; - if (err) { - radar_detect_width = BIT(params.chandef.width); - params.radar_required = true; - } - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - params.chandef.chan, - CHAN_MODE_SHARED, - radar_detect_width); - if (err) - return err; - if (info->attrs[NL80211_ATTR_ACL_POLICY]) { params.acl = parse_acl_data(&rdev->wiphy, info); if (IS_ERR(params.acl)) @@ -3613,6 +3657,10 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED, sinfo->tx_failed)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) && + nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT, + sinfo->expected_throughput)) + goto nla_put_failure; if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) && nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) @@ -3675,13 +3723,13 @@ static int nl80211_dump_station(struct sk_buff *skb, struct netlink_callback *cb) { struct station_info sinfo; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 mac_addr[ETH_ALEN]; int sta_idx = cb->args[2]; int err; - err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); + err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) return err; @@ -3690,14 +3738,14 @@ static int nl80211_dump_station(struct sk_buff *skb, goto out_err; } - if (!dev->ops->dump_station) { + if (!rdev->ops->dump_station) { err = -EOPNOTSUPP; goto out_err; } while (1) { memset(&sinfo, 0, sizeof(sinfo)); - err = rdev_dump_station(dev, wdev->netdev, sta_idx, + err = rdev_dump_station(rdev, wdev->netdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) break; @@ -3707,7 +3755,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (nl80211_send_station(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev, wdev->netdev, mac_addr, + rdev, wdev->netdev, mac_addr, &sinfo) < 0) goto out; @@ -3719,7 +3767,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(dev); + nl80211_finish_wdev_dump(rdev); return err; } @@ -4380,18 +4428,18 @@ static int nl80211_dump_mpath(struct sk_buff *skb, struct netlink_callback *cb) { struct mpath_info pinfo; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 dst[ETH_ALEN]; u8 next_hop[ETH_ALEN]; int path_idx = cb->args[2]; int err; - err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); + err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) return err; - if (!dev->ops->dump_mpath) { + if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; goto out_err; } @@ -4402,7 +4450,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, } while (1) { - err = rdev_dump_mpath(dev, wdev->netdev, path_idx, dst, + err = rdev_dump_mpath(rdev, wdev->netdev, path_idx, dst, next_hop, &pinfo); if (err == -ENOENT) break; @@ -4423,7 +4471,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(dev); + nl80211_finish_wdev_dump(rdev); return err; } @@ -4663,7 +4711,6 @@ static int parse_reg_rule(struct nlattr *tb[], static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) { - int r; char *data = NULL; enum nl80211_user_reg_hint_type user_reg_hint_type; @@ -4676,11 +4723,6 @@ static int nl80211_req_set_reg(struct sk_buff *skb, 
struct genl_info *info) if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; - if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) - return -EINVAL; - - data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); - if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]) user_reg_hint_type = nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]); @@ -4690,14 +4732,16 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) switch (user_reg_hint_type) { case NL80211_USER_REG_HINT_USER: case NL80211_USER_REG_HINT_CELL_BASE: - break; + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + return regulatory_hint_user(data, user_reg_hint_type); + case NL80211_USER_REG_HINT_INDOOR: + return regulatory_hint_indoor_user(); default: return -EINVAL; } - - r = regulatory_hint_user(data, user_reg_hint_type); - - return r; } static int nl80211_get_mesh_config(struct sk_buff *skb, @@ -5796,7 +5840,8 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (wdev->cac_started) return -EBUSY; - err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef, + wdev->iftype); if (err < 0) return err; @@ -5809,12 +5854,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (!rdev->ops->start_radar_detection) return -EOPNOTSUPP; - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chandef.chan, CHAN_MODE_SHARED, - BIT(chandef.width)); - if (err) - return err; - cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); if (WARN_ON(!cac_time_ms)) cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; @@ -5843,6 +5882,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) u8 radar_detect_width = 0; int err; bool need_new_beacon = false; + int len, i; if (!rdev->ops->channel_switch || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) @@ -5901,26 +5941,55 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]) return -EINVAL; - params.counter_offset_beacon = - nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); - if (params.counter_offset_beacon >= params.beacon_csa.tail_len) + len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); + if (!len || (len % sizeof(u16))) return -EINVAL; - /* sanity check - counters should be the same */ - if (params.beacon_csa.tail[params.counter_offset_beacon] != - params.count) + params.n_counter_offsets_beacon = len / sizeof(u16); + if (rdev->wiphy.max_num_csa_counters && + (params.n_counter_offsets_beacon > + rdev->wiphy.max_num_csa_counters)) return -EINVAL; + params.counter_offsets_beacon = + nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); + + /* sanity checks - counters should fit and be the same */ + for (i = 0; i < params.n_counter_offsets_beacon; i++) { + u16 offset = params.counter_offsets_beacon[i]; + + if (offset >= params.beacon_csa.tail_len) + return -EINVAL; + + if (params.beacon_csa.tail[offset] != params.count) + return -EINVAL; + } + if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) { - params.counter_offset_presp = - nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); - if (params.counter_offset_presp >= - params.beacon_csa.probe_resp_len) + len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); + if (!len || (len % sizeof(u16))) return -EINVAL; - if (params.beacon_csa.probe_resp[params.counter_offset_presp] != - params.count) + params.n_counter_offsets_presp = len / 
sizeof(u16); + if (rdev->wiphy.max_num_csa_counters && + (params.n_counter_offsets_beacon > + rdev->wiphy.max_num_csa_counters)) return -EINVAL; + + params.counter_offsets_presp = + nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); + + /* sanity checks - counters should fit and be the same */ + for (i = 0; i < params.n_counter_offsets_presp; i++) { + u16 offset = params.counter_offsets_presp[i]; + + if (offset >= params.beacon_csa.probe_resp_len) + return -EINVAL; + + if (params.beacon_csa.probe_resp[offset] != + params.count) + return -EINVAL; + } } skip_beacons: @@ -5928,27 +5997,25 @@ skip_beacons: if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef, + wdev->iftype)) return -EINVAL; - switch (dev->ieee80211_ptr->iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - err = cfg80211_chandef_dfs_required(wdev->wiphy, - &params.chandef); - if (err < 0) - return err; - if (err) { - radar_detect_width = BIT(params.chandef.width); - params.radar_required = true; - } - break; - default: - break; + err = cfg80211_chandef_dfs_required(wdev->wiphy, + &params.chandef, + wdev->iftype); + if (err < 0) + return err; + + if (err > 0) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; } + /* TODO: I left this here for now. With channel switch, the + * verification is a bit more complicated, because we only do + * it later when the channel switch really happens. + */ err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, params.chandef.chan, CHAN_MODE_SHARED, @@ -6175,12 +6242,12 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { struct survey_info survey; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; int survey_idx = cb->args[2]; int res; - res = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); + res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) return res; @@ -6189,7 +6256,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, goto out_err; } - if (!dev->ops->dump_survey) { + if (!rdev->ops->dump_survey) { res = -EOPNOTSUPP; goto out_err; } @@ -6197,7 +6264,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, while (1) { struct ieee80211_channel *chan; - res = rdev_dump_survey(dev, wdev->netdev, survey_idx, &survey); + res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey); if (res == -ENOENT) break; if (res) @@ -6209,7 +6276,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, goto out; } - chan = ieee80211_get_channel(&dev->wiphy, + chan = ieee80211_get_channel(&rdev->wiphy, survey.channel->center_freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) { survey_idx++; @@ -6228,7 +6295,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, cb->args[2] = survey_idx; res = skb->len; out_err: - nl80211_finish_wdev_dump(dev); + nl80211_finish_wdev_dump(rdev); return res; } @@ -6704,7 +6771,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef, + NL80211_IFTYPE_ADHOC)) return -EINVAL; switch (ibss.chandef.width) { @@ -6879,7 +6947,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, int vendor_event_idx, int approxlen, gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + 
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); const struct nl80211_vendor_cmd_info *info; switch (cmd) { @@ -7767,6 +7835,27 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!chandef.chan && params.offchan) return -EINVAL; + params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); + params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); + + if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) { + int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); + int i; + + if (len % sizeof(u16)) + return -EINVAL; + + params.n_csa_offsets = len / sizeof(u16); + params.csa_offsets = + nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); + + /* check that all the offsets fit the frame */ + for (i = 0; i < params.n_csa_offsets; i++) { + if (params.csa_offsets[i] >= params.len) + return -EINVAL; + } + } + if (!params.dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -7780,8 +7869,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); - params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); params.chan = chandef.chan; err = cfg80211_mlme_mgmt_tx(rdev, wdev, &params, &cookie); if (err) @@ -8478,6 +8565,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], rem) { + u8 *mask_pat; + nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), nla_len(pat), NULL); err = -EINVAL; @@ -8501,19 +8590,18 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; new_triggers.patterns[i].pkt_offset = pkt_offset; - new_triggers.patterns[i].mask = - kmalloc(mask_len + pat_len, GFP_KERNEL); - if (!new_triggers.patterns[i].mask) { + mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!mask_pat) { err = -ENOMEM; goto error; } - new_triggers.patterns[i].pattern = - new_triggers.patterns[i].mask + mask_len; - memcpy(new_triggers.patterns[i].mask, - nla_data(pat_tb[NL80211_PKTPAT_MASK]), + new_triggers.patterns[i].mask = mask_pat; + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); + mask_pat += mask_len; + new_triggers.patterns[i].pattern = mask_pat; new_triggers.patterns[i].pattern_len = pat_len; - memcpy(new_triggers.patterns[i].pattern, + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); i++; @@ -8705,6 +8793,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN], rem) { + u8 *mask_pat; + nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), nla_len(pat), NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || @@ -8726,17 +8816,19 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, return -EINVAL; new_rule->patterns[i].pkt_offset = pkt_offset; - new_rule->patterns[i].mask = - kmalloc(mask_len + pat_len, GFP_KERNEL); - if (!new_rule->patterns[i].mask) + mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!mask_pat) return -ENOMEM; - new_rule->patterns[i].pattern = - new_rule->patterns[i].mask + mask_len; - memcpy(new_rule->patterns[i].mask, - nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); + + new_rule->patterns[i].mask = mask_pat; + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), + mask_len); + + mask_pat += mask_len; + new_rule->patterns[i].pattern = mask_pat; new_rule->patterns[i].pattern_len = pat_len; - memcpy(new_rule->patterns[i].pattern, - 
nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), + pat_len); i++; } @@ -8981,9 +9073,8 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->p2p_started) return 0; - err = cfg80211_can_add_interface(rdev, wdev->iftype); - if (err) - return err; + if (rfkill_blocked(rdev->rfkill)) + return -ERFKILL; err = rdev_start_p2p_device(rdev, wdev); if (err) @@ -9192,7 +9283,7 @@ struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, enum nl80211_attrs attr, int approxlen) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (WARN_ON(!rdev->cur_cmd_info)) return NULL; @@ -9316,7 +9407,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, } dev = wdev->netdev; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { @@ -10017,16 +10108,20 @@ static const struct genl_ops nl80211_ops[] = { /* notification functions */ -void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) +void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, + enum nl80211_commands cmd) { struct sk_buff *msg; struct nl80211_dump_wiphy_state state = {}; + WARN_ON(cmd != NL80211_CMD_NEW_WIPHY && + cmd != NL80211_CMD_DEL_WIPHY); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) { + if (nl80211_send_wiphy(rdev, cmd, msg, 0, 0, 0, &state) < 0) { nlmsg_free(msg); return; } @@ -10345,7 +10440,7 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); const struct ieee80211_mgmt *mgmt = (void *)buf; u32 cmd; @@ -10567,7 +10662,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, const u8* ie, u8 ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; @@ -10747,7 +10842,7 @@ void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, unsigned int duration, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, @@ -10761,7 +10856,7 @@ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, @@ -10773,7 +10868,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct 
sk_buff *msg; trace_cfg80211_new_sta(dev, mac_addr, sinfo); @@ -10796,7 +10891,7 @@ EXPORT_SYMBOL(cfg80211_new_sta); void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; @@ -10833,7 +10928,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; @@ -10868,7 +10963,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, const u8 *addr, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); @@ -10988,7 +11083,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct net_device *netdev = wdev->netdev; struct sk_buff *msg; void *hdr; @@ -11032,7 +11127,7 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -11124,7 +11219,7 @@ void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_gtk_rekey_notify(dev, bssid); nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); @@ -11182,7 +11277,7 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth); nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); @@ -11229,7 +11324,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_WDEV_LOCK(wdev); @@ -11253,7 +11348,7 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -11353,7 +11448,7 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy 
= wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -11400,7 +11495,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, u64 cookie, bool acked, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; @@ -11440,7 +11535,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, const u8 *frame, size_t len, int freq, int sig_dbm) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; struct cfg80211_beacon_registration *reg; @@ -11487,7 +11582,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct cfg80211_wowlan_wakeup *wakeup, gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; int size = 200; @@ -11597,7 +11692,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, u16 reason_code, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; @@ -11649,9 +11744,15 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { - list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) + bool schedule_destroy_work = false; + + list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); + if (wdev->owner_nlportid == notify->portid) + schedule_destroy_work = true; + } + spin_lock_bh(&rdev->beacon_registrations_lock); list_for_each_entry_safe(reg, tmp, &rdev->beacon_registrations, list) { @@ -11662,11 +11763,24 @@ static int nl80211_netlink_notify(struct notifier_block * nb, } } spin_unlock_bh(&rdev->beacon_registrations_lock); + + if (schedule_destroy_work) { + struct cfg80211_iface_destroy *destroy; + + destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC); + if (destroy) { + destroy->nlportid = notify->portid; + spin_lock(&rdev->destroy_list_lock); + list_add(&destroy->list, &rdev->destroy_list); + spin_unlock(&rdev->destroy_list_lock); + schedule_work(&rdev->destroy_work); + } + } } rcu_read_unlock(); - return NOTIFY_DONE; + return NOTIFY_OK; } static struct notifier_block nl80211_netlink_notifier = { @@ -11677,7 +11791,7 @@ void cfg80211_ft_event(struct net_device *netdev, struct cfg80211_ft_event_params *ft_event) { struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; @@ -11724,7 +11838,7 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) void *hdr; u32 nlportid; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); if (!rdev->crit_proto_nlportid) return; @@ -11759,7 +11873,7 @@ EXPORT_SYMBOL(cfg80211_crit_proto_stopped); void nl80211_send_ap_stopped(struct wireless_dev *wdev) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = 
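/*
 * The netlink notifier above runs under rcu_read_lock() and so cannot
 * tear down interfaces synchronously; it records the dying netlink
 * portid and lets rdev->destroy_work do the teardown in process
 * context. The defer step in isolation (defer_iface_destroy is an
 * illustrative name):
 */
static void defer_iface_destroy(struct cfg80211_registered_device *rdev,
				u32 portid)
{
	struct cfg80211_iface_destroy *destroy;

	destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC);	/* atomic context */
	if (!destroy)
		return;			/* best effort; nothing to roll back */

	destroy->nlportid = portid;
	spin_lock(&rdev->destroy_list_lock);
	list_add(&destroy->list, &rdev->destroy_list);
	spin_unlock(&rdev->destroy_list_lock);
	schedule_work(&rdev->destroy_work);	/* real teardown happens there */
}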
wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 1e6df9630f42..49c9a482dd12 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -5,7 +5,8 @@ int nl80211_init(void); void nl80211_exit(void); -void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); +void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, + enum nl80211_commands cmd); void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 74d97d33c938..d95bbe348138 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -199,7 +199,7 @@ static inline int rdev_change_station(struct cfg80211_registered_device *rdev, } static inline int rdev_get_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, u8 *mac, + struct net_device *dev, const u8 *mac, struct station_info *sinfo) { int ret; @@ -950,4 +950,17 @@ static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef) +{ + int ret; + + trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef); + ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f59aaac586f8..558b0e3a02d8 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -65,11 +65,26 @@ #define REG_DBG_PRINT(args...) #endif +/** + * enum reg_request_treatment - regulatory request treatment + * + * @REG_REQ_OK: continue processing the regulatory request + * @REG_REQ_IGNORE: ignore the regulatory request + * @REG_REQ_INTERSECT: the regulatory domain resulting from this request should + * be intersected with the current one. + * @REG_REQ_ALREADY_SET: the regulatory request will not change the current + * regulatory settings, and no further processing is required. + * @REG_REQ_USER_HINT_HANDLED: a non-alpha2 user hint was handled and no + * further processing is required, i.e., no need to update last_request + * etc. This should be used for user hints that do not provide an alpha2 + * but some other type of regulatory hint, e.g., indoor operation. + */ enum reg_request_treatment { REG_REQ_OK, REG_REQ_IGNORE, REG_REQ_INTERSECT, REG_REQ_ALREADY_SET, + REG_REQ_USER_HINT_HANDLED, }; static struct regulatory_request core_request_world = { @@ -106,6 +121,14 @@ const struct ieee80211_regdomain __rcu *cfg80211_regdomain; */ static int reg_num_devs_support_basehint; +/* + * State variable indicating if the platform on which the devices + * are attached is operating in an indoor environment. The state variable + * is relevant for all registered devices.
+ * (protected by RTNL) + */ +static bool reg_is_indoor; + static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { return rtnl_dereference(cfg80211_regdomain); @@ -240,8 +263,16 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void reg_free_request(struct regulatory_request *lr) +static void reg_free_request(struct regulatory_request *request) { + if (request != get_last_request()) + kfree(request); +} + +static void reg_free_last_request(void) +{ + struct regulatory_request *lr = get_last_request(); + if (lr != &core_request_world && lr) kfree_rcu(lr, rcu_head); } @@ -254,7 +285,7 @@ static void reg_update_last_request(struct regulatory_request *request) if (lr == request) return; - reg_free_request(lr); + reg_free_last_request(); rcu_assign_pointer(last_request, request); } @@ -873,6 +904,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_RADAR; if (rd_flags & NL80211_RRF_NO_OFDM) channel_flags |= IEEE80211_CHAN_NO_OFDM; + if (rd_flags & NL80211_RRF_NO_OUTDOOR) + channel_flags |= IEEE80211_CHAN_INDOOR_ONLY; return channel_flags; } @@ -902,7 +935,7 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, if (!band_rule_found) band_rule_found = freq_in_rule_band(fr, center_freq); - bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20)); + bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(5)); if (band_rule_found && bw_fits) return rr; @@ -986,10 +1019,10 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, } #endif -/* - * Note that right now we assume the desired channel bandwidth - * is always 20 MHz for each individual channel (HT40 uses 20 MHz - * per channel, the primary and the extension channel). +/* Find an ieee80211_reg_rule such that a 5MHz channel with frequency + * chan->center_freq fits there. 
+ * If there is no such reg_rule, disable the channel, otherwise set the + * flags corresponding to the bandwidths allowed in the particular reg_rule */ static void handle_channel(struct wiphy *wiphy, enum nl80211_reg_initiator initiator, @@ -1050,8 +1083,12 @@ static void handle_channel(struct wiphy *wiphy, if (reg_rule->flags & NL80211_RRF_AUTO_BW) max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); + if (max_bandwidth_khz < MHZ_TO_KHZ(10)) + bw_flags = IEEE80211_CHAN_NO_10MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(20)) + bw_flags |= IEEE80211_CHAN_NO_20MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags = IEEE80211_CHAN_NO_HT40; + bw_flags |= IEEE80211_CHAN_NO_HT40; if (max_bandwidth_khz < MHZ_TO_KHZ(80)) bw_flags |= IEEE80211_CHAN_NO_80MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(160)) @@ -1071,6 +1108,13 @@ static void handle_channel(struct wiphy *wiphy, (int) MBI_TO_DBI(power_rule->max_antenna_gain); chan->max_reg_power = chan->max_power = chan->orig_mpwr = (int) MBM_TO_DBM(power_rule->max_eirp); + + if (chan->flags & IEEE80211_CHAN_RADAR) { + chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; + if (reg_rule->dfs_cac_ms) + chan->dfs_cac_ms = reg_rule->dfs_cac_ms; + } + return; } @@ -1126,12 +1170,19 @@ static bool reg_request_cell_base(struct regulatory_request *request) return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE; } +static bool reg_request_indoor(struct regulatory_request *request) +{ + if (request->initiator != NL80211_REGDOM_SET_BY_USER) + return false; + return request->user_reg_hint_type == NL80211_USER_REG_HINT_INDOOR; +} + bool reg_last_request_cell_base(void) { return reg_request_cell_base(get_last_request()); } -#ifdef CONFIG_CFG80211_CERTIFICATION_ONUS +#ifdef CONFIG_CFG80211_REG_CELLULAR_HINTS /* Core specific check */ static enum reg_request_treatment reg_ignore_cell_hint(struct regulatory_request *pending_request) @@ -1471,8 +1522,12 @@ static void handle_channel_custom(struct wiphy *wiphy, if (reg_rule->flags & NL80211_RRF_AUTO_BW) max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); + if (max_bandwidth_khz < MHZ_TO_KHZ(10)) + bw_flags = IEEE80211_CHAN_NO_10MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(20)) + bw_flags |= IEEE80211_CHAN_NO_20MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags = IEEE80211_CHAN_NO_HT40; + bw_flags |= IEEE80211_CHAN_NO_HT40; if (max_bandwidth_khz < MHZ_TO_KHZ(80)) bw_flags |= IEEE80211_CHAN_NO_80MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(160)) @@ -1568,6 +1623,11 @@ __reg_process_hint_user(struct regulatory_request *user_request) { struct regulatory_request *lr = get_last_request(); + if (reg_request_indoor(user_request)) { + reg_is_indoor = true; + return REG_REQ_USER_HINT_HANDLED; + } + if (reg_request_cell_base(user_request)) return reg_ignore_cell_hint(user_request); @@ -1615,8 +1675,9 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) { - kfree(user_request); + treatment == REG_REQ_ALREADY_SET || + treatment == REG_REQ_USER_HINT_HANDLED) { + reg_free_request(user_request); return treatment; } @@ -1676,14 +1737,15 @@ reg_process_hint_driver(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: - kfree(driver_request); + case REG_REQ_USER_HINT_HANDLED: + reg_free_request(driver_request); return treatment; case REG_REQ_INTERSECT: /* fall through */ case REG_REQ_ALREADY_SET: regd = reg_copy_regd(get_cfg80211_regdom()); if (IS_ERR(regd)) { - 
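/*
 * The handle_channel/handle_channel_custom hunks above replace the old
 * HT40-only clamp with a cumulative ladder of NO_* flags. The mapping,
 * pulled out on its own (reg_bw_flags is an illustrative helper name):
 */
static u32 reg_bw_flags(u32 max_bandwidth_khz)
{
	u32 bw_flags = 0;

	if (max_bandwidth_khz < MHZ_TO_KHZ(10))
		bw_flags |= IEEE80211_CHAN_NO_10MHZ;
	if (max_bandwidth_khz < MHZ_TO_KHZ(20))
		bw_flags |= IEEE80211_CHAN_NO_20MHZ;
	if (max_bandwidth_khz < MHZ_TO_KHZ(40))
		bw_flags |= IEEE80211_CHAN_NO_HT40;
	if (max_bandwidth_khz < MHZ_TO_KHZ(80))
		bw_flags |= IEEE80211_CHAN_NO_80MHZ;
	if (max_bandwidth_khz < MHZ_TO_KHZ(160))
		bw_flags |= IEEE80211_CHAN_NO_160MHZ;

	/* e.g. a 20 MHz rule yields NO_HT40 | NO_80MHZ | NO_160MHZ */
	return bw_flags;
}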
kfree(driver_request); + reg_free_request(driver_request); return REG_REQ_IGNORE; } rcu_assign_pointer(wiphy->regd, regd); @@ -1775,12 +1837,13 @@ reg_process_hint_country_ie(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: + case REG_REQ_USER_HINT_HANDLED: /* fall through */ case REG_REQ_ALREADY_SET: - kfree(country_ie_request); + reg_free_request(country_ie_request); return treatment; case REG_REQ_INTERSECT: - kfree(country_ie_request); + reg_free_request(country_ie_request); /* * This doesn't happen yet, not sure we * ever want to support it for this case. @@ -1813,7 +1876,8 @@ static void reg_process_hint(struct regulatory_request *reg_request) case NL80211_REGDOM_SET_BY_USER: treatment = reg_process_hint_user(reg_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) + treatment == REG_REQ_ALREADY_SET || + treatment == REG_REQ_USER_HINT_HANDLED) return; queue_delayed_work(system_power_efficient_wq, &reg_timeout, msecs_to_jiffies(3142)); @@ -1841,7 +1905,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) return; out_free: - kfree(reg_request); + reg_free_request(reg_request); } /* @@ -1857,7 +1921,7 @@ static void reg_process_pending_hints(void) /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { - REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); + reg_process_hint(lr); return; } @@ -1967,6 +2031,22 @@ int regulatory_hint_user(const char *alpha2, return 0; } +int regulatory_hint_indoor_user(void) +{ + struct regulatory_request *request; + + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->wiphy_idx = WIPHY_IDX_INVALID; + request->initiator = NL80211_REGDOM_SET_BY_USER; + request->user_reg_hint_type = NL80211_USER_REG_HINT_INDOOR; + queue_regulatory_request(request); + + return 0; +} + /* Driver hints */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { @@ -2134,6 +2214,8 @@ static void restore_regulatory_settings(bool reset_user) ASSERT_RTNL(); + reg_is_indoor = false; + reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); @@ -2594,7 +2676,7 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) reg_num_devs_support_basehint--; rcu_free_regdom(get_wiphy_regdom(wiphy)); - rcu_assign_pointer(wiphy->regd, NULL); + RCU_INIT_POINTER(wiphy->regd, NULL); if (lr) request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); @@ -2614,6 +2696,40 @@ static void reg_timeout_work(struct work_struct *work) rtnl_unlock(); } +/* + * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for + * UNII band definitions + */ +int cfg80211_get_unii(int freq) +{ + /* UNII-1 */ + if (freq >= 5150 && freq <= 5250) + return 0; + + /* UNII-2A */ + if (freq > 5250 && freq <= 5350) + return 1; + + /* UNII-2B */ + if (freq > 5350 && freq <= 5470) + return 2; + + /* UNII-2C */ + if (freq > 5470 && freq <= 5725) + return 3; + + /* UNII-3 */ + if (freq > 5725 && freq <= 5825) + return 4; + + return -EINVAL; +} + +bool regulatory_indoor_allowed(void) +{ + return reg_is_indoor; +} + int __init regulatory_init(void) { int err = 0; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 37c180df34b7..5e48031ccb9a 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -25,6 +25,7 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy); int regulatory_hint_user(const char *alpha2, enum nl80211_user_reg_hint_type user_reg_hint_type); +int
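/*
 * Spot checks for the cfg80211_get_unii() band boundaries defined above
 * (unii_examples is an illustrative self-test, not part of the patch):
 */
static void unii_examples(void)
{
	WARN_ON(cfg80211_get_unii(5180) != 0);		/* channel 36  -> UNII-1  */
	WARN_ON(cfg80211_get_unii(5300) != 1);		/* channel 60  -> UNII-2A */
	WARN_ON(cfg80211_get_unii(5400) != 2);		/* 5.4 GHz     -> UNII-2B */
	WARN_ON(cfg80211_get_unii(5500) != 3);		/* channel 100 -> UNII-2C */
	WARN_ON(cfg80211_get_unii(5805) != 4);		/* channel 161 -> UNII-3  */
	WARN_ON(cfg80211_get_unii(2412) != -EINVAL);	/* 2.4 GHz: not a U-NII band */
}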
regulatory_hint_indoor_user(void); void wiphy_regulatory_register(struct wiphy *wiphy); void wiphy_regulatory_deregister(struct wiphy *wiphy); @@ -104,4 +105,21 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, */ void regulatory_hint_disconnect(void); +/** + * cfg80211_get_unii - get the U-NII band for the frequency + * @freq: the frequency for which we want to get the UNII band. + * + * Get a value specifying the U-NII band the frequency belongs to. + * U-NII bands are defined by the FCC in C.F.R 47 part 15. + * + * Returns -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A, + * 2 for UNII-2B, 3 for UNII-2C and 4 for UNII-3. + */ +int cfg80211_get_unii(int freq); + +/** + * regulatory_indoor_allowed - is indoor operation allowed + */ +bool regulatory_indoor_allowed(void); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 88f108edfb58..0798c62e6085 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -81,10 +81,10 @@ static void bss_free(struct cfg80211_internal_bss *bss) kfree(bss); } -static inline void bss_ref_get(struct cfg80211_registered_device *dev, +static inline void bss_ref_get(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); bss->refcount++; if (bss->pub.hidden_beacon_bss) { @@ -95,10 +95,10 @@ static inline void bss_ref_get(struct cfg80211_registered_device *dev, } } -static inline void bss_ref_put(struct cfg80211_registered_device *dev, +static inline void bss_ref_put(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); if (bss->pub.hidden_beacon_bss) { struct cfg80211_internal_bss *hbss; @@ -114,10 +114,10 @@ static inline void bss_ref_put(struct cfg80211_registered_device *dev, bss_free(bss); } -static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, +static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); if (!list_empty(&bss->hidden_list)) { /* @@ -134,31 +134,31 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, } list_del_init(&bss->list); - rb_erase(&bss->rbn, &dev->bss_tree); - bss_ref_put(dev, bss); + rb_erase(&bss->rbn, &rdev->bss_tree); + bss_ref_put(rdev, bss); return true; } -static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, +static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev, unsigned long expire_time) { struct cfg80211_internal_bss *bss, *tmp; bool expired = false; - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); - list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { + list_for_each_entry_safe(bss, tmp, &rdev->bss_list, list) { if (atomic_read(&bss->hold)) continue; if (!time_after(expire_time, bss->ts)) continue; - if (__cfg80211_unlink_bss(dev, bss)) + if (__cfg80211_unlink_bss(rdev, bss)) expired = true; } if (expired) - dev->bss_generation++; + rdev->bss_generation++; } void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, @@ -238,11 +238,11 @@ void __cfg80211_scan_done(struct work_struct *wk) void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) { trace_cfg80211_scan_done(request, aborted); - WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); + WARN_ON(request !=
wiphy_to_rdev(request->wiphy)->scan_req); request->aborted = aborted; request->notified = true; - queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk); + queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -278,15 +278,15 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) { trace_cfg80211_sched_scan_results(wiphy); /* ignore if we're not scanning */ - if (wiphy_to_dev(wiphy)->sched_scan_req) + if (wiphy_to_rdev(wiphy)->sched_scan_req) queue_work(cfg80211_wq, - &wiphy_to_dev(wiphy)->sched_scan_results_wk); + &wiphy_to_rdev(wiphy)->sched_scan_results_wk); } EXPORT_SYMBOL(cfg80211_sched_scan_results); void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_RTNL(); @@ -330,21 +330,21 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, return 0; } -void cfg80211_bss_age(struct cfg80211_registered_device *dev, +void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs) { struct cfg80211_internal_bss *bss; unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); - spin_lock_bh(&dev->bss_lock); - list_for_each_entry(bss, &dev->bss_list, list) + spin_lock_bh(&rdev->bss_lock); + list_for_each_entry(bss, &rdev->bss_list, list) bss->ts -= age_jiffies; - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); } -void cfg80211_bss_expire(struct cfg80211_registered_device *dev) +void cfg80211_bss_expire(struct cfg80211_registered_device *rdev) { - __cfg80211_bss_expire(dev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); + __cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) @@ -534,32 +534,34 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, const u8 *ssid, size_t ssid_len, u16 capa_mask, u16 capa_val) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss, *res = NULL; unsigned long now = jiffies; trace_cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, capa_mask, capa_val); - spin_lock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); - list_for_each_entry(bss, &dev->bss_list, list) { + list_for_each_entry(bss, &rdev->bss_list, list) { if ((bss->pub.capability & capa_mask) != capa_val) continue; if (channel && bss->pub.channel != channel) continue; + if (!is_valid_ether_addr(bss->pub.bssid)) + continue; /* Don't get expired BSS structs */ if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) && !atomic_read(&bss->hold)) continue; if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { res = bss; - bss_ref_get(dev, res); + bss_ref_get(rdev, res); break; } } - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); if (!res) return NULL; trace_cfg80211_return_bss(&res->pub); @@ -567,10 +569,10 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_get_bss); -static void rb_insert_bss(struct cfg80211_registered_device *dev, +static void rb_insert_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - struct rb_node **p = &dev->bss_tree.rb_node; + struct rb_node **p = &rdev->bss_tree.rb_node; struct rb_node *parent = NULL; struct cfg80211_internal_bss *tbss; int cmp; @@ -593,15 +595,15 @@ static void rb_insert_bss(struct 
cfg80211_registered_device *dev, } rb_link_node(&bss->rbn, parent, p); - rb_insert_color(&bss->rbn, &dev->bss_tree); + rb_insert_color(&bss->rbn, &rdev->bss_tree); } static struct cfg80211_internal_bss * -rb_find_bss(struct cfg80211_registered_device *dev, +rb_find_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *res, enum bss_compare_mode mode) { - struct rb_node *n = dev->bss_tree.rb_node; + struct rb_node *n = rdev->bss_tree.rb_node; struct cfg80211_internal_bss *bss; int r; @@ -620,7 +622,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, return NULL; } -static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, +static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *new) { const struct cfg80211_bss_ies *ies; @@ -650,7 +652,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, /* This is the bad part ... */ - list_for_each_entry(bss, &dev->bss_list, list) { + list_for_each_entry(bss, &rdev->bss_list, list) { if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) continue; if (bss->pub.channel != new->pub.channel) @@ -684,7 +686,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, /* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_internal_bss * -cfg80211_bss_update(struct cfg80211_registered_device *dev, +cfg80211_bss_update(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *tmp, bool signal_valid) { @@ -695,14 +697,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, tmp->ts = jiffies; - spin_lock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); if (WARN_ON(!rcu_access_pointer(tmp->pub.ies))) { - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return NULL; } - found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR); + found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR); if (found) { /* Update IEs */ @@ -789,7 +791,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, * is allocated on the stack since it's not needed in the * more common case of an update */ - new = kzalloc(sizeof(*new) + dev->wiphy.bss_priv_size, + new = kzalloc(sizeof(*new) + rdev->wiphy.bss_priv_size, GFP_ATOMIC); if (!new) { ies = (void *)rcu_dereference(tmp->pub.beacon_ies); @@ -805,9 +807,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, INIT_LIST_HEAD(&new->hidden_list); if (rcu_access_pointer(tmp->pub.proberesp_ies)) { - hidden = rb_find_bss(dev, tmp, BSS_CMP_HIDE_ZLEN); + hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN); if (!hidden) - hidden = rb_find_bss(dev, tmp, + hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_NUL); if (hidden) { new->pub.hidden_beacon_bss = &hidden->pub; @@ -824,24 +826,24 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, * expensive search for any probe responses that should * be grouped with this beacon for updates ... 
*/ - if (!cfg80211_combine_bsses(dev, new)) { + if (!cfg80211_combine_bsses(rdev, new)) { kfree(new); goto drop; } } - list_add_tail(&new->list, &dev->bss_list); - rb_insert_bss(dev, new); + list_add_tail(&new->list, &rdev->bss_list); + rb_insert_bss(rdev, new); found = new; } - dev->bss_generation++; - bss_ref_get(dev, found); - spin_unlock_bh(&dev->bss_lock); + rdev->bss_generation++; + bss_ref_get(rdev, found); + spin_unlock_bh(&rdev->bss_lock); return found; drop: - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return NULL; } @@ -889,6 +891,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; struct cfg80211_internal_bss tmp = {}, *res; + bool signal_valid; if (WARN_ON(!wiphy)) return NULL; @@ -925,8 +928,9 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, rcu_assign_pointer(tmp.pub.beacon_ies, ies); rcu_assign_pointer(tmp.pub.ies, ies); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp, - rx_channel == channel); + signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + wiphy->max_adj_channel_rssi_comp; + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) return NULL; @@ -950,6 +954,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, struct cfg80211_internal_bss tmp = {}, *res; struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; + bool signal_valid; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); @@ -997,8 +1002,9 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp, - rx_channel == channel); + signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + wiphy->max_adj_channel_rssi_comp; + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) return NULL; @@ -1013,7 +1019,7 @@ EXPORT_SYMBOL(cfg80211_inform_bss_width_frame); void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) @@ -1021,15 +1027,15 @@ void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) bss = container_of(pub, struct cfg80211_internal_bss, pub); - spin_lock_bh(&dev->bss_lock); - bss_ref_get(dev, bss); - spin_unlock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); + bss_ref_get(rdev, bss); + spin_unlock_bh(&rdev->bss_lock); } EXPORT_SYMBOL(cfg80211_ref_bss); void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) @@ -1037,15 +1043,15 @@ void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) bss = container_of(pub, struct cfg80211_internal_bss, pub); - spin_lock_bh(&dev->bss_lock); - bss_ref_put(dev, bss); - spin_unlock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); + bss_ref_put(rdev, bss); + spin_unlock_bh(&rdev->bss_lock); } EXPORT_SYMBOL(cfg80211_put_bss); void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss; if 
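/*
 * The cfg80211_inform_bss_width hunks above relax the old strict
 * rx_channel == channel test: the reported signal is considered valid
 * iff the frame was received within max_adj_channel_rssi_comp MHz of
 * the channel it advertises. The predicate in isolation
 * (bss_signal_valid is an illustrative name):
 */
static bool bss_signal_valid(const struct ieee80211_channel *rx_channel,
			     const struct ieee80211_channel *channel,
			     const struct wiphy *wiphy)
{
	return abs(rx_channel->center_freq - channel->center_freq) <=
	       wiphy->max_adj_channel_rssi_comp;
}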
(WARN_ON(!pub)) @@ -1053,12 +1059,12 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) bss = container_of(pub, struct cfg80211_internal_bss, pub); - spin_lock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); if (!list_empty(&bss->list)) { - if (__cfg80211_unlink_bss(dev, bss)) - dev->bss_generation++; + if (__cfg80211_unlink_bss(rdev, bss)) + rdev->bss_generation++; } - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); } EXPORT_SYMBOL(cfg80211_unlink_bss); @@ -1075,7 +1081,7 @@ cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) if (!dev) return ERR_PTR(-ENODEV); if (dev->ieee80211_ptr) - rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + rdev = wiphy_to_rdev(dev->ieee80211_ptr->wiphy); else rdev = ERR_PTR(-ENODEV); dev_put(dev); @@ -1155,7 +1161,11 @@ int cfg80211_wext_siwscan(struct net_device *dev, int k; int wiphy_freq = wiphy->bands[band]->channels[j].center_freq; for (k = 0; k < wreq->num_channels; k++) { - int wext_freq = cfg80211_wext_freq(wiphy, &wreq->channel_list[k]); + struct iw_freq *freq = + &wreq->channel_list[k]; + int wext_freq = + cfg80211_wext_freq(freq); + if (wext_freq == wiphy_freq) goto wext_freq_found; } @@ -1467,7 +1477,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, } -static int ieee80211_scan_results(struct cfg80211_registered_device *dev, +static int ieee80211_scan_results(struct cfg80211_registered_device *rdev, struct iw_request_info *info, char *buf, size_t len) { @@ -1475,18 +1485,18 @@ static int ieee80211_scan_results(struct cfg80211_registered_device *dev, char *end_buf = buf + len; struct cfg80211_internal_bss *bss; - spin_lock_bh(&dev->bss_lock); - cfg80211_bss_expire(dev); + spin_lock_bh(&rdev->bss_lock); + cfg80211_bss_expire(rdev); - list_for_each_entry(bss, &dev->bss_list, list) { + list_for_each_entry(bss, &rdev->bss_list, list) { if (buf + len - current_ev <= IW_EV_ADDR_LEN) { - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return -E2BIG; } - current_ev = ieee80211_bss(&dev->wiphy, info, bss, + current_ev = ieee80211_bss(&rdev->wiphy, info, bss, current_ev, end_buf); } - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return current_ev - buf; } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 3546a77033de..8bbeeb302216 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -59,7 +59,7 @@ static void cfg80211_sme_free(struct wireless_dev *wdev) static int cfg80211_conn_scan(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_scan_request *request; int n_channels, err; @@ -130,7 +130,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) static int cfg80211_conn_do_work(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_connect_params *params; struct cfg80211_assoc_request req = {}; int err; @@ -149,7 +149,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_SCAN_AGAIN: return cfg80211_conn_scan(wdev); case CFG80211_CONN_AUTHENTICATE_NEXT: - BUG_ON(!rdev->ops->auth); + if (WARN_ON(!rdev->ops->auth)) + return -EOPNOTSUPP; wdev->conn->state = CFG80211_CONN_AUTHENTICATING; return cfg80211_mlme_auth(rdev, wdev->netdev, params->channel, params->auth_type, @@ -161,7 +162,8 @@ static int cfg80211_conn_do_work(struct 
wireless_dev *wdev) case CFG80211_CONN_AUTH_FAILED: return -ENOTCONN; case CFG80211_CONN_ASSOCIATE_NEXT: - BUG_ON(!rdev->ops->assoc); + if (WARN_ON(!rdev->ops->assoc)) + return -EOPNOTSUPP; wdev->conn->state = CFG80211_CONN_ASSOCIATING; if (wdev->conn->prev_bssid_valid) req.prev_bssid = wdev->conn->prev_bssid; @@ -244,7 +246,7 @@ void cfg80211_conn_work(struct work_struct *work) /* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; u16 capa = WLAN_CAPABILITY_ESS; @@ -274,7 +276,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) static void __cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; ASSERT_WDEV_LOCK(wdev); @@ -305,7 +307,7 @@ void cfg80211_sme_scan_done(struct net_device *dev) void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u16 status_code = le16_to_cpu(mgmt->u.auth.status_code); @@ -351,7 +353,7 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return false; @@ -385,7 +387,7 @@ void cfg80211_sme_deauth(struct wireless_dev *wdev) void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; @@ -396,7 +398,7 @@ void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) void cfg80211_sme_disassoc(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; @@ -407,7 +409,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev) void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; @@ -420,7 +422,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, struct cfg80211_connect_params *connect, const u8 *prev_bssid) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; int err; @@ -467,7 +469,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, } wdev->conn->params.ssid = wdev->ssid; - wdev->conn->params.ssid_len = connect->ssid_len; + wdev->conn->params.ssid_len = wdev->ssid_len; /* see if we have the bss already */ bss = cfg80211_get_conn_bss(wdev); @@ -479,7 +481,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, /* we're good if we have a 
matching bss struct */ if (bss) { - wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; err = cfg80211_conn_do_work(wdev); cfg80211_put_bss(wdev->wiphy, bss); } else { @@ -505,7 +506,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err; if (!wdev->conn) @@ -593,7 +594,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, return; } - nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, + nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, status, GFP_KERNEL); @@ -624,7 +625,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, #endif if (!bss && (status == WLAN_STATUS_SUCCESS)) { - WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); + WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect); bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, @@ -687,7 +688,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, u16 status, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; @@ -742,7 +743,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bss->bssid, + nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy), + wdev->netdev, bss->bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, GFP_KERNEL); @@ -801,7 +803,7 @@ void cfg80211_roamed_bss(struct net_device *dev, size_t resp_ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; @@ -834,7 +836,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; @@ -877,10 +879,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, } void cfg80211_disconnected(struct net_device *dev, u16 reason, - u8 *ie, size_t ie_len, gfp_t gfp) + const u8 *ie, size_t ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index aabccf13e07b..560ed77084e9 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1876,29 +1876,33 @@ TRACE_EVENT(rdev_channel_switch, WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY - __field(u16, counter_offset_beacon) - __field(u16, counter_offset_presp) __field(bool, radar_required) __field(bool, block_tx) __field(u8, count) + __dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon) + __dynamic_array(u16, pres_ofs, 
params->n_counter_offsets_presp) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(&params->chandef); - __entry->counter_offset_beacon = params->counter_offset_beacon; - __entry->counter_offset_presp = params->counter_offset_presp; __entry->radar_required = params->radar_required; __entry->block_tx = params->block_tx; __entry->count = params->count; + memcpy(__get_dynamic_array(bcn_ofs), + params->counter_offsets_beacon, + params->n_counter_offsets_beacon * sizeof(u16)); + + /* probe response offsets are optional */ + if (params->n_counter_offsets_presp) + memcpy(__get_dynamic_array(pres_ofs), + params->counter_offsets_presp, + params->n_counter_offsets_presp * sizeof(u16)); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT - ", block_tx: %d, count: %u, radar_required: %d" - ", counter offsets (beacon/presp): %u/%u", + ", block_tx: %d, count: %u, radar_required: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, - __entry->block_tx, __entry->count, __entry->radar_required, - __entry->counter_offset_beacon, - __entry->counter_offset_presp) + __entry->block_tx, __entry->count, __entry->radar_required) ); TRACE_EVENT(rdev_set_qos_map, @@ -1919,6 +1923,24 @@ TRACE_EVENT(rdev_set_qos_map, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des) ); +TRACE_EVENT(rdev_set_ap_chanwidth, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, netdev, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -2193,18 +2215,21 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify, ); TRACE_EVENT(cfg80211_reg_can_beacon, - TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), - TP_ARGS(wiphy, chandef), + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype), + TP_ARGS(wiphy, chandef, iftype), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY + __field(enum nl80211_iftype, iftype) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->iftype = iftype; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d", + WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype) ); TRACE_EVENT(cfg80211_chandef_dfs_required, @@ -2615,6 +2640,21 @@ TRACE_EVENT(cfg80211_ft_event, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap)) ); +TRACE_EVENT(cfg80211_stop_iface, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, + WIPHY_PR_ARG, WDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index e5872ff2c27c..728f1c0dc70d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -476,7 +476,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, EXPORT_SYMBOL(ieee80211_data_to_8023); int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, u8 *bssid, bool qos) + enum nl80211_iftype iftype, + const u8
*bssid, bool qos) { struct ieee80211_hdr hdr; u16 hdrlen, ethertype; @@ -770,7 +771,7 @@ EXPORT_SYMBOL(ieee80211_bss_get_ie); void cfg80211_upload_connect_keys(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct net_device *dev = wdev->netdev; int i; @@ -839,6 +840,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) __cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid, ev->ij.channel); break; + case EVENT_STOPPED: + __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); + break; } wdev_unlock(wdev); @@ -888,11 +892,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; if (ntype != otype && netif_running(dev)) { - err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, - ntype); - if (err) - return err; - dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); @@ -1268,6 +1267,120 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, return res; } +int cfg80211_iter_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES], + void (*iter)(const struct ieee80211_iface_combination *c, + void *data), + void *data) +{ + const struct ieee80211_regdomain *regdom; + enum nl80211_dfs_regions region = 0; + int i, j, iftype; + int num_interfaces = 0; + u32 used_iftypes = 0; + + if (radar_detect) { + rcu_read_lock(); + regdom = rcu_dereference(cfg80211_regdomain); + if (regdom) + region = regdom->dfs_region; + rcu_read_unlock(); + } + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + num_interfaces += iftype_num[iftype]; + if (iftype_num[iftype] > 0 && + !(wiphy->software_iftypes & BIT(iftype))) + used_iftypes |= BIT(iftype); + } + + for (i = 0; i < wiphy->n_iface_combinations; i++) { + const struct ieee80211_iface_combination *c; + struct ieee80211_iface_limit *limits; + u32 all_iftypes = 0; + + c = &wiphy->iface_combinations[i]; + + if (num_interfaces > c->max_interfaces) + continue; + if (num_different_channels > c->num_different_channels) + continue; + + limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, + GFP_KERNEL); + if (!limits) + return -ENOMEM; + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + if (wiphy->software_iftypes & BIT(iftype)) + continue; + for (j = 0; j < c->n_limits; j++) { + all_iftypes |= limits[j].types; + if (!(limits[j].types & BIT(iftype))) + continue; + if (limits[j].max < iftype_num[iftype]) + goto cont; + limits[j].max -= iftype_num[iftype]; + } + } + + if (radar_detect != (c->radar_detect_widths & radar_detect)) + goto cont; + + if (radar_detect && c->radar_detect_regions && + !(c->radar_detect_regions & BIT(region))) + goto cont; + + /* Finally check that all iftypes that we're currently + * using are actually part of this combination. If they + * aren't then we can't use this combination and have + * to continue to the next. + */ + if ((all_iftypes & used_iftypes) != used_iftypes) + goto cont; + + /* This combination covered all interface types and + * supported the requested numbers, so we're good. 
+ */ + + (*iter)(c, data); + cont: + kfree(limits); + } + + return 0; +} +EXPORT_SYMBOL(cfg80211_iter_combinations); + +static void +cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c, + void *data) +{ + int *num = data; + (*num)++; +} + +int cfg80211_check_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES]) +{ + int err, num = 0; + + err = cfg80211_iter_combinations(wiphy, num_different_channels, + radar_detect, iftype_num, + cfg80211_iter_sum_ifcombs, &num); + if (err) + return err; + if (num == 0) + return -EBUSY; + + return 0; +} +EXPORT_SYMBOL(cfg80211_check_combinations); + int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, @@ -1276,7 +1389,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, u8 radar_detect) { struct wireless_dev *wdev_iter; - u32 used_iftypes = BIT(iftype); int num[NUM_NL80211_IFTYPES]; struct ieee80211_channel *used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS]; @@ -1284,7 +1396,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; - int i, j; + int i; ASSERT_RTNL(); @@ -1306,6 +1418,11 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, num[iftype] = 1; + /* TODO: We'll probably not need this anymore, since this + * should only be called with CHAN_MODE_UNDEFINED. There are + * still a couple of pending calls where other chanmodes are + * used, but we should get rid of them. + */ switch (chanmode) { case CHAN_MODE_UNDEFINED: break; @@ -1369,65 +1486,13 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, num[wdev_iter->iftype]++; total++; - used_iftypes |= BIT(wdev_iter->iftype); } if (total == 1 && !radar_detect) return 0; - for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { - const struct ieee80211_iface_combination *c; - struct ieee80211_iface_limit *limits; - u32 all_iftypes = 0; - - c = &rdev->wiphy.iface_combinations[i]; - - if (total > c->max_interfaces) - continue; - if (num_different_channels > c->num_different_channels) - continue; - - limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, - GFP_KERNEL); - if (!limits) - return -ENOMEM; - - for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { - if (rdev->wiphy.software_iftypes & BIT(iftype)) - continue; - for (j = 0; j < c->n_limits; j++) { - all_iftypes |= limits[j].types; - if (!(limits[j].types & BIT(iftype))) - continue; - if (limits[j].max < num[iftype]) - goto cont; - limits[j].max -= num[iftype]; - } - } - - if (radar_detect && !(c->radar_detect_widths & radar_detect)) - goto cont; - - /* - * Finally check that all iftypes that we're currently - * using are actually part of this combination. If they - * aren't then we can't use this combination and have - * to continue to the next. - */ - if ((all_iftypes & used_iftypes) != used_iftypes) - goto cont; - - /* - * This combination covered all interface types and - * supported the requested numbers, so we're good. 
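/*
 * Example use of the cfg80211_check_combinations() helper introduced
 * above: ask whether one AP plus two stations on two distinct channels
 * fits any advertised interface combination (illustrative caller; the
 * counts are made up):
 */
static int can_add_ap_with_two_stations(struct wiphy *wiphy)
{
	int iftype_num[NUM_NL80211_IFTYPES] = {};

	iftype_num[NL80211_IFTYPE_AP] = 1;
	iftype_num[NL80211_IFTYPE_STATION] = 2;

	/* two channels, no radar detection; -EBUSY means no combination fits */
	return cfg80211_check_combinations(wiphy, 2, 0, iftype_num);
}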

int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
				 struct wireless_dev *wdev,
				 enum nl80211_iftype iftype,
@@ -1276,7 +1389,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
				 u8 radar_detect)
{
	struct wireless_dev *wdev_iter;
-	u32 used_iftypes = BIT(iftype);
	int num[NUM_NL80211_IFTYPES];
	struct ieee80211_channel *used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS];
@@ -1284,7 +1396,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
	enum cfg80211_chan_mode chmode;
	int num_different_channels = 0;
	int total = 1;
-	int i, j;
+	int i;

	ASSERT_RTNL();
@@ -1306,6 +1418,11 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,

	num[iftype] = 1;

+	/* TODO: We'll probably not need this anymore, since this
+	 * should only be called with CHAN_MODE_UNDEFINED. There are
+	 * still a couple of pending calls where other chanmodes are
+	 * used, but we should get rid of them.
+	 */
	switch (chanmode) {
	case CHAN_MODE_UNDEFINED:
		break;
@@ -1369,65 +1486,13 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,

		num[wdev_iter->iftype]++;
		total++;
-		used_iftypes |= BIT(wdev_iter->iftype);
	}

	if (total == 1 && !radar_detect)
		return 0;

-	for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) {
-		const struct ieee80211_iface_combination *c;
-		struct ieee80211_iface_limit *limits;
-		u32 all_iftypes = 0;
-
-		c = &rdev->wiphy.iface_combinations[i];
-
-		if (total > c->max_interfaces)
-			continue;
-		if (num_different_channels > c->num_different_channels)
-			continue;
-
-		limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
-				 GFP_KERNEL);
-		if (!limits)
-			return -ENOMEM;
-
-		for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
-			if (rdev->wiphy.software_iftypes & BIT(iftype))
-				continue;
-			for (j = 0; j < c->n_limits; j++) {
-				all_iftypes |= limits[j].types;
-				if (!(limits[j].types & BIT(iftype)))
-					continue;
-				if (limits[j].max < num[iftype])
-					goto cont;
-				limits[j].max -= num[iftype];
-			}
-		}
-
-		if (radar_detect && !(c->radar_detect_widths & radar_detect))
-			goto cont;
-
-		/*
-		 * Finally check that all iftypes that we're currently
-		 * using are actually part of this combination. If they
-		 * aren't then we can't use this combination and have
-		 * to continue to the next.
-		 */
-		if ((all_iftypes & used_iftypes) != used_iftypes)
-			goto cont;
-
-		/*
-		 * This combination covered all interface types and
-		 * supported the requested numbers, so we're good.
-		 */
-		kfree(limits);
-		return 0;
- cont:
-		kfree(limits);
-	}
-
-	return -EBUSY;
+	return cfg80211_check_combinations(&rdev->wiphy, num_different_channels,
+					   radar_detect, num);
}

int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
@@ -1481,6 +1546,24 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy)
}
EXPORT_SYMBOL(ieee80211_get_num_supported_channels);

+int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
+			 struct station_info *sinfo)
+{
+	struct cfg80211_registered_device *rdev;
+	struct wireless_dev *wdev;
+
+	wdev = dev->ieee80211_ptr;
+	if (!wdev)
+		return -EOPNOTSUPP;
+
+	rdev = wiphy_to_rdev(wdev->wiphy);
+	if (!rdev->ops->get_station)
+		return -EOPNOTSUPP;
+
+	return rdev_get_station(rdev, dev, mac_addr, sinfo);
+}
+EXPORT_SYMBOL(cfg80211_get_station);
+
/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
const unsigned char rfc1042_header[] __aligned(2) =
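cfg80211_get_station() gives code outside the wireless stack a way to query a driver's station statistics without touching rdev internals. A hedged sketch of a caller reading a peer's current TX bitrate; example_peer_tx_rate is made up, while station_info, STATION_INFO_TX_BITRATE and cfg80211_calculate_bitrate are existing cfg80211 interfaces:

static u32 example_peer_tx_rate(struct net_device *dev, const u8 *peer)
{
	struct station_info sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	if (cfg80211_get_station(dev, peer, &sinfo))
		return 0;	/* not a wireless dev, or no driver support */

	if (!(sinfo.filled & STATION_INFO_TX_BITRATE))
		return 0;	/* driver didn't fill in a TX rate */

	/* units of 100 kbit/s, as elsewhere in cfg80211 */
	return cfg80211_calculate_bitrate(&sinfo.txrate);
}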
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 5661a54ac7ee..11120bb14162 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -73,7 +73,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
	struct vif_params vifparams;
	enum nl80211_iftype type;

-	rdev = wiphy_to_dev(wdev->wiphy);
+	rdev = wiphy_to_rdev(wdev->wiphy);

	switch (*mode) {
	case IW_MODE_INFRA:
@@ -253,12 +253,12 @@ EXPORT_SYMBOL_GPL(cfg80211_wext_giwrange);

/**
 * cfg80211_wext_freq - get wext frequency for non-"auto"
- * @wiphy: the wiphy
+ * @dev: the net device
 * @freq: the wext freq encoding
 *
 * Returns a frequency, or a negative error code, or 0 for auto.
 */
-int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq)
+int cfg80211_wext_freq(struct iw_freq *freq)
{
	/*
	 * Parse frequency - return 0 for auto and
@@ -286,7 +286,7 @@ int cfg80211_wext_siwrts(struct net_device *dev,
			 struct iw_param *rts, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	u32 orts = wdev->wiphy->rts_threshold;
	int err;

@@ -324,7 +324,7 @@ int cfg80211_wext_siwfrag(struct net_device *dev,
			  struct iw_param *frag, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	u32 ofrag = wdev->wiphy->frag_threshold;
	int err;

@@ -364,7 +364,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev,
				  struct iw_param *retry, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	u32 changed = 0;
	u8 olong = wdev->wiphy->retry_long;
	u8 oshort = wdev->wiphy->retry_short;
@@ -587,7 +587,7 @@ static int cfg80211_wext_siwencode(struct net_device *dev,
				   struct iw_point *erq, char *keybuf)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	int idx, err;
	bool remove = false;
	struct key_params params;
@@ -647,7 +647,7 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev,
				      struct iw_point *erq, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct iw_encode_ext *ext = (struct iw_encode_ext *) extra;
	const u8 *addr;
	int idx;
@@ -775,7 +775,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
				 struct iw_freq *wextfreq, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct cfg80211_chan_def chandef = {
		.width = NL80211_CHAN_WIDTH_20_NOHT,
	};
@@ -787,7 +787,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
	case NL80211_IFTYPE_ADHOC:
		return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra);
	case NL80211_IFTYPE_MONITOR:
-		freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+		freq = cfg80211_wext_freq(wextfreq);
		if (freq < 0)
			return freq;
		if (freq == 0)
@@ -798,7 +798,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
			return -EINVAL;
		return cfg80211_set_monitor_channel(rdev, &chandef);
	case NL80211_IFTYPE_MESH_POINT:
-		freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+		freq = cfg80211_wext_freq(wextfreq);
		if (freq < 0)
			return freq;
		if (freq == 0)
@@ -818,7 +818,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev,
				 struct iw_freq *freq, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct cfg80211_chan_def chandef;
	int ret;

@@ -847,7 +847,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
				    union iwreq_data *data, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	enum nl80211_tx_power_setting type;
	int dbm = 0;

@@ -899,7 +899,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev,
				    union iwreq_data *data, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	int err, val;

	if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
@@ -1119,7 +1119,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev,
				  struct iw_param *wrq, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	bool ps = wdev->ps;
	int timeout = wdev->ps_timeout;
	int err;
@@ -1177,7 +1177,7 @@ static int cfg80211_wds_wext_siwap(struct net_device *dev,
				   struct sockaddr *addr, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	int err;

	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS))
@@ -1221,7 +1221,7 @@ static int cfg80211_wext_siwrate(struct net_device *dev,
				 struct iw_param *rate, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct cfg80211_bitrate_mask mask;
	u32 fixed, maxrate;
	struct ieee80211_supported_band *sband;
@@ -1272,7 +1272,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
				 struct iw_param *rate, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	/* we are under RTNL - globally locked - so can use a static struct */
	static struct station_info sinfo;
	u8 addr[ETH_ALEN];
@@ -1310,7 +1310,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	/* we are under RTNL - globally locked - so can use static structs */
	static struct iw_statistics wstats;
	static struct station_info sinfo;
@@ -1449,7 +1449,7 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev,
				  struct iw_point *data, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct cfg80211_pmksa cfg_pmksa;
	struct iw_pmksa *pmksa = (struct iw_pmksa *)extra;
diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h
index 5d766b0118e8..ebcacca2f731 100644
--- a/net/wireless/wext-compat.h
+++ b/net/wireless/wext-compat.h
@@ -50,7 +50,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev,
			   struct iw_point *data, char *extra);

-int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq);
+int cfg80211_wext_freq(struct iw_freq *freq);

extern const struct iw_handler_def cfg80211_wext_handler;
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 86c331a65664..c7e5c8eb4f24 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -67,7 +67,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
			      struct iw_freq *wextfreq, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct ieee80211_channel *chan = NULL;
	int err, freq;

@@ -75,7 +75,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
		return -EINVAL;

-	freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+	freq = cfg80211_wext_freq(wextfreq);
	if (freq < 0)
		return freq;

@@ -169,7 +169,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
			       struct iw_point *data, char *ssid)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	size_t len = data->length;
	int err;

@@ -260,7 +260,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev,
			    struct sockaddr *ap_addr, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	u8 *bssid = ap_addr->sa_data;
	int err;

@@ -333,7 +333,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev,
			   struct iw_point *data, char *extra)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	u8 *ie = extra;
	int ie_len = data->length, err;

@@ -390,7 +390,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev,
	if (!wdev)
		return -EOPNOTSUPP;

-	rdev = wiphy_to_dev(wdev->wiphy);
+	rdev = wiphy_to_rdev(wdev->wiphy);

	if (wdev->iftype != NL80211_IFTYPE_STATION)
		return -EINVAL;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 3bb2cdc13b46..c51e8f7b8653 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -199,6 +199,7 @@ int xfrm_output(struct sk_buff *skb)

	return xfrm_output2(skb);
}
+EXPORT_SYMBOL_GPL(xfrm_output);

int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
@@ -213,6 +214,7 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
		return -EAFNOSUPPORT;
	return inner_mode->afinfo->extract_output(x, skb);
}
+EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);

void xfrm_local_error(struct sk_buff *skb, int mtu)
{
@@ -233,7 +235,4 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
	afinfo->local_error(skb, mtu);
	xfrm_state_put_afinfo(afinfo);
}
-
-EXPORT_SYMBOL_GPL(xfrm_output);
-EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
EXPORT_SYMBOL_GPL(xfrm_local_error);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c08fbd11ceff..a8ef5108e0d8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -769,7 +769,7 @@ EXPORT_SYMBOL(xfrm_policy_byid);

#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0;

@@ -783,10 +783,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audi
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
-				xfrm_audit_policy_delete(pol, 0,
-							 audit_info->loginuid,
-							 audit_info->sessionid,
-							 audit_info->secid);
+				xfrm_audit_policy_delete(pol, 0, task_valid);
				return err;
			}
		}
@@ -800,9 +797,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audi
							pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
-								 audit_info->loginuid,
-								 audit_info->sessionid,
-								 audit_info->secid);
+								 task_valid);
					return err;
				}
			}
@@ -812,19 +807,19 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audi
}
#else
static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
	return 0;
}
#endif

-int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
+int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

-	err = xfrm_policy_flush_secctx_check(net, type, audit_info);
+	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
	if (err)
		goto out;

@@ -841,9 +836,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
			cnt++;

-			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
-						 audit_info->sessionid,
-						 audit_info->secid);
+			xfrm_audit_policy_delete(pol, 1, task_valid);

			xfrm_policy_kill(pol);

@@ -862,10 +855,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				cnt++;

-				xfrm_audit_policy_delete(pol, 1,
-							 audit_info->loginuid,
-							 audit_info->sessionid,
-							 audit_info->secid);
+				xfrm_audit_policy_delete(pol, 1, task_valid);
				xfrm_policy_kill(pol);

				write_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -2783,21 +2773,19 @@ static struct notifier_block xfrm_dev_notifier = {
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;
-
-	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
-			  sizeof(struct linux_xfrm_mib),
-			  __alignof__(struct linux_xfrm_mib)) < 0)
+	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
+	if (!net->mib.xfrm_statistics)
		return -ENOMEM;
	rv = xfrm_proc_init(net);
	if (rv < 0)
-		snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
+		free_percpu(net->mib.xfrm_statistics);
	return rv;
}

static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
-	snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
+	free_percpu(net->mib.xfrm_statistics);
}
#else
static int __net_init xfrm_statistics_init(struct net *net)
@@ -2862,21 +2850,14 @@ out_byidx:

static void xfrm_policy_fini(struct net *net)
{
-	struct xfrm_audit audit_info;
	unsigned int sz;
	int dir;

	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
-	audit_info.loginuid = INVALID_UID;
-	audit_info.sessionid = (unsigned int)-1;
-	audit_info.secid = 0;
-	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
+	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
#endif
-	audit_info.loginuid = INVALID_UID;
-	audit_info.sessionid = (unsigned int)-1;
-	audit_info.secid = 0;
-	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);

	WARN_ON(!list_empty(&net->xfrm.policy_all));

@@ -2991,15 +2972,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
	}
}

-void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
-			   kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-add");
	if (audit_buf == NULL)
		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
@@ -3007,14 +2987,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);

void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
-			      kuid_t auid, unsigned int sessionid, u32 secid)
+			      bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SPD-delete");
	if (audit_buf == NULL)
		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	xfrm_audit_common_policyinfo(xp, audit_buf);
	audit_log_end(audit_buf);
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index fc5abd0b456f..9c4fbd8935f4 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -54,8 +54,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
	int i;
	for (i = 0; xfrm_mib_list[i].name; i++)
		seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
-			   snmp_fold_field((void __percpu **)
-					   net->mib.xfrm_statistics,
+			   snmp_fold_field(net->mib.xfrm_statistics,
					   xfrm_mib_list[i].entry));
	return 0;
}
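The statistics conversion above replaces the old two-pointer snmp_mib_init()/snmp_mib_free() scheme with a single alloc_percpu() allocation, which is also why xfrm_statistics_seq_show() can now hand net->mib.xfrm_statistics to snmp_fold_field() directly. A minimal sketch of the resulting pattern; struct example_mib and its field are illustrative stand-ins for struct linux_xfrm_mib:

struct example_mib {
	unsigned long outnostates;	/* stand-in for one LINUX_MIB_XFRM* counter */
};

static struct example_mib __percpu *example_alloc(void)
{
	return alloc_percpu(struct example_mib);	/* one instance per possible CPU */
}

static unsigned long example_fold(struct example_mib __percpu *mib)
{
	unsigned long sum = 0;
	int cpu;

	/* what folding one field amounts to: sum every CPU's counter */
	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(mib, cpu)->outnostates;
	return sum;
}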
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8e9c781a6bba..0ab54134bb40 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -463,9 +463,7 @@ expired:
	if (!err)
		km_state_expired(x, 1, 0);

-	xfrm_audit_state_delete(x, err ? 0 : 1,
-				audit_get_loginuid(current),
-				audit_get_sessionid(current), 0);
+	xfrm_audit_state_delete(x, err ? 0 : 1, true);

out:
	spin_unlock(&x->lock);
@@ -562,7 +560,7 @@ EXPORT_SYMBOL(xfrm_state_delete);

#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
	int i, err = 0;

@@ -572,10 +570,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audi
		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto) &&
			   (err = security_xfrm_state_delete(x)) != 0) {
-				xfrm_audit_state_delete(x, 0,
-							audit_info->loginuid,
-							audit_info->sessionid,
-							audit_info->secid);
+				xfrm_audit_state_delete(x, 0, task_valid);
				return err;
			}
		}
@@ -585,18 +580,18 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audi
}
#else
static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
	return 0;
}
#endif

-int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
{
	int i, err = 0, cnt = 0;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	err = xfrm_state_flush_secctx_check(net, proto, audit_info);
+	err = xfrm_state_flush_secctx_check(net, proto, task_valid);
	if (err)
		goto out;

@@ -612,9 +607,7 @@ restart:

				err = xfrm_state_delete(x);
				xfrm_audit_state_delete(x, err ? 0 : 1,
-							audit_info->loginuid,
-							audit_info->sessionid,
-							audit_info->secid);
+							task_valid);
				xfrm_state_put(x);
				if (!err)
					cnt++;
@@ -2128,14 +2121,10 @@ out_bydst:

void xfrm_state_fini(struct net *net)
{
-	struct xfrm_audit audit_info;
	unsigned int sz;

	flush_work(&net->xfrm.state_hash_work);
-	audit_info.loginuid = INVALID_UID;
-	audit_info.sessionid = (unsigned int)-1;
-	audit_info.secid = 0;
-	xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info);
+	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
	flush_work(&net->xfrm.state_gc_work);

	WARN_ON(!list_empty(&net->xfrm.state_all));
@@ -2198,30 +2187,28 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
	}
}

-void xfrm_audit_state_add(struct xfrm_state *x, int result,
-			  kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SAD-add");
	if (audit_buf == NULL)
		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	xfrm_audit_helper_sainfo(x, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_add);

-void xfrm_audit_state_delete(struct xfrm_state *x, int result,
-			     kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SAD-delete");
	if (audit_buf == NULL)
		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	xfrm_audit_helper_sainfo(x, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	audit_log_end(audit_buf);
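With the xfrm_audit struct gone, callers no longer snapshot loginuid/sessionid/secid themselves; they only say whether they run in a meaningful task context (netlink handlers pass true, netns teardown passes false). A speculative sketch of what the task_valid contract allows a shared helper to do internally; the real xfrm_audit_helper_usrinfo() may format the record differently:

static void example_audit_usrinfo(bool task_valid, struct audit_buffer *audit_buf)
{
	if (task_valid) {
		/* user-triggered operation: current's credentials are relevant */
		audit_log_format(audit_buf, " auid=%u ses=%u",
				 from_kuid(&init_user_ns,
					   audit_get_loginuid(current)),
				 audit_get_sessionid(current));
	} else {
		/* kernel-internal flush, e.g. netns teardown: nobody to blame */
		audit_log_format(audit_buf, " auid=%u ses=%u",
				 (unsigned int)-1, (unsigned int)-1);
	}
}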
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 51398ae6cda8..412d9dc3a873 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -597,9 +597,6 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
	struct xfrm_state *x;
	int err;
	struct km_event c;
-	kuid_t loginuid = audit_get_loginuid(current);
-	unsigned int sessionid = audit_get_sessionid(current);
-	u32 sid;

	err = verify_newsa_info(p, attrs);
	if (err)
@@ -615,8 +612,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
	else
		err = xfrm_state_update(x);

-	security_task_getsecid(current, &sid);
-	xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid);
+	xfrm_audit_state_add(x, err ? 0 : 1, true);

	if (err < 0) {
		x->km.state = XFRM_STATE_DEAD;
@@ -676,9 +672,6 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
	int err = -ESRCH;
	struct km_event c;
	struct xfrm_usersa_id *p = nlmsg_data(nlh);
-	kuid_t loginuid = audit_get_loginuid(current);
-	unsigned int sessionid = audit_get_sessionid(current);
-	u32 sid;

	x = xfrm_user_state_lookup(net, p, attrs, &err);
	if (x == NULL)
@@ -703,8 +696,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
	km_state_notify(x, &c);

out:
-	security_task_getsecid(current, &sid);
-	xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid);
+	xfrm_audit_state_delete(x, err ? 0 : 1, true);
	xfrm_state_put(x);
	return err;
}
@@ -955,6 +947,20 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
	return skb;
}

+/* A wrapper for nlmsg_multicast() checking that nlsk is still available.
+ * Must be called with RCU read lock.
+ */
+static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
+				       u32 pid, unsigned int group)
+{
+	struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
+
+	if (nlsk)
+		return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
+	else
+		return -1;
+}
+
static inline size_t xfrm_spdinfo_msgsize(void)
{
	return NLMSG_ALIGN(4)
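Because the wrapper samples net->xfrm.nlsk with rcu_dereference(), its comment spells out the locking contract. A hedged sketch of the expected calling pattern for a context that does not already sit inside an RCU read-side section; example_notify is illustrative:

static int example_notify(struct net *net, struct sk_buff *skb)
{
	int err;

	rcu_read_lock();	/* keeps nlsk from being freed under us */
	err = xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
	rcu_read_unlock();

	return err;
}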
@@ -1414,9 +1420,6 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
	struct km_event c;
	int err;
	int excl;
-	kuid_t loginuid = audit_get_loginuid(current);
-	unsigned int sessionid = audit_get_sessionid(current);
-	u32 sid;

	err = verify_newpolicy_info(p);
	if (err)
@@ -1435,8 +1438,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
	 * a type XFRM_MSG_UPDPOLICY - JHS */
	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
	err = xfrm_policy_insert(p->dir, xp, excl);
-	security_task_getsecid(current, &sid);
-	xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid);
+	xfrm_audit_policy_add(xp, err ? 0 : 1, true);

	if (err) {
		security_xfrm_policy_free(xp->security);
@@ -1673,13 +1675,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
					    NETLINK_CB(skb).portid);
		}
	} else {
-		kuid_t loginuid = audit_get_loginuid(current);
-		unsigned int sessionid = audit_get_sessionid(current);
-		u32 sid;
-
-		security_task_getsecid(current, &sid);
-		xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid,
-					 sid);
+		xfrm_audit_policy_delete(xp, err ? 0 : 1, true);

		if (err != 0)
			goto out;
@@ -1704,13 +1700,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
	struct net *net = sock_net(skb->sk);
	struct km_event c;
	struct xfrm_usersa_flush *p = nlmsg_data(nlh);
-	struct xfrm_audit audit_info;
	int err;

-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	security_task_getsecid(current, &audit_info.secid);
-	err = xfrm_state_flush(net, p->proto, &audit_info);
+	err = xfrm_state_flush(net, p->proto, true);
	if (err) {
		if (err == -ESRCH) /* empty table */
			return 0;
@@ -1894,16 +1886,12 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
	struct km_event c;
	u8 type = XFRM_POLICY_TYPE_MAIN;
	int err;
-	struct xfrm_audit audit_info;

	err = copy_from_user_policy_type(&type, attrs);
	if (err)
		return err;

-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	security_task_getsecid(current, &audit_info.secid);
-	err = xfrm_policy_flush(net, type, &audit_info);
+	err = xfrm_policy_flush(net, type, true);
	if (err) {
		if (err == -ESRCH) /* empty table */
			return 0;
@@ -1969,14 +1957,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,

	err = 0;
	if (up->hard) {
-		kuid_t loginuid = audit_get_loginuid(current);
-		unsigned int sessionid = audit_get_sessionid(current);
-		u32 sid;
-
-		security_task_getsecid(current, &sid);
		xfrm_policy_delete(xp, p->dir);
-		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid);
-
+		xfrm_audit_policy_delete(xp, 1, true);
	} else {
		// reset the timers here?
		WARN(1, "Dont know what to do with soft policy expire\n");
@@ -2012,13 +1994,8 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
	km_state_expired(x, ue->hard, nlh->nlmsg_pid);

	if (ue->hard) {
-		kuid_t loginuid = audit_get_loginuid(current);
-		unsigned int sessionid = audit_get_sessionid(current);
-		u32 sid;
-
-		security_task_getsecid(current, &sid);
		__xfrm_state_delete(x);
-		xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid);
+		xfrm_audit_state_delete(x, 1, true);
	}
	err = 0;
out:
@@ -2265,7 +2242,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
	if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0)
		BUG();

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
}
#else
static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
@@ -2456,7 +2433,7 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)
		return -EMSGSIZE;
	}

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}

static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)
@@ -2471,7 +2448,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event
	if (build_aevent(skb, x, c) < 0)
		BUG();

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
}

static int xfrm_notify_sa_flush(const struct km_event *c)
@@ -2497,7 +2474,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c)

	nlmsg_end(skb, nlh);

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
}

static inline size_t xfrm_sa_len(struct xfrm_state *x)
@@ -2584,7 +2561,7 @@
static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)

	nlmsg_end(skb, nlh);

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);

out_free_skb:
	kfree_skb(skb);
@@ -2675,7 +2652,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
	if (build_acquire(skb, x, xt, xp) < 0)
		BUG();

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
}

/* User gives us xfrm_user_policy_info followed by an array of 0
@@ -2789,7 +2766,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
	if (build_polexpire(skb, xp, dir, c) < 0)
		BUG();

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}

static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
@@ -2851,7 +2828,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e

	nlmsg_end(skb, nlh);

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);

out_free_skb:
	kfree_skb(skb);
@@ -2879,7 +2856,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c)

	nlmsg_end(skb, nlh);

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);

out_free_skb:
	kfree_skb(skb);
@@ -2948,7 +2925,7 @@ static int xfrm_send_report(struct net *net, u8 proto,
	if (build_report(skb, proto, sel, addr) < 0)
		BUG();

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
}

static inline size_t xfrm_mapping_msgsize(void)
@@ -3000,7 +2977,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
	if (build_mapping(skb, x, ipaddr, sport) < 0)
		BUG();

-	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
+	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
}

static bool xfrm_is_alive(const struct km_event *c)