diff options
Diffstat (limited to 'net')
286 files changed, 13506 insertions, 9133 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 90cc2bdd4064..61bf2a06e85d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = vlan_tx_tag_get_id(skb); + u16 vlan_id = skb_vlan_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 376805005cc7..118956448cf6 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -579,11 +579,12 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_PRESENT); dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | - NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | + NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM | NETIF_F_ALL_FCOE; - dev->features |= real_dev->vlan_features | NETIF_F_LLTX; + dev->features |= real_dev->vlan_features | NETIF_F_LLTX | + NETIF_F_GSO_SOFTWARE; dev->gso_max_size = real_dev->gso_max_size; if (dev->features & NETIF_F_VLAN_FEATURES) netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); @@ -648,7 +649,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, features |= NETIF_F_RXCSUM; features = netdev_intersect_features(features, real_dev->features); - features |= old_features & NETIF_F_SOFT_FEATURES; + features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE); features |= NETIF_F_LLTX; return features; diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 8ac8a5cc2143..c92b52f37d38 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -238,6 +238,13 @@ nla_put_failure: return -EMSGSIZE; } +static struct net *vlan_get_link_net(const struct net_device *dev) +{ + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; + + return dev_net(real_dev); +} + struct rtnl_link_ops vlan_link_ops __read_mostly = { .kind = "vlan", .maxtype = IFLA_VLAN_MAX, @@ -250,6 +257,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = { .dellink = unregister_vlan_dev, .get_size = vlan_get_size, .fill_info = vlan_fill_info, + .get_link_net = vlan_get_link_net, }; int __init vlan_netlink_init(void) diff --git a/net/Makefile b/net/Makefile index 95fc694e4ddc..38704bdf941a 100644 --- a/net/Makefile +++ b/net/Makefile @@ -5,8 +5,6 @@ # Rewritten to use lists instead of if-statements. # -obj-y := nonet.o - obj-$(CONFIG_NET) := socket.o core/ tmp-$(CONFIG_COMPAT) := compat.o diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 11660a3aab5a..c6fc8f756c9a 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -62,6 +62,7 @@ config BATMAN_ADV_MCAST config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV + depends on DEBUG_FS help This is an option for use by developers; most people should say N here. This enables compilation of support for diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1e8053976e83..00e00e09b000 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -26,9 +26,8 @@ #include "bat_algo.h" #include "network-coding.h" - /** - * batadv_dup_status - duplicate status + * enum batadv_dup_status - duplicate status * @BATADV_NO_DUP: the packet is a duplicate * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the * neighbor) @@ -517,7 +516,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @packet_len: (total) length of the OGM * @send_time: timestamp (jiffies) when the packet is to be sent - * @direktlink: true if this is a direct link packet + * @directlink: true if this is a direct link packet * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @forw_packet: the forwarded packet which should be checked @@ -879,7 +878,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = &(orig_node->bat_iv.bcast_own[word_index]); + word = &orig_node->bat_iv.bcast_own[word_index]; batadv_bit_get_packet(bat_priv, word, 1, 0); if_num = hard_iface->if_num; @@ -1362,10 +1361,10 @@ out: return ret; } - /** * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if * @skb: the skb containing the OGM + * @ogm_offset: offset from skb->data to start of ogm header * @orig_node: the (cached) orig node for the originator of this OGM * @if_incoming: the interface where this packet was received * @if_outgoing: the interface for which the packet should be considered @@ -1664,7 +1663,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, offset = if_num * BATADV_NUM_WORDS; spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); - word = &(orig_neigh_node->bat_iv.bcast_own[offset]); + word = &orig_neigh_node->bat_iv.bcast_own[offset]; bit_pos = if_incoming_seqno - 2; bit_pos -= ntohl(ogm_packet->seqno); batadv_set_bit(word, bit_pos); @@ -1902,10 +1901,10 @@ out: * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than * neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison - * @if_outgoing: outgoing interface for the first neighbor + * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor - + * * Returns true if the metric via neigh1 is equally good or better than * the metric via neigh2, false otherwise. */ diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 9586750022f5..e3da07a64026 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -29,7 +29,6 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n) bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE); } - /* receive and process one packet within the sequence number window. * * returns: diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index cc2407351d36..2acaafe60188 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -29,8 +29,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits, diff = last_seqno - curr_seqno; if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) return 0; - else - return test_bit(diff, seq_bits) != 0; + return test_bit(diff, seq_bits) != 0; } /* turn corresponding bit on, so we can remember that we got the packet */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index a957c8140721..ac4b96eccade 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -69,7 +69,6 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data, return hash % size; } - /* compares address and vid of two backbone gws */ static int batadv_compare_backbone_gw(const struct hlist_node *node, const void *data2) @@ -245,14 +244,14 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) spin_unlock_bh(list_lock); } - /* all claims gone, intialize CRC */ + /* all claims gone, initialize CRC */ backbone_gw->crc = BATADV_BLA_CRC_INIT; } /** * batadv_bla_send_claim - sends a claim frame according to the provided info * @bat_priv: the bat priv with all the soft interface information - * @orig: the mac address to be announced within the claim + * @mac: the mac address to be announced within the claim * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ @@ -364,6 +363,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @orig: the mac address of the originator * @vid: the VLAN ID + * @own_backbone: set if the requested backbone is local * * searches for the backbone gw or creates a new one if it could not * be found. @@ -454,6 +454,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, /** * batadv_bla_answer_request - answer a bla request by sending own claims * @bat_priv: the bat priv with all the soft interface information + * @primary_if: interface where the request came on * @vid: the vid where the request came on * * Repeat all of our own claims, and finally send an ANNOUNCE frame @@ -660,7 +661,6 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, if (unlikely(!backbone_gw)) return 1; - /* handle as ANNOUNCE frame */ backbone_gw->lasttime = jiffies; crc = ntohs(*((__be16 *)(&an_addr[4]))); @@ -775,6 +775,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, /** * batadv_check_claim_group * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary interface of this batman interface * @hw_src: the Hardware source in the ARP Header * @hw_dst: the Hardware destination in the ARP Header * @ethhdr: pointer to the Ethernet header of the claim frame @@ -846,10 +847,10 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, return 2; } - /** * batadv_bla_process_claim * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary hard interface of this batman soft interface * @skb: the frame to be checked * * Check if this is a claim frame, and process it accordingly. @@ -1327,7 +1328,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, goto out; } /* not found, add a new entry (overwrite the oldest entry) - * and allow it, its the first occurence. + * and allow it, its the first occurrence. */ curr = (bat_priv->bla.bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1); curr %= BATADV_DUPLIST_SIZE; @@ -1343,8 +1344,6 @@ out: return ret; } - - /** * batadv_bla_is_backbone_gw_orig * @bat_priv: the bat priv with all the soft interface information @@ -1386,7 +1385,6 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig, return false; } - /** * batadv_bla_is_backbone_gw * @skb: the frame to be checked @@ -1476,7 +1474,6 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, if (!atomic_read(&bat_priv->bridge_loop_avoidance)) goto allow; - if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index a12e25efaf6f..a4972874c056 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -233,7 +233,6 @@ static int batadv_debug_log_setup(struct batadv_priv *bat_priv) static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) { - return; } #endif @@ -405,6 +404,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ .release = single_release, \ }, \ } + static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, batadv_originators_hardif_open); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b5981113c9a7..aad022dd15df 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1100,6 +1100,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); } + /** * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local * DAT storage only diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index d76e1d06c5b5..2fe0764c64be 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -25,9 +25,7 @@ #include <linux/if_arp.h> -/** - * BATADV_DAT_ADDR_MAX - maximum address value in the DHT space - */ +/* BATADV_DAT_ADDR_MAX - maximum address value in the DHT space */ #define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0) void batadv_dat_status_update(struct net_device *net_dev); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index fc1835c6bb40..3d1dcaa3e8b5 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -23,7 +23,6 @@ #include "hard-interface.h" #include "soft-interface.h" - /** * batadv_frag_clear_chain - delete entries in the fragment buffer chain * @head: head of chain with entries. @@ -251,7 +250,7 @@ batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb) kfree(entry); /* Make room for the rest of the fragments. */ - if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) { + if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) { kfree_skb(skb_out); skb_out = NULL; goto free; @@ -434,7 +433,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb, * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE */ mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE); - max_fragment_size = (mtu - header_size - ETH_HLEN); + max_fragment_size = mtu - header_size; max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; /* Don't even try to fragment, if we need more than 16 fragments */ diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 5d7a0e66a22b..d848cf6676a2 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -41,8 +41,7 @@ batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) if (!hlist_empty(&frags_entry->head) && batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT)) return true; - else - return false; + return false; } #endif /* _NET_BATMAN_ADV_FRAGMENTATION_H_ */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 90cff585b37d..27649e85f3f6 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -775,6 +775,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, return ret; } + /** * batadv_gw_out_of_range - check if the dhcp request destination is the best gw * @bat_priv: the bat priv with all the soft interface information @@ -810,7 +811,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, goto out; gw_node = batadv_gw_node_get(bat_priv, orig_dst_node); - if (!gw_node->bandwidth_down == 0) + if (!gw_node) goto out; switch (atomic_read(&bat_priv->gw_mode)) { diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d1183e882167..12fc77bef23f 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -41,7 +41,6 @@ #include "network-coding.h" #include "fragmentation.h" - /* List manipulations on hardif_list have to be rtnl_lock()'ed, * list traversals just rcu-locked */ @@ -403,6 +402,9 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, goto err_free; } + /* reset control block to avoid left overs from previous users */ + memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); + /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. */ @@ -651,7 +653,7 @@ static struct batadv_tvlv_handler /** * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and * possibly free it - * @tvlv_handler: the tvlv container to free + * @tvlv: the tvlv container to free */ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) { @@ -796,11 +798,11 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accomodate + * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate * requested packet size * @packet_buff: packet buffer * @packet_buff_len: packet buffer size - * @packet_min_len: requested packet minimum size + * @min_packet_len: requested packet minimum size * @additional_packet_len: requested additional packet size on top of minimum * size * diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index a1fcd884f0b1..4d2318829a34 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2014.4.0" +#define BATADV_SOURCE_VERSION "2015.0" #endif /* B.A.T.M.A.N. parameters */ @@ -92,9 +92,8 @@ /* numbers of originator to contact for any PUT/GET DHT operation */ #define BATADV_DAT_CANDIDATES_NUM 3 -/** - * BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ - * at most from the primary one in order to be still considered acceptable +/* BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ + * at most from the primary one in order to be still considered acceptable */ #define BATADV_TQ_SIMILARITY_THRESHOLD 50 @@ -313,10 +312,10 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, * - when adding 128 - it is neither a predecessor nor a successor, * - after adding more than 127 to the starting value - it is a successor */ -#define batadv_seq_before(x, y) ({typeof(x) _d1 = (x); \ - typeof(y) _d2 = (y); \ - typeof(x) _dummy = (_d1 - _d2); \ - (void) (&_d1 == &_d2); \ +#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \ + typeof(y)_d2 = (y); \ + typeof(x)_dummy = (_d1 - _d2); \ + (void)(&_d1 == &_d2); \ _dummy > batadv_smallest_signed_int(_dummy); }) #define batadv_seq_after(x, y) batadv_seq_before(y, x) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index ab6bb2af1d45..b24e4bb64fb5 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -685,11 +685,13 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, if (orig_initialized) atomic_dec(&bat_priv->mcast.num_disabled); orig->capabilities |= BATADV_ORIG_CAPA_HAS_MCAST; - /* If mcast support is being switched off increase the disabled - * mcast node counter. + /* If mcast support is being switched off or if this is an initial + * OGM without mcast support then increase the disabled mcast + * node counter. */ } else if (!orig_mcast_enabled && - orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) { + (orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST || + !orig_initialized)) { atomic_inc(&bat_priv->mcast.num_disabled); orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_MCAST; } @@ -738,7 +740,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) { struct batadv_priv *bat_priv = orig->bat_priv; - if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST)) + if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) && + orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST) atomic_dec(&bat_priv->mcast.num_disabled); batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 73b5d45819c1..3a44ebdb43cb 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -50,7 +50,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv) { - return; } static inline enum batadv_forw_mode @@ -67,12 +66,10 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv) static inline void batadv_mcast_free(struct batadv_priv *bat_priv) { - return; } static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node) { - return; } #endif /* CONFIG_BATMAN_ADV_MCAST */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 8d04d174669e..127cc4d7380a 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -133,7 +133,7 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) if (!bat_priv->nc.decoding_hash) goto err; - batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, &batadv_nc_decoding_hash_lock_class_key); INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); @@ -1212,8 +1212,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, { if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src)) return false; - else - return true; + return true; } /** diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6a484514cd3e..90e805aba379 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -570,9 +570,6 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) batadv_frag_purge_orig(orig_node, NULL); - batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, -1, - "originator timed out"); - if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); @@ -678,6 +675,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, atomic_set(&orig_node->last_ttvn, 0); orig_node->tt_buff = NULL; orig_node->tt_buff_len = 0; + orig_node->last_seen = jiffies; reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; #ifdef CONFIG_BATMAN_ADV_MCAST @@ -799,7 +797,6 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, return ifinfo_purged; } - /** * batadv_purge_orig_neighbors - purges neighbors from originator * @bat_priv: the bat priv with all the soft interface information @@ -977,6 +974,9 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) if (batadv_purge_orig_node(bat_priv, orig_node)) { batadv_gw_node_delete(bat_priv, orig_node); hlist_del_rcu(&orig_node->hash_entry); + batadv_tt_global_del_orig(orig_node->bat_priv, + orig_node, -1, + "originator timed out"); batadv_orig_node_free_ref(orig_node); continue; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index db3a9ed734cb..aa4a43696295 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -70,7 +70,6 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, unsigned short vid); void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan); - /* hashfunction to choose an entry in a hash table of given size * hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 34e096d2dce1..b81fbbf21a63 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -198,6 +198,7 @@ struct batadv_bla_claim_dst { uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ }; + #pragma pack() /** @@ -376,7 +377,7 @@ struct batadv_frag_packet { uint8_t reserved:4; uint8_t no:4; #else -#error "unknown bitfield endianess" +#error "unknown bitfield endianness" #endif uint8_t dest[ETH_ALEN]; uint8_t orig[ETH_ALEN]; @@ -452,7 +453,7 @@ struct batadv_coded_packet { * @src: address of the source * @dst: address of the destination * @tvlv_len: length of tvlv data following the unicast tvlv header - * @align: 2 bytes to align the header to a 4 byte boundry + * @align: 2 bytes to align the header to a 4 byte boundary */ struct batadv_unicast_tvlv_packet { uint8_t packet_type; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 35f76f2f7824..da83982bf974 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -292,7 +292,6 @@ out: return ret; } - int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { @@ -443,11 +442,13 @@ batadv_find_router(struct batadv_priv *bat_priv, router = batadv_orig_router_get(orig_node, recv_if); + if (!router) + return router; + /* only consider bonding for recv_if == BATADV_IF_DEFAULT (first hop) * and if activated. */ - if (recv_if == BATADV_IF_DEFAULT || !atomic_read(&bat_priv->bonding) || - !router) + if (!(recv_if == BATADV_IF_DEFAULT && atomic_read(&bat_priv->bonding))) return router; /* bonding: loop through the list of possible routers found @@ -455,7 +456,7 @@ batadv_find_router(struct batadv_priv *bat_priv, * the last chosen bonding candidate (next_candidate). If no such * router is found, use the first candidate found (the previously * chosen bonding candidate might have been the last one in the list). - * If this can't be found either, return the previously choosen + * If this can't be found either, return the previously chosen * router - obviously there are no other candidates. */ rcu_read_lock(); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5467955eb27c..5ec31d7de24f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -36,7 +36,6 @@ #include "bridge_loop_avoidance.h" #include "network-coding.h" - static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index f40cb0436eba..a75dc12f96f8 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -151,7 +151,6 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ batadv_store_##_name) - #define BATADV_ATTR_SIF_STORE_UINT(_name, _min, _max, _post_func) \ ssize_t batadv_store_##_name(struct kobject *kobj, \ struct attribute *attr, char *buff, \ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5f59e7f899a0..07b263a437d1 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1780,7 +1780,6 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, batadv_tt_global_del_roaming(bat_priv, tt_global_entry, orig_node, message); - out: if (tt_global_entry) batadv_tt_global_entry_free_ref(tt_global_entry); @@ -2769,9 +2768,8 @@ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, { if (batadv_is_my_mac(bat_priv, req_dst)) return batadv_send_my_tt_response(bat_priv, tt_data, req_src); - else - return batadv_send_other_tt_response(bat_priv, tt_data, - req_src, req_dst); + return batadv_send_other_tt_response(bat_priv, tt_data, req_src, + req_dst); } static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, @@ -2854,7 +2852,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, /** * batadv_is_my_client - check if a client is served by the local node * @bat_priv: the bat priv with all the soft interface information - * @addr: the mac adress of the client to check + * @addr: the mac address of the client to check * @vid: VLAN identifier * * Returns true if the client is served by this node, false otherwise. diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8854c05622a9..9398c3fb4174 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -199,7 +199,6 @@ struct batadv_orig_bat_iv { /** * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh * @orig: originator ethernet address - * @primary_addr: hosts primary interface address * @ifinfo_list: list for routers per outgoing interface * @last_bonding_candidate: pointer to last ifinfo of last used router * @batadv_dat_addr_t: address of the orig node in the distributed hash @@ -244,7 +243,6 @@ struct batadv_orig_bat_iv { */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; - uint8_t primary_addr[ETH_ALEN]; struct hlist_head ifinfo_list; struct batadv_orig_ifinfo *last_bonding_candidate; #ifdef CONFIG_BATMAN_ADV_DAT @@ -970,7 +968,7 @@ struct batadv_tt_orig_list_entry { }; /** - * struct batadv_tt_change_node - structure for tt changes occured + * struct batadv_tt_change_node - structure for tt changes occurred * @list: list node for batadv_priv_tt::changes_list * @change: holds the actual translation table diff data */ diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 76617be1e797..1742b849fcff 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -31,7 +31,7 @@ #define VERSION "0.1" -static struct dentry *lowpan_psm_debugfs; +static struct dentry *lowpan_enable_debugfs; static struct dentry *lowpan_control_debugfs; #define IFACE_NAME_TEMPLATE "bt%d" @@ -55,11 +55,7 @@ struct skb_cb { static LIST_HEAD(bt_6lowpan_devices); static DEFINE_SPINLOCK(devices_lock); -/* If psm is set to 0 (default value), then 6lowpan is disabled. - * Other values are used to indicate a Protocol Service Multiplexer - * value for 6lowpan. - */ -static u16 psm_6lowpan; +static bool enable_6lowpan; /* We are listening incoming connections via this channel */ @@ -390,7 +386,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, drop: dev->stats.rx_dropped++; - kfree_skb(skb); return NET_RX_DROP; } @@ -762,7 +757,7 @@ static bool is_bt_6lowpan(struct hci_conn *hcon) if (hcon->type != LE_LINK) return false; - if (!psm_6lowpan) + if (!enable_6lowpan) return false; return true; @@ -1086,7 +1081,7 @@ static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type) if (!pchan) return -EINVAL; - err = l2cap_chan_connect(pchan, cpu_to_le16(psm_6lowpan), 0, + err = l2cap_chan_connect(pchan, cpu_to_le16(L2CAP_PSM_IPSP), 0, addr, dst_type); BT_DBG("chan %p err %d", pchan, err); @@ -1119,7 +1114,7 @@ static struct l2cap_chan *bt_6lowpan_listen(void) struct l2cap_chan *pchan; int err; - if (psm_6lowpan == 0) + if (!enable_6lowpan) return NULL; pchan = chan_get(); @@ -1131,10 +1126,9 @@ static struct l2cap_chan *bt_6lowpan_listen(void) atomic_set(&pchan->nesting, L2CAP_NESTING_PARENT); - BT_DBG("psm 0x%04x chan %p src type %d", psm_6lowpan, pchan, - pchan->src_type); + BT_DBG("chan %p src type %d", pchan, pchan->src_type); - err = l2cap_add_psm(pchan, addr, cpu_to_le16(psm_6lowpan)); + err = l2cap_add_psm(pchan, addr, cpu_to_le16(L2CAP_PSM_IPSP)); if (err) { l2cap_chan_put(pchan); BT_ERR("psm cannot be added err %d", err); @@ -1220,22 +1214,23 @@ static void disconnect_all_peers(void) spin_unlock(&devices_lock); } -struct set_psm { +struct set_enable { struct work_struct work; - u16 psm; + bool flag; }; -static void do_psm_set(struct work_struct *work) +static void do_enable_set(struct work_struct *work) { - struct set_psm *set_psm = container_of(work, struct set_psm, work); + struct set_enable *set_enable = container_of(work, + struct set_enable, work); - if (set_psm->psm == 0 || psm_6lowpan != set_psm->psm) + if (!set_enable->flag || enable_6lowpan != set_enable->flag) /* Disconnect existing connections if 6lowpan is - * disabled (psm = 0), or if psm changes. + * disabled */ disconnect_all_peers(); - psm_6lowpan = set_psm->psm; + enable_6lowpan = set_enable->flag; if (listen_chan) { l2cap_chan_close(listen_chan, 0); @@ -1244,33 +1239,33 @@ static void do_psm_set(struct work_struct *work) listen_chan = bt_6lowpan_listen(); - kfree(set_psm); + kfree(set_enable); } -static int lowpan_psm_set(void *data, u64 val) +static int lowpan_enable_set(void *data, u64 val) { - struct set_psm *set_psm; + struct set_enable *set_enable; - set_psm = kzalloc(sizeof(*set_psm), GFP_KERNEL); - if (!set_psm) + set_enable = kzalloc(sizeof(*set_enable), GFP_KERNEL); + if (!set_enable) return -ENOMEM; - set_psm->psm = val; - INIT_WORK(&set_psm->work, do_psm_set); + set_enable->flag = !!val; + INIT_WORK(&set_enable->work, do_enable_set); - schedule_work(&set_psm->work); + schedule_work(&set_enable->work); return 0; } -static int lowpan_psm_get(void *data, u64 *val) +static int lowpan_enable_get(void *data, u64 *val) { - *val = psm_6lowpan; + *val = enable_6lowpan; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(lowpan_psm_fops, lowpan_psm_get, - lowpan_psm_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(lowpan_enable_fops, lowpan_enable_get, + lowpan_enable_set, "%llu\n"); static ssize_t lowpan_control_write(struct file *fp, const char __user *user_buffer, @@ -1440,9 +1435,9 @@ static struct notifier_block bt_6lowpan_dev_notifier = { static int __init bt_6lowpan_init(void) { - lowpan_psm_debugfs = debugfs_create_file("6lowpan_psm", 0644, - bt_debugfs, NULL, - &lowpan_psm_fops); + lowpan_enable_debugfs = debugfs_create_file("6lowpan_enable", 0644, + bt_debugfs, NULL, + &lowpan_enable_fops); lowpan_control_debugfs = debugfs_create_file("6lowpan_control", 0644, bt_debugfs, NULL, &lowpan_control_fops); @@ -1452,7 +1447,7 @@ static int __init bt_6lowpan_init(void) static void __exit bt_6lowpan_exit(void) { - debugfs_remove(lowpan_psm_debugfs); + debugfs_remove(lowpan_enable_debugfs); debugfs_remove(lowpan_control_debugfs); if (listen_chan) { diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 29bcafc41adf..7de74635a110 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -64,4 +64,31 @@ config BT_6LOWPAN help IPv6 compression over Bluetooth Low Energy. +config BT_SELFTEST + bool "Bluetooth self testing support" + depends on BT && DEBUG_KERNEL + help + Run self tests when initializing the Bluetooth subsystem. This + is a developer option and can cause significant delay when booting + the system. + + When the Bluetooth subsystem is built as module, then the test + cases are run first thing at module load time. When the Bluetooth + subsystem is compiled into the kernel image, then the test cases + are run late in the initcall hierarchy. + +config BT_SELFTEST_ECDH + bool "ECDH test cases" + depends on BT_LE && BT_SELFTEST + help + Run test cases for ECDH cryptographic functionality used by the + Bluetooth Low Energy Secure Connections feature. + +config BT_SELFTEST_SMP + bool "SMP test cases" + depends on BT_LE && BT_SELFTEST + help + Run test cases for SMP cryptographic functionality, including both + legacy SMP as well as the Secure Connections features. + source "drivers/bluetooth/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index a5432a6a0ae6..8e96e3072266 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -13,6 +13,8 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o ecc.o + a2mp.o amp.o ecc.o hci_request.o hci_debugfs.o + +bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 012e3b03589d..ce22e0cfa923 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,6 +31,8 @@ #include <net/bluetooth/bluetooth.h> #include <linux/proc_fs.h> +#include "selftest.h" + #define VERSION "2.20" /* Bluetooth sockets */ @@ -716,6 +718,10 @@ static int __init bt_init(void) BT_INFO("Core ver %s", VERSION); + err = bt_selftest(); + if (err < 0) + return err; + bt_debugfs = debugfs_create_dir("bluetooth", NULL); err = bt_sysfs_init(); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 85bcc21e84d2..05f57e491ccb 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -511,13 +511,12 @@ static int bnep_session(void *arg) static struct device *bnep_get_device(struct bnep_session *session) { - struct hci_conn *conn; + struct l2cap_conn *conn = l2cap_pi(session->sock->sk)->chan->conn; - conn = l2cap_pi(session->sock->sk)->chan->conn->hcon; - if (!conn) + if (!conn || !conn->hcon) return NULL; - return &conn->dev; + return &conn->hcon->dev; } static struct device_type bnep_type = { @@ -533,6 +532,9 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) BT_DBG(""); + if (!l2cap_is_socket(sock)) + return -EBADFD; + baswap((void *) dst, &l2cap_pi(sock->sk)->chan->dst); baswap((void *) src, &l2cap_pi(sock->sk)->chan->src); diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 1ca8a87a0787..75bd2c42e3e7 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -253,8 +253,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 15) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 10); - if (!info && ctrl) { int len = min_t(uint, CAPI_MANUFACTURER_LEN, skb->data[CAPI_MSG_BASELEN + 14]); @@ -270,8 +268,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 32) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); - if (!info && ctrl) { ctrl->version.majorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 16); ctrl->version.minorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 20); @@ -285,8 +281,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 17) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); - if (!info && ctrl) { int len = min_t(uint, CAPI_SERIAL_LEN, skb->data[CAPI_MSG_BASELEN + 16]); diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 67fe5e84e68f..278a194e6af4 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -334,6 +334,9 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) BT_DBG(""); + if (!l2cap_is_socket(sock)) + return -EBADFD; + session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL); if (!session) return -ENOMEM; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 79d84b88b8f0..c9b8fa544785 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -25,11 +25,13 @@ /* Bluetooth HCI connection handling. */ #include <linux/export.h> +#include <linux/debugfs.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> +#include "hci_request.h" #include "smp.h" #include "a2mp.h" @@ -546,6 +548,8 @@ int hci_conn_del(struct hci_conn *conn) hci_conn_del_sysfs(conn); + debugfs_remove_recursive(conn->debugfs); + if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); @@ -629,7 +633,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_reenable_advertising(hdev); } -static void create_le_conn_complete(struct hci_dev *hdev, u8 status) +static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_conn *conn; @@ -661,7 +665,7 @@ static void hci_req_add_le_create_conn(struct hci_request *req, memset(&cp, 0, sizeof(cp)); /* Update random address, but set require_privacy to false so - * that we never connect with an unresolvable address. + * that we never connect with an non-resolvable address. */ if (hci_update_random_address(req, false, &own_addr_type)) return; @@ -1080,21 +1084,6 @@ int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level) } EXPORT_SYMBOL(hci_conn_check_secure); -/* Change link key */ -int hci_conn_change_link_key(struct hci_conn *conn) -{ - BT_DBG("hcon %p", conn); - - if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { - struct hci_cp_change_conn_link_key cp; - cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, - sizeof(cp), &cp); - } - - return 0; -} - /* Switch role */ int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 93f92a085506..3322d3f4c85a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -37,6 +37,8 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> +#include "hci_request.h" +#include "hci_debugfs.h" #include "smp.h" static void hci_rx_work(struct work_struct *work); @@ -137,941 +139,9 @@ static const struct file_operations dut_mode_fops = { .llseek = default_llseek, }; -static int features_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - u8 p; - - hci_dev_lock(hdev); - for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { - seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p, - hdev->features[p][0], hdev->features[p][1], - hdev->features[p][2], hdev->features[p][3], - hdev->features[p][4], hdev->features[p][5], - hdev->features[p][6], hdev->features[p][7]); - } - if (lmp_le_capable(hdev)) - seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", - hdev->le_features[0], hdev->le_features[1], - hdev->le_features[2], hdev->le_features[3], - hdev->le_features[4], hdev->le_features[5], - hdev->le_features[6], hdev->le_features[7]); - hci_dev_unlock(hdev); - - return 0; -} - -static int features_open(struct inode *inode, struct file *file) -{ - return single_open(file, features_show, inode->i_private); -} - -static const struct file_operations features_fops = { - .open = features_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int blacklist_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->blacklist, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - hci_dev_unlock(hdev); - - return 0; -} - -static int blacklist_open(struct inode *inode, struct file *file) -{ - return single_open(file, blacklist_show, inode->i_private); -} - -static const struct file_operations blacklist_fops = { - .open = blacklist_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int uuids_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct bt_uuid *uuid; - - hci_dev_lock(hdev); - list_for_each_entry(uuid, &hdev->uuids, list) { - u8 i, val[16]; - - /* The Bluetooth UUID values are stored in big endian, - * but with reversed byte order. So convert them into - * the right order for the %pUb modifier. - */ - for (i = 0; i < 16; i++) - val[i] = uuid->uuid[15 - i]; - - seq_printf(f, "%pUb\n", val); - } - hci_dev_unlock(hdev); - - return 0; -} - -static int uuids_open(struct inode *inode, struct file *file) -{ - return single_open(file, uuids_show, inode->i_private); -} - -static const struct file_operations uuids_fops = { - .open = uuids_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int inquiry_cache_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct discovery_state *cache = &hdev->discovery; - struct inquiry_entry *e; - - hci_dev_lock(hdev); - - list_for_each_entry(e, &cache->all, all) { - struct inquiry_data *data = &e->data; - seq_printf(f, "%pMR %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - &data->bdaddr, - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); - } - - hci_dev_unlock(hdev); - - return 0; -} - -static int inquiry_cache_open(struct inode *inode, struct file *file) -{ - return single_open(file, inquiry_cache_show, inode->i_private); -} - -static const struct file_operations inquiry_cache_fops = { - .open = inquiry_cache_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int link_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct link_key *key; - - rcu_read_lock(); - list_for_each_entry_rcu(key, &hdev->link_keys, list) - seq_printf(f, "%pMR %u %*phN %u\n", &key->bdaddr, key->type, - HCI_LINK_KEY_SIZE, key->val, key->pin_len); - rcu_read_unlock(); - - return 0; -} - -static int link_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, link_keys_show, inode->i_private); -} - -static const struct file_operations link_keys_fops = { - .open = link_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int dev_class_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "0x%.2x%.2x%.2x\n", hdev->dev_class[2], - hdev->dev_class[1], hdev->dev_class[0]); - hci_dev_unlock(hdev); - - return 0; -} - -static int dev_class_open(struct inode *inode, struct file *file) -{ - return single_open(file, dev_class_show, inode->i_private); -} - -static const struct file_operations dev_class_fops = { - .open = dev_class_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int voice_setting_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->voice_setting; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, - NULL, "0x%4.4llx\n"); - -static int auto_accept_delay_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - hdev->auto_accept_delay = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int auto_accept_delay_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->auto_accept_delay; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, - auto_accept_delay_set, "%llu\n"); - -static ssize_t force_sc_support_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_SC, &hdev->dbg_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_sc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_SC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_sc_support_fops = { - .open = simple_open, - .read = force_sc_support_read, - .write = force_sc_support_write, - .llseek = default_llseek, -}; - -static ssize_t force_lesc_support_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_LESC, &hdev->dbg_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_lesc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_LESC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_lesc_support_fops = { - .open = simple_open, - .read = force_lesc_support_read, - .write = force_lesc_support_write, - .llseek = default_llseek, -}; - -static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static const struct file_operations sc_only_mode_fops = { - .open = simple_open, - .read = sc_only_mode_read, - .llseek = default_llseek, -}; - -static int idle_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val != 0 && (val < 500 || val > 3600000)) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->idle_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int idle_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->idle_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, - idle_timeout_set, "%llu\n"); - -static int rpa_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - /* Require the RPA timeout to be at least 30 seconds and at most - * 24 hours. - */ - if (val < 30 || val > (60 * 60 * 24)) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->rpa_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int rpa_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->rpa_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, - rpa_timeout_set, "%llu\n"); - -static int sniff_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->sniff_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int sniff_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->sniff_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get, - sniff_min_interval_set, "%llu\n"); - -static int sniff_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->sniff_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int sniff_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->sniff_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, - sniff_max_interval_set, "%llu\n"); - -static int conn_info_min_age_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val > hdev->conn_info_max_age) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->conn_info_min_age = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_info_min_age_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->conn_info_min_age; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, - conn_info_min_age_set, "%llu\n"); - -static int conn_info_max_age_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val < hdev->conn_info_min_age) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->conn_info_max_age = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_info_max_age_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->conn_info_max_age; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, - conn_info_max_age_set, "%llu\n"); - -static int identity_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - bdaddr_t addr; - u8 addr_type; - - hci_dev_lock(hdev); - - hci_copy_identity_address(hdev, &addr, &addr_type); - - seq_printf(f, "%pMR (type %u) %*phN %pMR\n", &addr, addr_type, - 16, hdev->irk, &hdev->rpa); - - hci_dev_unlock(hdev); - - return 0; -} - -static int identity_open(struct inode *inode, struct file *file) -{ - return single_open(file, identity_show, inode->i_private); -} - -static const struct file_operations identity_fops = { - .open = identity_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int random_address_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "%pMR\n", &hdev->random_addr); - hci_dev_unlock(hdev); - - return 0; -} - -static int random_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, random_address_show, inode->i_private); -} - -static const struct file_operations random_address_fops = { - .open = random_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int static_address_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "%pMR\n", &hdev->static_addr); - hci_dev_unlock(hdev); - - return 0; -} - -static int static_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, static_address_show, inode->i_private); -} - -static const struct file_operations static_address_fops = { - .open = static_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static ssize_t force_static_address_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_static_address_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_static_address_fops = { - .open = simple_open, - .read = force_static_address_read, - .write = force_static_address_write, - .llseek = default_llseek, -}; - -static int white_list_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->le_white_list, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - hci_dev_unlock(hdev); - - return 0; -} - -static int white_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, white_list_show, inode->i_private); -} - -static const struct file_operations white_list_fops = { - .open = white_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int identity_resolving_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct smp_irk *irk; - - rcu_read_lock(); - list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { - seq_printf(f, "%pMR (type %u) %*phN %pMR\n", - &irk->bdaddr, irk->addr_type, - 16, irk->val, &irk->rpa); - } - rcu_read_unlock(); - - return 0; -} - -static int identity_resolving_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, identity_resolving_keys_show, - inode->i_private); -} - -static const struct file_operations identity_resolving_keys_fops = { - .open = identity_resolving_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int long_term_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct smp_ltk *ltk; - - rcu_read_lock(); - list_for_each_entry_rcu(ltk, &hdev->long_term_keys, list) - seq_printf(f, "%pMR (type %u) %u 0x%02x %u %.4x %.16llx %*phN\n", - <k->bdaddr, ltk->bdaddr_type, ltk->authenticated, - ltk->type, ltk->enc_size, __le16_to_cpu(ltk->ediv), - __le64_to_cpu(ltk->rand), 16, ltk->val); - rcu_read_unlock(); - - return 0; -} - -static int long_term_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, long_term_keys_show, inode->i_private); -} - -static const struct file_operations long_term_keys_fops = { - .open = long_term_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int conn_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, - conn_min_interval_set, "%llu\n"); - -static int conn_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, - conn_max_interval_set, "%llu\n"); - -static int conn_latency_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val > 0x01f3) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_latency = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_latency_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_latency; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, - conn_latency_set, "%llu\n"); - -static int supervision_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x000a || val > 0x0c80) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_supv_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int supervision_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_supv_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, - supervision_timeout_set, "%llu\n"); - -static int adv_channel_map_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x01 || val > 0x07) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_channel_map = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_channel_map_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_channel_map; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, - adv_channel_map_set, "%llu\n"); - -static int adv_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, - adv_min_interval_set, "%llu\n"); - -static int adv_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, - adv_max_interval_set, "%llu\n"); - -static int device_list_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct hci_conn_params *p; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->whitelist, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - list_for_each_entry(p, &hdev->le_conn_params, list) { - seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type, - p->auto_connect); - } - hci_dev_unlock(hdev); - - return 0; -} - -static int device_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, device_list_show, inode->i_private); -} - -static const struct file_operations device_list_fops = { - .open = device_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* ---- HCI requests ---- */ -static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode) { BT_DBG("%s result 0x%2.2x", hdev->name, result); @@ -1373,8 +443,6 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt) static void bredr_setup(struct hci_request *req) { - struct hci_dev *hdev = req->hdev; - __le16 param; __u8 flt_type; @@ -1403,14 +471,6 @@ static void bredr_setup(struct hci_request *req) /* Connection accept timeout ~20 secs */ param = cpu_to_le16(0x7d00); hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, ¶m); - - /* AVM Berlin (31), aka "BlueFRITZ!", reports version 1.2, - * but it does not support page scan related HCI commands. - */ - if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) { - hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); - hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL); - } } static void le_setup(struct hci_request *req) @@ -1437,43 +497,6 @@ static void le_setup(struct hci_request *req) set_bit(HCI_LE_ENABLED, &hdev->dev_flags); } -static u8 hci_get_inquiry_mode(struct hci_dev *hdev) -{ - if (lmp_ext_inq_capable(hdev)) - return 0x02; - - if (lmp_inq_rssi_capable(hdev)) - return 0x01; - - if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && - hdev->lmp_subver == 0x0757) - return 0x01; - - if (hdev->manufacturer == 15) { - if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) - return 0x01; - if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) - return 0x01; - if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) - return 0x01; - } - - if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && - hdev->lmp_subver == 0x1805) - return 0x01; - - return 0x00; -} - -static void hci_setup_inquiry_mode(struct hci_request *req) -{ - u8 mode; - - mode = hci_get_inquiry_mode(req->hdev); - - hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); -} - static void hci_setup_event_mask(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -1563,10 +586,16 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (lmp_le_capable(hdev)) le_setup(req); - /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read - * local supported commands HCI command. + /* All Bluetooth 1.2 and later controllers should support the + * HCI command for reading the local supported commands. + * + * Unfortunately some controllers indicate Bluetooth 1.2 support, + * but do not have support for this command. If that is the case, + * the driver can quirk the behavior and skip reading the local + * supported commands. */ - if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) + if (hdev->hci_ver > BLUETOOTH_VER_1_1 && + !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks)) hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { @@ -1580,6 +609,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { u8 mode = 0x01; + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); } else { @@ -1592,8 +622,18 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) } } - if (lmp_inq_rssi_capable(hdev)) - hci_setup_inquiry_mode(req); + if (lmp_inq_rssi_capable(hdev) || + test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) { + u8 mode; + + /* If Extended Inquiry Result events are supported, then + * they are clearly preferred over Inquiry Result with RSSI + * events. + */ + mode = lmp_ext_inq_capable(hdev) ? 0x02 : 0x01; + + hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); + } if (lmp_inq_tx_pwr_capable(hdev)) hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); @@ -1692,32 +732,27 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); - /* Some Broadcom based Bluetooth controllers do not support the - * Delete Stored Link Key command. They are clearly indicating its - * absence in the bit mask of supported commands. - * - * Check the supported commands and only if the the command is marked - * as supported send it. If not supported assume that the controller - * does not have actual support for stored link keys which makes this - * command redundant anyway. - * - * Some controllers indicate that they support handling deleting - * stored link keys, but they don't. The quirk lets a driver - * just disable this command. - */ - if (hdev->commands[6] & 0x80 && - !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { - struct hci_cp_delete_stored_link_key cp; + if (hdev->commands[6] & 0x20) { + struct hci_cp_read_stored_link_key cp; bacpy(&cp.bdaddr, BDADDR_ANY); - cp.delete_all = 0x01; - hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, - sizeof(cp), &cp); + cp.read_all = 0x01; + hci_req_add(req, HCI_OP_READ_STORED_LINK_KEY, sizeof(cp), &cp); } if (hdev->commands[5] & 0x10) hci_setup_link_policy(req); + if (hdev->commands[8] & 0x01) + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); + + /* Some older Broadcom based Bluetooth 1.2 controllers do not + * support the Read Page Scan Type command. Check support for + * this command in the bit mask of supported commands. + */ + if (hdev->commands[13] & 0x01) + hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL); + if (lmp_le_capable(hdev)) { u8 events[8]; @@ -1735,6 +770,12 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) * Parameter Request */ + /* If the controller supports the Data Length Extension + * feature, enable the corresponding event. + */ + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) + events[0] |= 0x40; /* LE Data Length Change */ + /* If the controller supports Extended Scanner Filter * Policies, enable the correspondig event. */ @@ -1765,6 +806,14 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); } + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { + /* Read LE Maximum Data Length */ + hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); + + /* Read LE Suggested Default Data Length */ + hci_req_add(req, HCI_OP_LE_READ_DEF_DATA_LEN, 0, NULL); + } + hci_set_le_support(req); } @@ -1782,6 +831,29 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; + /* Some Broadcom based Bluetooth controllers do not support the + * Delete Stored Link Key command. They are clearly indicating its + * absence in the bit mask of supported commands. + * + * Check the supported commands and only if the the command is marked + * as supported send it. If not supported assume that the controller + * does not have actual support for stored link keys which makes this + * command redundant anyway. + * + * Some controllers indicate that they support handling deleting + * stored link keys, but they don't. The quirk lets a driver + * just disable this command. + */ + if (hdev->commands[6] & 0x80 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { + struct hci_cp_delete_stored_link_key cp; + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.delete_all = 0x01; + hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, + sizeof(cp), &cp); + } + /* Set event mask page 2 if the HCI command for it is supported */ if (hdev->commands[22] & 0x04) hci_set_event_mask_page_2(req); @@ -1799,8 +871,10 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); /* Enable Secure Connections if supported and configured */ - if (bredr_sc_enabled(hdev)) { + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + bredr_sc_enabled(hdev)) { u8 support = 0x01; + hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, sizeof(support), &support); } @@ -1841,110 +915,29 @@ static int __hci_init(struct hci_dev *hdev) if (err < 0) return err; - /* Only create debugfs entries during the initial setup - * phase and not every time the controller gets powered on. + /* This function is only called when the controller is actually in + * configured state. When the controller is marked as unconfigured, + * this initialization procedure is not run. + * + * It means that it is possible that a controller runs through its + * setup phase and then discovers missing settings. If that is the + * case, then this function will not be called. It then will only + * be called during the config phase. + * + * So only when in setup phase or config phase, create the debugfs + * entries and register the SMP channels. */ - if (!test_bit(HCI_SETUP, &hdev->dev_flags)) + if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + !test_bit(HCI_CONFIG, &hdev->dev_flags)) return 0; - debugfs_create_file("features", 0444, hdev->debugfs, hdev, - &features_fops); - debugfs_create_u16("manufacturer", 0444, hdev->debugfs, - &hdev->manufacturer); - debugfs_create_u8("hci_version", 0444, hdev->debugfs, &hdev->hci_ver); - debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); - debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, - &device_list_fops); - debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, - &blacklist_fops); - debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); - - debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, - &conn_info_min_age_fops); - debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, - &conn_info_max_age_fops); + hci_debugfs_create_common(hdev); - if (lmp_bredr_capable(hdev)) { - debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, - hdev, &inquiry_cache_fops); - debugfs_create_file("link_keys", 0400, hdev->debugfs, - hdev, &link_keys_fops); - debugfs_create_file("dev_class", 0444, hdev->debugfs, - hdev, &dev_class_fops); - debugfs_create_file("voice_setting", 0444, hdev->debugfs, - hdev, &voice_setting_fops); - } - - if (lmp_ssp_capable(hdev)) { - debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, - hdev, &auto_accept_delay_fops); - debugfs_create_file("force_sc_support", 0644, hdev->debugfs, - hdev, &force_sc_support_fops); - debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, - hdev, &sc_only_mode_fops); - if (lmp_le_capable(hdev)) - debugfs_create_file("force_lesc_support", 0644, - hdev->debugfs, hdev, - &force_lesc_support_fops); - } - - if (lmp_sniff_capable(hdev)) { - debugfs_create_file("idle_timeout", 0644, hdev->debugfs, - hdev, &idle_timeout_fops); - debugfs_create_file("sniff_min_interval", 0644, hdev->debugfs, - hdev, &sniff_min_interval_fops); - debugfs_create_file("sniff_max_interval", 0644, hdev->debugfs, - hdev, &sniff_max_interval_fops); - } + if (lmp_bredr_capable(hdev)) + hci_debugfs_create_bredr(hdev); - if (lmp_le_capable(hdev)) { - debugfs_create_file("identity", 0400, hdev->debugfs, - hdev, &identity_fops); - debugfs_create_file("rpa_timeout", 0644, hdev->debugfs, - hdev, &rpa_timeout_fops); - debugfs_create_file("random_address", 0444, hdev->debugfs, - hdev, &random_address_fops); - debugfs_create_file("static_address", 0444, hdev->debugfs, - hdev, &static_address_fops); - - /* For controllers with a public address, provide a debug - * option to force the usage of the configured static - * address. By default the public address is used. - */ - if (bacmp(&hdev->bdaddr, BDADDR_ANY)) - debugfs_create_file("force_static_address", 0644, - hdev->debugfs, hdev, - &force_static_address_fops); - - debugfs_create_u8("white_list_size", 0444, hdev->debugfs, - &hdev->le_white_list_size); - debugfs_create_file("white_list", 0444, hdev->debugfs, hdev, - &white_list_fops); - debugfs_create_file("identity_resolving_keys", 0400, - hdev->debugfs, hdev, - &identity_resolving_keys_fops); - debugfs_create_file("long_term_keys", 0400, hdev->debugfs, - hdev, &long_term_keys_fops); - debugfs_create_file("conn_min_interval", 0644, hdev->debugfs, - hdev, &conn_min_interval_fops); - debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, - hdev, &conn_max_interval_fops); - debugfs_create_file("conn_latency", 0644, hdev->debugfs, - hdev, &conn_latency_fops); - debugfs_create_file("supervision_timeout", 0644, hdev->debugfs, - hdev, &supervision_timeout_fops); - debugfs_create_file("adv_channel_map", 0644, hdev->debugfs, - hdev, &adv_channel_map_fops); - debugfs_create_file("adv_min_interval", 0644, hdev->debugfs, - hdev, &adv_min_interval_fops); - debugfs_create_file("adv_max_interval", 0644, hdev->debugfs, - hdev, &adv_max_interval_fops); - debugfs_create_u16("discov_interleaved_timeout", 0644, - hdev->debugfs, - &hdev->discov_interleaved_timeout); - - smp_register(hdev); - } + if (lmp_le_capable(hdev)) + hci_debugfs_create_le(hdev); return 0; } @@ -2624,6 +1617,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work(&hdev->service_cache); cancel_delayed_work_sync(&hdev->le_scan_disable); + cancel_delayed_work_sync(&hdev->le_scan_restart); if (test_bit(HCI_MGMT, &hdev->dev_flags)) cancel_delayed_work_sync(&hdev->rpa_expired); @@ -2634,11 +1628,21 @@ static int hci_dev_do_close(struct hci_dev *hdev) drain_workqueue(hdev->workqueue); hci_dev_lock(hdev); + + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + + if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + if (hdev->dev_type == HCI_BREDR) + mgmt_powered(hdev, 0); + } + hci_inquiry_cache_flush(hdev); hci_pend_le_actions_clear(hdev); hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); + smp_unregister(hdev); + hci_notify(hdev, HCI_DEV_DOWN); if (hdev->flush) @@ -2681,14 +1685,6 @@ static int hci_dev_do_close(struct hci_dev *hdev) hdev->flags &= BIT(HCI_RAW); hdev->dev_flags &= ~HCI_PERSISTENT_MASK; - if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { - if (hdev->dev_type == HCI_BREDR) { - hci_dev_lock(hdev); - mgmt_powered(hdev, 0); - hci_dev_unlock(hdev); - } - } - /* Controller radio is available but is currently powered down */ hdev->amp_status = AMP_STATUS_POWERED_DOWN; @@ -2726,32 +1722,14 @@ done: return err; } -int hci_dev_reset(__u16 dev) +static int hci_dev_do_reset(struct hci_dev *hdev) { - struct hci_dev *hdev; - int ret = 0; + int ret; - hdev = hci_dev_get(dev); - if (!hdev) - return -ENODEV; + BT_DBG("%s %p", hdev->name, hdev); hci_req_lock(hdev); - if (!test_bit(HCI_UP, &hdev->flags)) { - ret = -ENETDOWN; - goto done; - } - - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { - ret = -EBUSY; - goto done; - } - - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { - ret = -EOPNOTSUPP; - goto done; - } - /* Drop queues */ skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); @@ -2774,12 +1752,41 @@ int hci_dev_reset(__u16 dev) ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); -done: hci_req_unlock(hdev); - hci_dev_put(hdev); return ret; } +int hci_dev_reset(__u16 dev) +{ + struct hci_dev *hdev; + int err; + + hdev = hci_dev_get(dev); + if (!hdev) + return -ENODEV; + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = -ENETDOWN; + goto done; + } + + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = -EBUSY; + goto done; + } + + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + err = -EOPNOTSUPP; + goto done; + } + + err = hci_dev_do_reset(hdev); + +done: + hci_dev_put(hdev); + return err; +} + int hci_dev_reset_stat(__u16 dev) { struct hci_dev *hdev; @@ -3083,7 +2090,9 @@ static void hci_power_on(struct work_struct *work) err = hci_dev_do_open(hdev); if (err < 0) { + hci_dev_lock(hdev); mgmt_set_powered_failed(hdev, err); + hci_dev_unlock(hdev); return; } @@ -3143,6 +2152,24 @@ static void hci_power_off(struct work_struct *work) hci_dev_do_close(hdev); } +static void hci_error_reset(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + + BT_DBG("%s", hdev->name); + + if (hdev->hw_error) + hdev->hw_error(hdev, hdev->hw_error_code); + else + BT_ERR("%s hardware error 0x%2.2x", hdev->name, + hdev->hw_error_code); + + if (hci_dev_do_close(hdev)) + return; + + hci_dev_do_open(hdev); +} + static void hci_discov_off(struct work_struct *work) { struct hci_dev *hdev; @@ -3555,9 +2582,15 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, if (hash192 && rand192) { memcpy(data->hash192, hash192, sizeof(data->hash192)); memcpy(data->rand192, rand192, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x03; } else { memset(data->hash192, 0, sizeof(data->hash192)); memset(data->rand192, 0, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x02; + else + data->present = 0x00; } if (hash256 && rand256) { @@ -3566,6 +2599,8 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, } else { memset(data->hash256, 0, sizeof(data->hash256)); memset(data->rand256, 0, sizeof(data->rand256)); + if (hash192 && rand192) + data->present = 0x01; } BT_DBG("%s for %pMR", hdev->name, bdaddr); @@ -3659,23 +2694,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, return NULL; } -static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) -{ - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); - if (!conn) - return false; - - if (conn->dst_type != type) - return false; - - if (conn->state != BT_CONNECTED) - return false; - - return true; -} - /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type) @@ -3731,47 +2749,6 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, return params; } -/* This function requires the caller holds hdev->lock */ -int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, - u8 auto_connect) -{ - struct hci_conn_params *params; - - params = hci_conn_params_add(hdev, addr, addr_type); - if (!params) - return -EIO; - - if (params->auto_connect == auto_connect) - return 0; - - list_del_init(¶ms->action); - - switch (auto_connect) { - case HCI_AUTO_CONN_DISABLED: - case HCI_AUTO_CONN_LINK_LOSS: - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_REPORT: - list_add(¶ms->action, &hdev->pend_le_reports); - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - if (!is_connected(hdev, addr, addr_type)) { - list_add(¶ms->action, &hdev->pend_le_conns); - hci_update_background_scan(hdev); - } - break; - } - - params->auto_connect = auto_connect; - - BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, - auto_connect); - - return 0; -} - static void hci_conn_params_free(struct hci_conn_params *params) { if (params->conn) { @@ -3828,7 +2805,7 @@ void hci_conn_params_clear_all(struct hci_dev *hdev) BT_DBG("All LE connection parameters were removed"); } -static void inquiry_complete(struct hci_dev *hdev, u8 status) +static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) { if (status) { BT_ERR("Failed to start inquiry: status %d", status); @@ -3840,7 +2817,8 @@ static void inquiry_complete(struct hci_dev *hdev, u8 status) } } -static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; @@ -3853,6 +2831,8 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) return; } + hdev->discovery.scan_start = 0; + switch (hdev->discovery.type) { case DISCOV_TYPE_LE: hci_dev_lock(hdev); @@ -3892,6 +2872,8 @@ static void le_scan_disable_work(struct work_struct *work) BT_DBG("%s", hdev->name); + cancel_delayed_work_sync(&hdev->le_scan_restart); + hci_req_init(&req, hdev); hci_req_add_le_scan_disable(&req); @@ -3901,98 +2883,72 @@ static void le_scan_disable_work(struct work_struct *work) BT_ERR("Disable LE scanning request failed: err %d", err); } -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { - struct hci_dev *hdev = req->hdev; + unsigned long timeout, duration, scan_start, now; - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. - */ - if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || - hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { - BT_DBG("Deferring random address update"); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + BT_DBG("%s", hdev->name); + + if (status) { + BT_ERR("Failed to restart LE scan: status %d", status); return; } - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - -int hci_update_random_address(struct hci_request *req, bool require_privacy, - u8 *own_addr_type) -{ - struct hci_dev *hdev = req->hdev; - int err; + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || + !hdev->discovery.scan_start) + return; - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired or there is something else than - * the current RPA in use, then generate a new one. + /* When the scan was started, hdev->le_scan_disable has been queued + * after duration from scan_start. During scan restart this job + * has been canceled, and we need to queue it again after proper + * timeout, to make sure that scan does not run indefinitely. */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { - int to; - - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && - !bacmp(&hdev->random_addr, &hdev->rpa)) - return 0; - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); - return err; - } - - set_random_addr(req, &hdev->rpa); - - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); + duration = hdev->discovery.scan_duration; + scan_start = hdev->discovery.scan_start; + now = jiffies; + if (now - scan_start <= duration) { + int elapsed; + + if (now >= scan_start) + elapsed = now - scan_start; + else + elapsed = ULONG_MAX - scan_start + now; - return 0; + timeout = duration - elapsed; + } else { + timeout = 0; } + queue_delayed_work(hdev->workqueue, + &hdev->le_scan_disable, timeout); +} - /* In case of required privacy without resolvable private address, - * use an unresolvable private address. This is useful for active - * scanning and non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t urpa; +static void le_scan_restart_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); + struct hci_request req; + struct hci_cp_le_set_scan_enable cp; + int err; - get_random_bytes(&urpa, 6); - urpa.b[5] &= 0x3f; /* Clear two most significant bits */ + BT_DBG("%s", hdev->name); - *own_addr_type = ADDR_LE_DEV_RANDOM; - set_random_addr(req, &urpa); - return 0; - } + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; - /* If forcing static address is in use or there is no public - * address use the static address as random address (but skip - * the HCI command if the current random address is already the - * static one. - */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { - *own_addr_type = ADDR_LE_DEV_RANDOM; - if (bacmp(&hdev->static_addr, &hdev->random_addr)) - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, - &hdev->static_addr); - return 0; - } + hci_req_init(&req, hdev); - /* Neither privacy nor static address is being used so use a - * public address. - */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; + hci_req_add_le_scan_disable(&req); - return 0; + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + + err = hci_req_run(&req, le_scan_restart_work_complete); + if (err) + BT_ERR("Restart LE scan request failed: err %d", err); } /* Copy the Identity Address of the controller. @@ -4003,12 +2959,18 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * * For debugging purposes it is possible to force controllers with a * public address to use the static random address instead. + * + * In case BR/EDR has been disabled on a dual-mode controller and + * userspace has configured a static address, then that address + * becomes the identity address instead of the public BR/EDR address. */ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type) { if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { bacpy(bdaddr, &hdev->static_addr); *bdaddr_type = ADDR_LE_DEV_RANDOM; } else { @@ -4047,6 +3009,12 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_conn_max_interval = 0x0038; hdev->le_conn_latency = 0x0000; hdev->le_supv_timeout = 0x002a; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; + hdev->le_max_tx_time = 0x0148; + hdev->le_max_rx_len = 0x001b; + hdev->le_max_rx_time = 0x0148; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; @@ -4074,10 +3042,12 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); + INIT_WORK(&hdev->error_reset, hci_error_reset); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); + INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); @@ -4247,8 +3217,6 @@ void hci_unregister_dev(struct hci_dev *hdev) rfkill_destroy(hdev->rfkill); } - smp_unregister(hdev); - device_del(&hdev->dev); debugfs_remove_recursive(hdev->debugfs); @@ -4527,76 +3495,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) } } -void hci_req_init(struct hci_request *req, struct hci_dev *hdev) -{ - skb_queue_head_init(&req->cmd_q); - req->hdev = hdev; - req->err = 0; -} - -int hci_req_run(struct hci_request *req, hci_req_complete_t complete) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - unsigned long flags; - - BT_DBG("length %u", skb_queue_len(&req->cmd_q)); - - /* If an error occurred during request building, remove all HCI - * commands queued on the HCI request queue. - */ - if (req->err) { - skb_queue_purge(&req->cmd_q); - return req->err; - } - - /* Do not allow empty requests */ - if (skb_queue_empty(&req->cmd_q)) - return -ENODATA; - - skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->req.complete = complete; - - spin_lock_irqsave(&hdev->cmd_q.lock, flags); - skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); - spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); - - queue_work(hdev->workqueue, &hdev->cmd_work); - - return 0; -} - bool hci_req_pending(struct hci_dev *hdev) { return (hdev->req_status == HCI_REQ_PEND); } -static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, - u32 plen, const void *param) -{ - int len = HCI_COMMAND_HDR_SIZE + plen; - struct hci_command_hdr *hdr; - struct sk_buff *skb; - - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) - return NULL; - - hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); - hdr->opcode = cpu_to_le16(opcode); - hdr->plen = plen; - - if (plen) - memcpy(skb_put(skb, plen), param, plen); - - BT_DBG("skb len %d", skb->len); - - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - bt_cb(skb)->opcode = opcode; - - return skb; -} - /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param) @@ -4622,43 +3525,6 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, return 0; } -/* Queue a command to an asynchronous HCI request */ -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - - BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); - - /* If an error occurred during request building, there is no point in - * queueing the HCI command. We can simply return. - */ - if (req->err) - return; - - skb = hci_prepare_cmd(hdev, opcode, plen, param); - if (!skb) { - BT_ERR("%s no memory for command (opcode 0x%4.4x)", - hdev->name, opcode); - req->err = -ENOMEM; - return; - } - - if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->req.start = true; - - bt_cb(skb)->req.event = event; - - skb_queue_tail(&req->cmd_q, skb); -} - -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param) -{ - hci_req_add_ev(req, opcode, plen, param, 0); -} - /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { @@ -5417,7 +4283,7 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) call_complete: if (req_complete) - req_complete(hdev, status); + req_complete(hdev, status, status ? opcode : HCI_OP_NOP); } static void hci_rx_work(struct work_struct *work) @@ -5506,302 +4372,3 @@ static void hci_cmd_work(struct work_struct *work) } } } - -void hci_req_add_le_scan_disable(struct hci_request *req) -{ - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_DISABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); -} - -static void add_to_white_list(struct hci_request *req, - struct hci_conn_params *params) -{ - struct hci_cp_le_add_to_white_list cp; - - cp.bdaddr_type = params->addr_type; - bacpy(&cp.bdaddr, ¶ms->addr); - - hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); -} - -static u8 update_white_list(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_conn_params *params; - struct bdaddr_list *b; - uint8_t white_list_entries = 0; - - /* Go through the current white list programmed into the - * controller one by one and check if that address is still - * in the list of pending connections or list of devices to - * report. If not present in either list, then queue the - * command to remove it from the controller. - */ - list_for_each_entry(b, &hdev->le_white_list, list) { - struct hci_cp_le_del_from_white_list cp; - - if (hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) || - hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - white_list_entries++; - continue; - } - - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); - } - - /* Since all no longer valid white list entries have been - * removed, walk through the list of pending connections - * and ensure that any new device gets programmed into - * the controller. - * - * If the list of the devices is larger than the list of - * available white list entries in the controller, then - * just abort and return filer policy value to not use the - * white list. - */ - list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* After adding all new pending connections, walk through - * the list of pending reports and also add these to the - * white list if there is still space. - */ - list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* Select filter policy to use white list */ - return 0x01; -} - -void hci_req_add_le_passive_scan(struct hci_request *req) -{ - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - struct hci_dev *hdev = req->hdev; - u8 own_addr_type; - u8 filter_policy; - - /* Set require_privacy to false since no SCAN_REQ are send - * during passive scanning. Not using an unresolvable address - * here is important so that peer devices using direct - * advertising with our address will be correctly reported - * by the controller. - */ - if (hci_update_random_address(req, false, &own_addr_type)) - return; - - /* Adding or removing entries from the white list must - * happen before enabling scanning. The controller does - * not allow white list modification while scanning. - */ - filter_policy = update_white_list(req); - - /* When the controller is using random resolvable addresses and - * with that having LE privacy enabled, then controllers with - * Extended Scanner Filter Policies support can now enable support - * for handling directed advertising. - * - * So instead of using filter polices 0x00 (no whitelist) - * and 0x01 (whitelist enabled) use the new filter policies - * 0x02 (no whitelist) and 0x03 (whitelist enabled). - */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && - (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) - filter_policy |= 0x02; - - memset(¶m_cp, 0, sizeof(param_cp)); - param_cp.type = LE_SCAN_PASSIVE; - param_cp.interval = cpu_to_le16(hdev->le_scan_interval); - param_cp.window = cpu_to_le16(hdev->le_scan_window); - param_cp.own_address_type = own_addr_type; - param_cp.filter_policy = filter_policy; - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); -} - -static void update_background_scan_complete(struct hci_dev *hdev, u8 status) -{ - if (status) - BT_DBG("HCI request failed to update background scanning: " - "status 0x%2.2x", status); -} - -/* This function controls the background scanning based on hdev->pend_le_conns - * list. If there are pending LE connection we start the background scanning, - * otherwise we stop it. - * - * This function requires the caller holds hdev->lock. - */ -void hci_update_background_scan(struct hci_dev *hdev) -{ - struct hci_request req; - struct hci_conn *conn; - int err; - - if (!test_bit(HCI_UP, &hdev->flags) || - test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags) || - test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || - test_bit(HCI_UNREGISTER, &hdev->dev_flags)) - return; - - /* No point in doing scanning if LE support hasn't been enabled */ - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - return; - - /* If discovery is active don't interfere with it */ - if (hdev->discovery.state != DISCOVERY_STOPPED) - return; - - /* Reset RSSI and UUID filters when starting background scanning - * since these filters are meant for service discovery only. - * - * The Start Discovery and Start Service Discovery operations - * ensure to set proper values for RSSI threshold and UUID - * filter list. So it is safe to just reset them here. - */ - hci_discovery_filter_clear(hdev); - - hci_req_init(&req, hdev); - - if (list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) { - /* If there is no pending LE connections or devices - * to be scanned for, we should stop the background - * scanning. - */ - - /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return; - - hci_req_add_le_scan_disable(&req); - - BT_DBG("%s stopping background scanning", hdev->name); - } else { - /* If there is at least one pending LE connection, we should - * keep the background scan running. - */ - - /* If controller is connecting, we should not start scanning - * since some controllers are not able to scan and connect at - * the same time. - */ - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (conn) - return; - - /* If controller is currently scanning, we stop it to ensure we - * don't miss any advertising (due to duplicates filter). - */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - hci_req_add_le_scan_disable(&req); - - hci_req_add_le_passive_scan(&req); - - BT_DBG("%s starting background scanning", hdev->name); - } - - err = hci_req_run(&req, update_background_scan_complete); - if (err) - BT_ERR("Failed to run HCI request: err %d", err); -} - -static bool disconnected_whitelist_entries(struct hci_dev *hdev) -{ - struct bdaddr_list *b; - - list_for_each_entry(b, &hdev->whitelist, list) { - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); - if (!conn) - return true; - - if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) - return true; - } - - return false; -} - -void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req) -{ - u8 scan; - - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return; - - if (!hdev_is_powered(hdev)) - return; - - if (mgmt_powering_down(hdev)) - return; - - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || - disconnected_whitelist_entries(hdev)) - scan = SCAN_PAGE; - else - scan = SCAN_DISABLED; - - if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) - return; - - if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) - scan |= SCAN_INQUIRY; - - if (req) - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); - else - hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); -} diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c new file mode 100644 index 000000000000..65261e5d4b84 --- /dev/null +++ b/net/bluetooth/hci_debugfs.c @@ -0,0 +1,1056 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include <linux/debugfs.h> + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "hci_debugfs.h" + +static int features_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + u8 p; + + hci_dev_lock(hdev); + for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { + seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " + "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p, + hdev->features[p][0], hdev->features[p][1], + hdev->features[p][2], hdev->features[p][3], + hdev->features[p][4], hdev->features[p][5], + hdev->features[p][6], hdev->features[p][7]); + } + if (lmp_le_capable(hdev)) + seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " + "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", + hdev->le_features[0], hdev->le_features[1], + hdev->le_features[2], hdev->le_features[3], + hdev->le_features[4], hdev->le_features[5], + hdev->le_features[6], hdev->le_features[7]); + hci_dev_unlock(hdev); + + return 0; +} + +static int features_open(struct inode *inode, struct file *file) +{ + return single_open(file, features_show, inode->i_private); +} + +static const struct file_operations features_fops = { + .open = features_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int device_list_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct hci_conn_params *p; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->whitelist, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + list_for_each_entry(p, &hdev->le_conn_params, list) { + seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type, + p->auto_connect); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int device_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, device_list_show, inode->i_private); +} + +static const struct file_operations device_list_fops = { + .open = device_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int blacklist_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->blacklist, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + hci_dev_unlock(hdev); + + return 0; +} + +static int blacklist_open(struct inode *inode, struct file *file) +{ + return single_open(file, blacklist_show, inode->i_private); +} + +static const struct file_operations blacklist_fops = { + .open = blacklist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int uuids_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bt_uuid *uuid; + + hci_dev_lock(hdev); + list_for_each_entry(uuid, &hdev->uuids, list) { + u8 i, val[16]; + + /* The Bluetooth UUID values are stored in big endian, + * but with reversed byte order. So convert them into + * the right order for the %pUb modifier. + */ + for (i = 0; i < 16; i++) + val[i] = uuid->uuid[15 - i]; + + seq_printf(f, "%pUb\n", val); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int uuids_open(struct inode *inode, struct file *file) +{ + return single_open(file, uuids_show, inode->i_private); +} + +static const struct file_operations uuids_fops = { + .open = uuids_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int remote_oob_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct oob_data *data; + + hci_dev_lock(hdev); + list_for_each_entry(data, &hdev->remote_oob_data, list) { + seq_printf(f, "%pMR (type %u) %u %*phN %*phN %*phN %*phN\n", + &data->bdaddr, data->bdaddr_type, data->present, + 16, data->hash192, 16, data->rand192, + 16, data->hash256, 19, data->rand256); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int remote_oob_open(struct inode *inode, struct file *file) +{ + return single_open(file, remote_oob_show, inode->i_private); +} + +static const struct file_operations remote_oob_fops = { + .open = remote_oob_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int conn_info_min_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val > hdev->conn_info_max_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_min_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_min_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_min_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, + conn_info_min_age_set, "%llu\n"); + +static int conn_info_max_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val < hdev->conn_info_min_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_max_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_max_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_max_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, + conn_info_max_age_set, "%llu\n"); + +static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations use_debug_keys_fops = { + .open = simple_open, + .read = use_debug_keys_read, + .llseek = default_llseek, +}; + +static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations sc_only_mode_fops = { + .open = simple_open, + .read = sc_only_mode_read, + .llseek = default_llseek, +}; + +void hci_debugfs_create_common(struct hci_dev *hdev) +{ + debugfs_create_file("features", 0444, hdev->debugfs, hdev, + &features_fops); + debugfs_create_u16("manufacturer", 0444, hdev->debugfs, + &hdev->manufacturer); + debugfs_create_u8("hci_version", 0444, hdev->debugfs, &hdev->hci_ver); + debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); + debugfs_create_u8("hardware_error", 0444, hdev->debugfs, + &hdev->hw_error_code); + + debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, + &device_list_fops); + debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, + &blacklist_fops); + debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + debugfs_create_file("remote_oob", 0400, hdev->debugfs, hdev, + &remote_oob_fops); + + debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, + &conn_info_min_age_fops); + debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, + &conn_info_max_age_fops); + + if (lmp_ssp_capable(hdev) || lmp_le_capable(hdev)) + debugfs_create_file("use_debug_keys", 0444, hdev->debugfs, + hdev, &use_debug_keys_fops); + + if (lmp_sc_capable(hdev) || lmp_le_capable(hdev)) + debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, + hdev, &sc_only_mode_fops); +} + +static int inquiry_cache_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct discovery_state *cache = &hdev->discovery; + struct inquiry_entry *e; + + hci_dev_lock(hdev); + + list_for_each_entry(e, &cache->all, all) { + struct inquiry_data *data = &e->data; + seq_printf(f, "%pMR %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + &data->bdaddr, + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); + } + + hci_dev_unlock(hdev); + + return 0; +} + +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + return single_open(file, inquiry_cache_show, inode->i_private); +} + +static const struct file_operations inquiry_cache_fops = { + .open = inquiry_cache_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int link_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct link_key *key; + + rcu_read_lock(); + list_for_each_entry_rcu(key, &hdev->link_keys, list) + seq_printf(f, "%pMR %u %*phN %u\n", &key->bdaddr, key->type, + HCI_LINK_KEY_SIZE, key->val, key->pin_len); + rcu_read_unlock(); + + return 0; +} + +static int link_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, link_keys_show, inode->i_private); +} + +static const struct file_operations link_keys_fops = { + .open = link_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int dev_class_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "0x%.2x%.2x%.2x\n", hdev->dev_class[2], + hdev->dev_class[1], hdev->dev_class[0]); + hci_dev_unlock(hdev); + + return 0; +} + +static int dev_class_open(struct inode *inode, struct file *file) +{ + return single_open(file, dev_class_show, inode->i_private); +} + +static const struct file_operations dev_class_fops = { + .open = dev_class_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int voice_setting_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->voice_setting; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, + NULL, "0x%4.4llx\n"); + +static ssize_t ssp_debug_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hdev->ssp_debug_mode ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations ssp_debug_mode_fops = { + .open = simple_open, + .read = ssp_debug_mode_read, + .llseek = default_llseek, +}; + +static int auto_accept_delay_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + hdev->auto_accept_delay = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int auto_accept_delay_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->auto_accept_delay; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, + auto_accept_delay_set, "%llu\n"); + +static int idle_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val != 0 && (val < 500 || val > 3600000)) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->idle_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int idle_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->idle_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, + idle_timeout_set, "%llu\n"); + +static int sniff_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->sniff_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int sniff_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->sniff_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get, + sniff_min_interval_set, "%llu\n"); + +static int sniff_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->sniff_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int sniff_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->sniff_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, + sniff_max_interval_set, "%llu\n"); + +void hci_debugfs_create_bredr(struct hci_dev *hdev) +{ + debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, + &inquiry_cache_fops); + debugfs_create_file("link_keys", 0400, hdev->debugfs, hdev, + &link_keys_fops); + debugfs_create_file("dev_class", 0444, hdev->debugfs, hdev, + &dev_class_fops); + debugfs_create_file("voice_setting", 0444, hdev->debugfs, hdev, + &voice_setting_fops); + + if (lmp_ssp_capable(hdev)) { + debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs, + hdev, &ssp_debug_mode_fops); + debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, + hdev, &auto_accept_delay_fops); + } + + if (lmp_sniff_capable(hdev)) { + debugfs_create_file("idle_timeout", 0644, hdev->debugfs, + hdev, &idle_timeout_fops); + debugfs_create_file("sniff_min_interval", 0644, hdev->debugfs, + hdev, &sniff_min_interval_fops); + debugfs_create_file("sniff_max_interval", 0644, hdev->debugfs, + hdev, &sniff_max_interval_fops); + } +} + +static int identity_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + bdaddr_t addr; + u8 addr_type; + + hci_dev_lock(hdev); + + hci_copy_identity_address(hdev, &addr, &addr_type); + + seq_printf(f, "%pMR (type %u) %*phN %pMR\n", &addr, addr_type, + 16, hdev->irk, &hdev->rpa); + + hci_dev_unlock(hdev); + + return 0; +} + +static int identity_open(struct inode *inode, struct file *file) +{ + return single_open(file, identity_show, inode->i_private); +} + +static const struct file_operations identity_fops = { + .open = identity_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int rpa_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + /* Require the RPA timeout to be at least 30 seconds and at most + * 24 hours. + */ + if (val < 30 || val > (60 * 60 * 24)) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->rpa_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int rpa_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->rpa_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, + rpa_timeout_set, "%llu\n"); + +static int random_address_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "%pMR\n", &hdev->random_addr); + hci_dev_unlock(hdev); + + return 0; +} + +static int random_address_open(struct inode *inode, struct file *file) +{ + return single_open(file, random_address_show, inode->i_private); +} + +static const struct file_operations random_address_fops = { + .open = random_address_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int static_address_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "%pMR\n", &hdev->static_addr); + hci_dev_unlock(hdev); + + return 0; +} + +static int static_address_open(struct inode *inode, struct file *file) +{ + return single_open(file, static_address_show, inode->i_private); +} + +static const struct file_operations static_address_fops = { + .open = static_address_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t force_static_address_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_static_address_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (test_bit(HCI_UP, &hdev->flags)) + return -EBUSY; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) + return -EALREADY; + + change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_static_address_fops = { + .open = simple_open, + .read = force_static_address_read, + .write = force_static_address_write, + .llseek = default_llseek, +}; + +static int white_list_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->le_white_list, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + hci_dev_unlock(hdev); + + return 0; +} + +static int white_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, white_list_show, inode->i_private); +} + +static const struct file_operations white_list_fops = { + .open = white_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int identity_resolving_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct smp_irk *irk; + + rcu_read_lock(); + list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { + seq_printf(f, "%pMR (type %u) %*phN %pMR\n", + &irk->bdaddr, irk->addr_type, + 16, irk->val, &irk->rpa); + } + rcu_read_unlock(); + + return 0; +} + +static int identity_resolving_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, identity_resolving_keys_show, + inode->i_private); +} + +static const struct file_operations identity_resolving_keys_fops = { + .open = identity_resolving_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int long_term_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct smp_ltk *ltk; + + rcu_read_lock(); + list_for_each_entry_rcu(ltk, &hdev->long_term_keys, list) + seq_printf(f, "%pMR (type %u) %u 0x%02x %u %.4x %.16llx %*phN\n", + <k->bdaddr, ltk->bdaddr_type, ltk->authenticated, + ltk->type, ltk->enc_size, __le16_to_cpu(ltk->ediv), + __le64_to_cpu(ltk->rand), 16, ltk->val); + rcu_read_unlock(); + + return 0; +} + +static int long_term_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, long_term_keys_show, inode->i_private); +} + +static const struct file_operations long_term_keys_fops = { + .open = long_term_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int conn_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, + conn_min_interval_set, "%llu\n"); + +static int conn_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, + conn_max_interval_set, "%llu\n"); + +static int conn_latency_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val > 0x01f3) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_latency = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_latency_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_latency; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, + conn_latency_set, "%llu\n"); + +static int supervision_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x000a || val > 0x0c80) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_supv_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int supervision_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_supv_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, + supervision_timeout_set, "%llu\n"); + +static int adv_channel_map_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x01 || val > 0x07) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_channel_map = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_channel_map_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_channel_map; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, + adv_channel_map_set, "%llu\n"); + +static int adv_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, + adv_min_interval_set, "%llu\n"); + +static int adv_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, + adv_max_interval_set, "%llu\n"); + +void hci_debugfs_create_le(struct hci_dev *hdev) +{ + debugfs_create_file("identity", 0400, hdev->debugfs, hdev, + &identity_fops); + debugfs_create_file("rpa_timeout", 0644, hdev->debugfs, hdev, + &rpa_timeout_fops); + debugfs_create_file("random_address", 0444, hdev->debugfs, hdev, + &random_address_fops); + debugfs_create_file("static_address", 0444, hdev->debugfs, hdev, + &static_address_fops); + + /* For controllers with a public address, provide a debug + * option to force the usage of the configured static + * address. By default the public address is used. + */ + if (bacmp(&hdev->bdaddr, BDADDR_ANY)) + debugfs_create_file("force_static_address", 0644, + hdev->debugfs, hdev, + &force_static_address_fops); + + debugfs_create_u8("white_list_size", 0444, hdev->debugfs, + &hdev->le_white_list_size); + debugfs_create_file("white_list", 0444, hdev->debugfs, hdev, + &white_list_fops); + debugfs_create_file("identity_resolving_keys", 0400, hdev->debugfs, + hdev, &identity_resolving_keys_fops); + debugfs_create_file("long_term_keys", 0400, hdev->debugfs, hdev, + &long_term_keys_fops); + debugfs_create_file("conn_min_interval", 0644, hdev->debugfs, hdev, + &conn_min_interval_fops); + debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, hdev, + &conn_max_interval_fops); + debugfs_create_file("conn_latency", 0644, hdev->debugfs, hdev, + &conn_latency_fops); + debugfs_create_file("supervision_timeout", 0644, hdev->debugfs, hdev, + &supervision_timeout_fops); + debugfs_create_file("adv_channel_map", 0644, hdev->debugfs, hdev, + &adv_channel_map_fops); + debugfs_create_file("adv_min_interval", 0644, hdev->debugfs, hdev, + &adv_min_interval_fops); + debugfs_create_file("adv_max_interval", 0644, hdev->debugfs, hdev, + &adv_max_interval_fops); + debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs, + &hdev->discov_interleaved_timeout); +} + +void hci_debugfs_create_conn(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + char name[6]; + + if (IS_ERR_OR_NULL(hdev->debugfs)) + return; + + snprintf(name, sizeof(name), "%u", conn->handle); + conn->debugfs = debugfs_create_dir(name, hdev->debugfs); +} diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h new file mode 100644 index 000000000000..fb68efe083c5 --- /dev/null +++ b/net/bluetooth/hci_debugfs.h @@ -0,0 +1,26 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +void hci_debugfs_create_common(struct hci_dev *hdev); +void hci_debugfs_create_bredr(struct hci_dev *hdev); +void hci_debugfs_create_le(struct hci_dev *hdev); +void hci_debugfs_create_conn(struct hci_conn *conn); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 322abbbbcef9..a3fb094822b6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -30,10 +30,15 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/mgmt.h> +#include "hci_request.h" +#include "hci_debugfs.h" #include "a2mp.h" #include "amp.h" #include "smp.h" +#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ + "\x00\x00\x00\x00\x00\x00\x00\x00" + /* Handle HCI Event packets */ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) @@ -195,7 +200,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) /* Reset all non-persistent flags */ hdev->dev_flags &= ~HCI_PERSISTENT_MASK; - hdev->discovery.state = DISCOVERY_STOPPED; + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hdev->inq_tx_power = HCI_TX_POWER_INVALID; hdev->adv_tx_power = HCI_TX_POWER_INVALID; @@ -212,6 +218,40 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) hci_bdaddr_list_clear(&hdev->le_white_list); } +static void hci_cc_read_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_stored_link_key *rp = (void *)skb->data; + struct hci_cp_read_stored_link_key *sent; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_READ_STORED_LINK_KEY); + if (!sent) + return; + + if (!rp->status && sent->read_all == 0x01) { + hdev->stored_max_keys = rp->max_keys; + hdev->stored_num_keys = rp->num_keys; + } +} + +static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_delete_stored_link_key *rp = (void *)skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + if (rp->num_keys <= hdev->stored_num_keys) + hdev->stored_num_keys -= rp->num_keys; + else + hdev->stored_num_keys = 0; +} + static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -242,7 +282,8 @@ static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags)) + if (test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags)) memcpy(hdev->dev_name, rp->name, HCI_MAX_NAME_LENGTH); } @@ -257,6 +298,8 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) if (!sent) return; + hci_dev_lock(hdev); + if (!status) { __u8 param = *((__u8 *) sent); @@ -268,6 +311,8 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_auth_enable_complete(hdev, status); + + hci_dev_unlock(hdev); } static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -443,6 +488,8 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) if (!sent) return; + hci_dev_lock(hdev); + if (!status) { if (sent->mode) hdev->features[1][0] |= LMP_HOST_SSP; @@ -458,6 +505,8 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) else clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags); } + + hci_dev_unlock(hdev); } static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) @@ -471,6 +520,8 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) if (!sent) return; + hci_dev_lock(hdev); + if (!status) { if (sent->support) hdev->features[1][0] |= LMP_HOST_SC; @@ -478,14 +529,14 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) hdev->features[1][0] &= ~LMP_HOST_SC; } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) - mgmt_sc_enable_complete(hdev, sent->support, status); - else if (!status) { + if (!test_bit(HCI_MGMT, &hdev->dev_flags) && !status) { if (sent->support) set_bit(HCI_SC_ENABLED, &hdev->dev_flags); else clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); } + + hci_dev_unlock(hdev); } static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) @@ -497,7 +548,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags)) { hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); hdev->lmp_ver = rp->lmp_ver; @@ -516,7 +568,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags)) + if (test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags)) memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); } @@ -1135,6 +1188,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, if (!cp) return; + hci_dev_lock(hdev); + switch (cp->enable) { case LE_SCAN_ENABLE: set_bit(HCI_LE_SCAN, &hdev->dev_flags); @@ -1184,6 +1239,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable); break; } + + hci_dev_unlock(hdev); } static void hci_cc_le_read_white_list_size(struct hci_dev *hdev, @@ -1263,6 +1320,55 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev, memcpy(hdev->le_states, rp->le_states, 8); } +static void hci_cc_le_read_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_def_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_def_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_def_tx_time = le16_to_cpu(rp->tx_time); +} + +static void hci_cc_le_write_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_cp_le_write_def_data_len *sent; + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_WRITE_DEF_DATA_LEN); + if (!sent) + return; + + hdev->le_def_tx_len = le16_to_cpu(sent->tx_len); + hdev->le_def_tx_time = le16_to_cpu(sent->tx_time); +} + +static void hci_cc_le_read_max_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_max_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_max_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_max_tx_time = le16_to_cpu(rp->tx_time); + hdev->le_max_rx_len = le16_to_cpu(rp->rx_len); + hdev->le_max_rx_time = le16_to_cpu(rp->rx_time); +} + static void hci_cc_write_le_host_supported(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1278,6 +1384,8 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, if (!sent) return; + hci_dev_lock(hdev); + if (sent->le) { hdev->features[1][0] |= LMP_HOST_LE; set_bit(HCI_LE_ENABLED, &hdev->dev_flags); @@ -1291,6 +1399,8 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, hdev->features[1][0] |= LMP_HOST_LE_BREDR; else hdev->features[1][0] &= ~LMP_HOST_LE_BREDR; + + hci_dev_unlock(hdev); } static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb) @@ -1379,6 +1489,21 @@ unlock: hci_dev_unlock(hdev); } +static void hci_cc_write_ssp_debug_mode(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 status = *((u8 *) skb->data); + u8 *mode; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + mode = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE); + if (mode) + hdev->ssp_debug_mode = *mode; +} + static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -2092,6 +2217,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) @@ -2107,7 +2233,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } /* Set packet type for incoming connection */ @@ -2174,7 +2300,12 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags) && + /* Require HCI_CONNECTABLE or a whitelist entry to accept the + * connection. These features are only touched through mgmt so + * only do the checks if HCI_MGMT is set. + */ + if (test_bit(HCI_MGMT, &hdev->dev_flags) && + !test_bit(HCI_CONNECTABLE, &hdev->dev_flags) && !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr, BDADDR_BREDR)) { hci_reject_conn(hdev, &ev->bdaddr); @@ -2288,7 +2419,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); @@ -2555,7 +2686,8 @@ static void hci_remote_features_evt(struct hci_dev *hdev, if (conn->state != BT_CONFIG) goto unlock; - if (!ev->status && lmp_ssp_capable(hdev) && lmp_ssp_capable(conn)) { + if (!ev->status && lmp_ext_feat_capable(hdev) && + lmp_ext_feat_capable(conn)) { struct hci_cp_read_remote_ext_features cp; cp.handle = ev->handle; cp.page = 0x01; @@ -2634,6 +2766,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_reset(hdev, skb); break; + case HCI_OP_READ_STORED_LINK_KEY: + hci_cc_read_stored_link_key(hdev, skb); + break; + + case HCI_OP_DELETE_STORED_LINK_KEY: + hci_cc_delete_stored_link_key(hdev, skb); + break; + case HCI_OP_WRITE_LOCAL_NAME: hci_cc_write_local_name(hdev, skb); break; @@ -2826,6 +2966,18 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_supported_states(hdev, skb); break; + case HCI_OP_LE_READ_DEF_DATA_LEN: + hci_cc_le_read_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_WRITE_DEF_DATA_LEN: + hci_cc_le_write_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_READ_MAX_DATA_LEN: + hci_cc_le_read_max_data_len(hdev, skb); + break; + case HCI_OP_WRITE_LE_HOST_SUPPORTED: hci_cc_write_le_host_supported(hdev, skb); break; @@ -2846,6 +2998,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_tx_power(hdev, skb); break; + case HCI_OP_WRITE_SSP_DEBUG_MODE: + hci_cc_write_ssp_debug_mode(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; @@ -2964,7 +3120,9 @@ static void hci_hardware_error_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_hardware_error *ev = (void *) skb->data; - BT_ERR("%s hardware error 0x%2.2x", hdev->name, ev->code); + hdev->hw_error_code = ev->code; + + queue_work(hdev->req_workqueue, &hdev->error_reset); } static void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -3556,6 +3714,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); break; @@ -3722,6 +3881,52 @@ static u8 hci_get_auth_req(struct hci_conn *conn) return (conn->remote_auth & ~0x01) | (conn->auth_type & 0x01); } +static u8 bredr_oob_data_present(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct oob_data *data; + + data = hci_find_remote_oob_data(hdev, &conn->dst, BDADDR_BREDR); + if (!data) + return 0x00; + + if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) { + if (bredr_sc_enabled(hdev)) { + /* When Secure Connections is enabled, then just + * return the present value stored with the OOB + * data. The stored value contains the right present + * information. However it can only be trusted when + * not in Secure Connection Only mode. + */ + if (!test_bit(HCI_SC_ONLY, &hdev->dev_flags)) + return data->present; + + /* When Secure Connections Only mode is enabled, then + * the P-256 values are required. If they are not + * available, then do not declare that OOB data is + * present. + */ + if (!memcmp(data->rand256, ZERO_KEY, 16) || + !memcmp(data->hash256, ZERO_KEY, 16)) + return 0x00; + + return 0x02; + } + + /* When Secure Connections is not enabled or actually + * not supported by the hardware, then check that if + * P-192 data values are present. + */ + if (!memcmp(data->rand192, ZERO_KEY, 16) || + !memcmp(data->hash192, ZERO_KEY, 16)) + return 0x00; + + return 0x01; + } + + return 0x00; +} + static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_io_capa_request *ev = (void *) skb->data; @@ -3773,12 +3978,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->auth_type &= HCI_AT_NO_BONDING_MITM; cp.authentication = conn->auth_type; - - if (hci_find_remote_oob_data(hdev, &conn->dst, BDADDR_BREDR) && - (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags))) - cp.oob_data = 0x01; - else - cp.oob_data = 0x00; + cp.oob_data = bredr_oob_data_present(conn); hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_REPLY, sizeof(cp), &cp); @@ -4030,33 +4230,39 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, goto unlock; data = hci_find_remote_oob_data(hdev, &ev->bdaddr, BDADDR_BREDR); - if (data) { - if (bredr_sc_enabled(hdev)) { - struct hci_cp_remote_oob_ext_data_reply cp; - - bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.hash192, data->hash192, sizeof(cp.hash192)); - memcpy(cp.rand192, data->rand192, sizeof(cp.rand192)); - memcpy(cp.hash256, data->hash256, sizeof(cp.hash256)); - memcpy(cp.rand256, data->rand256, sizeof(cp.rand256)); + if (!data) { + struct hci_cp_remote_oob_data_neg_reply cp; - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_EXT_DATA_REPLY, - sizeof(cp), &cp); - } else { - struct hci_cp_remote_oob_data_reply cp; + bacpy(&cp.bdaddr, &ev->bdaddr); + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, + sizeof(cp), &cp); + goto unlock; + } - bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.hash, data->hash192, sizeof(cp.hash)); - memcpy(cp.rand, data->rand192, sizeof(cp.rand)); + if (bredr_sc_enabled(hdev)) { + struct hci_cp_remote_oob_ext_data_reply cp; - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, - sizeof(cp), &cp); + bacpy(&cp.bdaddr, &ev->bdaddr); + if (test_bit(HCI_SC_ONLY, &hdev->dev_flags)) { + memset(cp.hash192, 0, sizeof(cp.hash192)); + memset(cp.rand192, 0, sizeof(cp.rand192)); + } else { + memcpy(cp.hash192, data->hash192, sizeof(cp.hash192)); + memcpy(cp.rand192, data->rand192, sizeof(cp.rand192)); } + memcpy(cp.hash256, data->hash256, sizeof(cp.hash256)); + memcpy(cp.rand256, data->rand256, sizeof(cp.rand256)); + + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_EXT_DATA_REPLY, + sizeof(cp), &cp); } else { - struct hci_cp_remote_oob_data_neg_reply cp; + struct hci_cp_remote_oob_data_reply cp; bacpy(&cp.bdaddr, &ev->bdaddr); - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, + memcpy(cp.hash, data->hash192, sizeof(cp.hash)); + memcpy(cp.rand, data->rand192, sizeof(cp.rand)); + + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, sizeof(cp), &cp); } @@ -4096,6 +4302,7 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hcon->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(hcon); + hci_debugfs_create_conn(hcon); hci_conn_add_sysfs(hcon); amp_physical_cfm(bredr_hcon, hcon); @@ -4302,6 +4509,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->le_conn_latency = le16_to_cpu(ev->latency); conn->le_supv_timeout = le16_to_cpu(ev->supervision_timeout); + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c new file mode 100644 index 000000000000..b59f92c6df0c --- /dev/null +++ b/net/bluetooth/hci_request.c @@ -0,0 +1,556 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "smp.h" +#include "hci_request.h" + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; + req->err = 0; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + + /* If an error occurred during request building, remove all HCI + * commands queued on the HCI request queue. + */ + if (req->err) { + skb_queue_purge(&req->cmd_q); + return req->err; + } + + /* Do not allow empty requests */ + if (skb_queue_empty(&req->cmd_q)) + return -ENODATA; + + skb = skb_peek_tail(&req->cmd_q); + bt_cb(skb)->req.complete = complete; + + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + + queue_work(hdev->workqueue, &hdev->cmd_work); + + return 0; +} + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param) +{ + int len = HCI_COMMAND_HDR_SIZE + plen; + struct hci_command_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(len, GFP_ATOMIC); + if (!skb) + return NULL; + + hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); + hdr->opcode = cpu_to_le16(opcode); + hdr->plen = plen; + + if (plen) + memcpy(skb_put(skb, plen), param, plen); + + BT_DBG("skb len %d", skb->len); + + bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; + bt_cb(skb)->opcode = opcode; + + return skb; +} + +/* Queue a command to an asynchronous HCI request */ +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + /* If an error occurred during request building, there is no point in + * queueing the HCI command. We can simply return. + */ + if (req->err) + return; + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command (opcode 0x%4.4x)", + hdev->name, opcode); + req->err = -ENOMEM; + return; + } + + if (skb_queue_empty(&req->cmd_q)) + bt_cb(skb)->req.start = true; + + bt_cb(skb)->req.event = event; + + skb_queue_tail(&req->cmd_q, skb); +} + +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param) +{ + hci_req_add_ev(req, opcode, plen, param, 0); +} + +void hci_req_add_le_scan_disable(struct hci_request *req) +{ + struct hci_cp_le_set_scan_enable cp; + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_DISABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); +} + +static void add_to_white_list(struct hci_request *req, + struct hci_conn_params *params) +{ + struct hci_cp_le_add_to_white_list cp; + + cp.bdaddr_type = params->addr_type; + bacpy(&cp.bdaddr, ¶ms->addr); + + hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); +} + +static u8 update_white_list(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + struct bdaddr_list *b; + uint8_t white_list_entries = 0; + + /* Go through the current white list programmed into the + * controller one by one and check if that address is still + * in the list of pending connections or list of devices to + * report. If not present in either list, then queue the + * command to remove it from the controller. + */ + list_for_each_entry(b, &hdev->le_white_list, list) { + struct hci_cp_le_del_from_white_list cp; + + if (hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, b->bdaddr_type) || + hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, b->bdaddr_type)) { + white_list_entries++; + continue; + } + + cp.bdaddr_type = b->bdaddr_type; + bacpy(&cp.bdaddr, &b->bdaddr); + + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, + sizeof(cp), &cp); + } + + /* Since all no longer valid white list entries have been + * removed, walk through the list of pending connections + * and ensure that any new device gets programmed into + * the controller. + * + * If the list of the devices is larger than the list of + * available white list entries in the controller, then + * just abort and return filer policy value to not use the + * white list. + */ + list_for_each_entry(params, &hdev->pend_le_conns, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + ¶ms->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* After adding all new pending connections, walk through + * the list of pending reports and also add these to the + * white list if there is still space. + */ + list_for_each_entry(params, &hdev->pend_le_reports, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + ¶ms->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* Select filter policy to use white list */ + return 0x01; +} + +void hci_req_add_le_passive_scan(struct hci_request *req) +{ + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + struct hci_dev *hdev = req->hdev; + u8 own_addr_type; + u8 filter_policy; + + /* Set require_privacy to false since no SCAN_REQ are send + * during passive scanning. Not using an non-resolvable address + * here is important so that peer devices using direct + * advertising with our address will be correctly reported + * by the controller. + */ + if (hci_update_random_address(req, false, &own_addr_type)) + return; + + /* Adding or removing entries from the white list must + * happen before enabling scanning. The controller does + * not allow white list modification while scanning. + */ + filter_policy = update_white_list(req); + + /* When the controller is using random resolvable addresses and + * with that having LE privacy enabled, then controllers with + * Extended Scanner Filter Policies support can now enable support + * for handling directed advertising. + * + * So instead of using filter polices 0x00 (no whitelist) + * and 0x01 (whitelist enabled) use the new filter policies + * 0x02 (no whitelist) and 0x03 (whitelist enabled). + */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && + (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) + filter_policy |= 0x02; + + memset(¶m_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_PASSIVE; + param_cp.interval = cpu_to_le16(hdev->le_scan_interval); + param_cp.window = cpu_to_le16(hdev->le_scan_window); + param_cp.own_address_type = own_addr_type; + param_cp.filter_policy = filter_policy; + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + ¶m_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); +} + +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +{ + struct hci_dev *hdev = req->hdev; + + /* If we're advertising or initiating an LE connection we can't + * go ahead and change the random address at this time. This is + * because the eventual initiator address used for the + * subsequently created connection will be undefined (some + * controllers use the new address and others the one we had + * when the operation started). + * + * In this kind of scenario skip the update and let the random + * address be updated at the next cycle. + */ + if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || + hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { + BT_DBG("Deferring random address update"); + set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + return; + } + + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); +} + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type) +{ + struct hci_dev *hdev = req->hdev; + int err; + + /* If privacy is enabled use a resolvable private address. If + * current RPA has expired or there is something else than + * the current RPA in use, then generate a new one. + */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { + int to; + + *own_addr_type = ADDR_LE_DEV_RANDOM; + + if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && + !bacmp(&hdev->random_addr, &hdev->rpa)) + return 0; + + err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); + if (err < 0) { + BT_ERR("%s failed to generate new RPA", hdev->name); + return err; + } + + set_random_addr(req, &hdev->rpa); + + to = msecs_to_jiffies(hdev->rpa_timeout * 1000); + queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); + + return 0; + } + + /* In case of required privacy without resolvable private address, + * use an non-resolvable private address. This is useful for active + * scanning and non-connectable advertising. + */ + if (require_privacy) { + bdaddr_t nrpa; + + while (true) { + /* The non-resolvable private address is generated + * from random six bytes with the two most significant + * bits cleared. + */ + get_random_bytes(&nrpa, 6); + nrpa.b[5] &= 0x3f; + + /* The non-resolvable private address shall not be + * equal to the public address. + */ + if (bacmp(&hdev->bdaddr, &nrpa)) + break; + } + + *own_addr_type = ADDR_LE_DEV_RANDOM; + set_random_addr(req, &nrpa); + return 0; + } + + /* If forcing static address is in use or there is no public + * address use the static address as random address (but skip + * the HCI command if the current random address is already the + * static one. + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configured, then use that + * address instead of the public BR/EDR address. + */ + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + *own_addr_type = ADDR_LE_DEV_RANDOM; + if (bacmp(&hdev->static_addr, &hdev->random_addr)) + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, + &hdev->static_addr); + return 0; + } + + /* Neither privacy nor static address is being used so use a + * public address. + */ + *own_addr_type = ADDR_LE_DEV_PUBLIC; + + return 0; +} + +static bool disconnected_whitelist_entries(struct hci_dev *hdev) +{ + struct bdaddr_list *b; + + list_for_each_entry(b, &hdev->whitelist, list) { + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); + if (!conn) + return true; + + if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) + return true; + } + + return false; +} + +void __hci_update_page_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 scan; + + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + return; + + if (!hdev_is_powered(hdev)) + return; + + if (mgmt_powering_down(hdev)) + return; + + if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || + disconnected_whitelist_entries(hdev)) + scan = SCAN_PAGE; + else + scan = SCAN_DISABLED; + + if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) + return; + + if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + scan |= SCAN_INQUIRY; + + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +void hci_update_page_scan(struct hci_dev *hdev) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_update_page_scan(&req); + hci_req_run(&req, NULL); +} + +/* This function controls the background scanning based on hdev->pend_le_conns + * list. If there are pending LE connection we start the background scanning, + * otherwise we stop it. + * + * This function requires the caller holds hdev->lock. + */ +void __hci_update_background_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn *conn; + + if (!test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags) || + test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || + test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + return; + + /* No point in doing scanning if LE support hasn't been enabled */ + if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + return; + + /* If discovery is active don't interfere with it */ + if (hdev->discovery.state != DISCOVERY_STOPPED) + return; + + /* Reset RSSI and UUID filters when starting background scanning + * since these filters are meant for service discovery only. + * + * The Start Discovery and Start Service Discovery operations + * ensure to set proper values for RSSI threshold and UUID + * filter list. So it is safe to just reset them here. + */ + hci_discovery_filter_clear(hdev); + + if (list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports)) { + /* If there is no pending LE connections or devices + * to be scanned for, we should stop the background + * scanning. + */ + + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + hci_req_add_le_scan_disable(req); + + BT_DBG("%s stopping background scanning", hdev->name); + } else { + /* If there is at least one pending LE connection, we should + * keep the background scan running. + */ + + /* If controller is connecting, we should not start scanning + * since some controllers are not able to scan and connect at + * the same time. + */ + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + return; + + /* If controller is currently scanning, we stop it to ensure we + * don't miss any advertising (due to duplicates filter). + */ + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + hci_req_add_le_scan_disable(req); + + hci_req_add_le_passive_scan(req); + + BT_DBG("%s starting background scanning", hdev->name); + } +} + +static void update_background_scan_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + if (status) + BT_DBG("HCI request failed to update background scanning: " + "status 0x%2.2x", status); +} + +void hci_update_background_scan(struct hci_dev *hdev) +{ + int err; + struct hci_request req; + + hci_req_init(&req, hdev); + + __hci_update_background_scan(&req); + + err = hci_req_run(&req, update_background_scan_complete); + if (err && err != -ENODATA) + BT_ERR("Failed to run HCI request: err %d", err); +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h new file mode 100644 index 000000000000..adf074d33544 --- /dev/null +++ b/net/bluetooth/hci_request.h @@ -0,0 +1,54 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. + */ + int err; +}; + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev); +int hci_req_run(struct hci_request *req, hci_req_complete_t complete); +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param); +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param); + +void hci_req_add_le_scan_disable(struct hci_request *req); +void hci_req_add_le_passive_scan(struct hci_request *req); + +void hci_update_page_scan(struct hci_dev *hdev); +void __hci_update_page_scan(struct hci_request *req); + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type); + +void hci_update_background_scan(struct hci_dev *hdev); +void __hci_update_background_scan(struct hci_request *req); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2c245fdf319a..1d65c5be7c82 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -216,11 +216,39 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) read_unlock(&hci_sk_list.lock); } +static void queue_monitor_skb(struct sk_buff *skb) +{ + struct sock *sk; + + BT_DBG("len %d", skb->len); + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *nskb; + + if (sk->sk_state != BT_BOUND) + continue; + + if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) + continue; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&hci_sk_list.lock); +} + /* Send frame to monitor socket */ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { - struct sock *sk; struct sk_buff *skb_copy = NULL; + struct hci_mon_hdr *hdr; __le16 opcode; if (!atomic_read(&monitor_promisc)) @@ -251,74 +279,21 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) return; } - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) - continue; - - if (!skb_copy) { - struct hci_mon_hdr *hdr; - - /* Create a private copy with headroom */ - skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, - GFP_ATOMIC, true); - if (!skb_copy) - continue; - - /* Put header before the data */ - hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); - hdr->opcode = opcode; - hdr->index = cpu_to_le16(hdev->id); - hdr->len = cpu_to_le16(skb->len); - } - - nskb = skb_clone(skb_copy, GFP_ATOMIC); - if (!nskb) - continue; - - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } + /* Create a private copy with headroom */ + skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, GFP_ATOMIC, true); + if (!skb_copy) + return; - read_unlock(&hci_sk_list.lock); + /* Put header before the data */ + hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); + hdr->opcode = opcode; + hdr->index = cpu_to_le16(hdev->id); + hdr->len = cpu_to_le16(skb->len); + queue_monitor_skb(skb_copy); kfree_skb(skb_copy); } -static void send_monitor_event(struct sk_buff *skb) -{ - struct sock *sk; - - BT_DBG("len %d", skb->len); - - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) - continue; - - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - continue; - - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } - - read_unlock(&hci_sk_list.lock); -} - static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; @@ -422,7 +397,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) skb = create_monitor_event(hdev, event); if (skb) { - send_monitor_event(skb); + queue_monitor_skb(skb); kfree_skb(skb); } } @@ -1230,6 +1205,8 @@ int __init hci_sock_init(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_hci) > sizeof(struct sockaddr)); + err = proto_register(&hci_sk_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index cc25d0b74b36..07348e142f16 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1314,13 +1314,14 @@ int hidp_connection_add(struct hidp_connadd_req *req, { struct hidp_session *session; struct l2cap_conn *conn; - struct l2cap_chan *chan = l2cap_pi(ctrl_sock->sk)->chan; + struct l2cap_chan *chan; int ret; ret = hidp_verify_sockets(ctrl_sock, intr_sock); if (ret) return ret; + chan = l2cap_pi(ctrl_sock->sk)->chan; conn = NULL; l2cap_chan_lock(chan); if (chan->conn) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a2b6dfa38a0c..6ba33f9631e8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -63,10 +63,10 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event); -static inline __u8 bdaddr_type(struct hci_conn *hcon, __u8 type) +static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type) { - if (hcon->type == LE_LINK) { - if (type == ADDR_LE_DEV_PUBLIC) + if (link_type == LE_LINK) { + if (bdaddr_type == ADDR_LE_DEV_PUBLIC) return BDADDR_LE_PUBLIC; else return BDADDR_LE_RANDOM; @@ -75,6 +75,16 @@ static inline __u8 bdaddr_type(struct hci_conn *hcon, __u8 type) return BDADDR_BREDR; } +static inline u8 bdaddr_src_type(struct hci_conn *hcon) +{ + return bdaddr_type(hcon->type, hcon->src_type); +} + +static inline u8 bdaddr_dst_type(struct hci_conn *hcon) +{ + return bdaddr_type(hcon->type, hcon->dst_type); +} + /* ---- L2CAP channels ---- */ static struct l2cap_chan *__l2cap_get_chan_by_dcid(struct l2cap_conn *conn, @@ -646,7 +656,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); bacpy(&chan->dst, &hcon->dst); - chan->dst_type = bdaddr_type(hcon, hcon->dst_type); + chan->dst_type = bdaddr_dst_type(hcon); l2cap_chan_unlock(chan); } @@ -3790,8 +3800,8 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); - chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); - chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->local_amp_id = amp_id; @@ -5441,8 +5451,8 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); - chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); - chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->omtu = mtu; @@ -6881,7 +6891,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) */ if (hcon->type == LE_LINK && hci_bdaddr_list_lookup(&hcon->hdev->blacklist, &hcon->dst, - bdaddr_type(hcon, hcon->dst_type))) { + bdaddr_dst_type(hcon))) { kfree_skb(skb); return; } @@ -6966,8 +6976,9 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) test_bit(HCI_HS_ENABLED, &hcon->hdev->dev_flags)) conn->local_fixed_chan |= L2CAP_FC_A2MP; - if (bredr_sc_enabled(hcon->hdev) && - test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) + if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) && + (bredr_sc_enabled(hcon->hdev) || + test_bit(HCI_FORCE_BREDR_SMP, &hcon->hdev->dbg_flags))) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); @@ -7122,7 +7133,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, /* Update source addr of the socket */ bacpy(&chan->src, &hcon->src); - chan->src_type = bdaddr_type(hcon, hcon->src_type); + chan->src_type = bdaddr_src_type(hcon); __l2cap_chan_add(conn, chan); @@ -7196,8 +7207,10 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) * global list (by passing NULL as first parameter). */ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, - bdaddr_t *src, u8 link_type) + struct hci_conn *hcon) { + u8 src_type = bdaddr_src_type(hcon); + read_lock(&chan_list_lock); if (c) @@ -7210,11 +7223,9 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, continue; if (c->state != BT_LISTEN) continue; - if (bacmp(&c->src, src) && bacmp(&c->src, BDADDR_ANY)) + if (bacmp(&c->src, &hcon->src) && bacmp(&c->src, BDADDR_ANY)) continue; - if (link_type == ACL_LINK && c->src_type != BDADDR_BREDR) - continue; - if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) + if (src_type != c->src_type) continue; l2cap_chan_hold(c); @@ -7245,7 +7256,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) if (!conn) return; - dst_type = bdaddr_type(hcon, hcon->dst_type); + dst_type = bdaddr_dst_type(hcon); /* If device is blocked, do not create channels for it */ if (hci_bdaddr_list_lookup(&hdev->blacklist, &hcon->dst, dst_type)) @@ -7256,7 +7267,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) * we left off, because the list lock would prevent calling the * potentially sleeping l2cap_chan_lock() function. */ - pchan = l2cap_global_fixed_chan(NULL, &hdev->bdaddr, hcon->type); + pchan = l2cap_global_fixed_chan(NULL, hcon); while (pchan) { struct l2cap_chan *chan, *next; @@ -7269,7 +7280,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) if (chan) { bacpy(&chan->src, &hcon->src); bacpy(&chan->dst, &hcon->dst); - chan->src_type = bdaddr_type(hcon, hcon->src_type); + chan->src_type = bdaddr_src_type(hcon); chan->dst_type = dst_type; __l2cap_chan_add(conn, chan); @@ -7277,8 +7288,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) l2cap_chan_unlock(pchan); next: - next = l2cap_global_fixed_chan(pchan, &hdev->bdaddr, - hcon->type); + next = l2cap_global_fixed_chan(pchan, hcon); l2cap_chan_put(pchan); pchan = next; } @@ -7526,8 +7536,8 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) read_lock(&chan_list_lock); list_for_each_entry(c, &chan_list, global_l) { - seq_printf(f, "%pMR %pMR %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", - &c->src, &c->dst, + seq_printf(f, "%pMR (%u) %pMR (%u) %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", + &c->src, c->src_type, &c->dst, c->dst_type, c->state, __le16_to_cpu(c->psm), c->scid, c->dcid, c->imtu, c->omtu, c->sec_level, c->mode); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f65caf41953f..60694f0f4c73 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -302,7 +302,7 @@ done: static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; @@ -316,8 +316,6 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -338,10 +336,11 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock_nested(sk, L2CAP_NESTING_PARENT); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) @@ -1614,6 +1613,8 @@ int __init l2cap_init_sockets(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_l2) > sizeof(struct sockaddr)); + err = proto_register(&l2cap_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7384f1161336..9ec5390c85eb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -32,6 +32,7 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> +#include "hci_request.h" #include "smp.h" #define MGMT_VERSION 1 @@ -130,6 +131,9 @@ static const u16 mgmt_events[] = { #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) +#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ + "\x00\x00\x00\x00\x00\x00\x00\x00" + struct pending_cmd { struct list_head list; u16 opcode; @@ -138,7 +142,7 @@ struct pending_cmd { size_t param_len; struct sock *sk; void *user_data; - void (*cmd_complete)(struct pending_cmd *cmd, u8 status); + int (*cmd_complete)(struct pending_cmd *cmd, u8 status); }; /* HCI to MGMT error code conversion table */ @@ -569,8 +573,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_HS; } - if (lmp_sc_capable(hdev) || - test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; } @@ -1251,7 +1254,7 @@ static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) sizeof(settings)); } -static void clean_up_hci_complete(struct hci_dev *hdev, u8 status) +static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status 0x%02x", hdev->name, status); @@ -1486,16 +1489,16 @@ static void cmd_complete_rsp(struct pending_cmd *cmd, void *data) cmd_status_rsp(cmd, data); } -static void generic_cmd_complete(struct pending_cmd *cmd, u8 status) +static int generic_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - cmd->param_len); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, cmd->param_len); } -static void addr_cmd_complete(struct pending_cmd *cmd, u8 status) +static int addr_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - sizeof(struct mgmt_addr_info)); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, + sizeof(struct mgmt_addr_info)); } static u8 mgmt_bredr_support(struct hci_dev *hdev) @@ -1518,7 +1521,8 @@ static u8 mgmt_le_support(struct hci_dev *hdev) return MGMT_STATUS_SUCCESS; } -static void set_discoverable_complete(struct hci_dev *hdev, u8 status) +static void set_discoverable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -1566,7 +1570,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status) * entries. */ hci_req_init(&req, hdev); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); hci_req_run(&req, NULL); @@ -1777,7 +1781,8 @@ static void write_fast_connectable(struct hci_request *req, bool enable) hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); } -static void set_connectable_complete(struct hci_dev *hdev, u8 status) +static void set_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -1813,7 +1818,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status) if (conn_changed || discov_changed) { new_settings(hdev, cmd->sk); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); if (discov_changed) mgmt_update_adv_data(hdev); hci_update_background_scan(hdev); @@ -1847,7 +1852,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev, return err; if (changed) { - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); hci_update_background_scan(hdev); return new_settings(hdev, sk); } @@ -2195,16 +2200,18 @@ unlock: return err; } -static void le_enable_complete(struct hci_dev *hdev, u8 status) +static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; + hci_dev_lock(hdev); + if (status) { u8 mgmt_err = mgmt_status(status); mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, &mgmt_err); - return; + goto unlock; } mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match); @@ -2222,17 +2229,15 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status) if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { struct hci_request req; - hci_dev_lock(hdev); - hci_req_init(&req, hdev); update_adv_data(&req); update_scan_rsp_data(&req); + __hci_update_background_scan(&req); hci_req_run(&req, NULL); - - hci_update_background_scan(hdev); - - hci_dev_unlock(hdev); } + +unlock: + hci_dev_unlock(hdev); } static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) @@ -2385,7 +2390,7 @@ unlock: hci_dev_unlock(hdev); } -static void add_uuid_complete(struct hci_dev *hdev, u8 status) +static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2464,7 +2469,7 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; } -static void remove_uuid_complete(struct hci_dev *hdev, u8 status) +static void remove_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2549,7 +2554,7 @@ unlock: return err; } -static void set_class_complete(struct hci_dev *hdev, u8 status) +static void set_class_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -3097,16 +3102,17 @@ static struct pending_cmd *find_pairing(struct hci_conn *conn) return NULL; } -static void pairing_complete(struct pending_cmd *cmd, u8 status) +static int pairing_complete(struct pending_cmd *cmd, u8 status) { struct mgmt_rp_pair_device rp; struct hci_conn *conn = cmd->user_data; + int err; bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, + &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ conn->connect_cfm_cb = NULL; @@ -3114,14 +3120,15 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status) conn->disconn_cfm_cb = NULL; hci_conn_drop(conn); - hci_conn_put(conn); - - mgmt_pending_remove(cmd); /* The device is paired so there is no need to remove * its connection parameters anymore. */ clear_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); + + hci_conn_put(conn); + + return err; } void mgmt_smp_complete(struct hci_conn *conn, bool complete) @@ -3130,8 +3137,10 @@ void mgmt_smp_complete(struct hci_conn *conn, bool complete) struct pending_cmd *cmd; cmd = find_pairing(conn); - if (cmd) + if (cmd) { cmd->cmd_complete(cmd, status); + mgmt_pending_remove(cmd); + } } static void pairing_complete_cb(struct hci_conn *conn, u8 status) @@ -3141,10 +3150,13 @@ static void pairing_complete_cb(struct hci_conn *conn, u8 status) BT_DBG("status %u", status); cmd = find_pairing(conn); - if (!cmd) + if (!cmd) { BT_DBG("Unable to find a pending command"); - else - cmd->cmd_complete(cmd, mgmt_status(status)); + return; + } + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); } static void le_pairing_complete_cb(struct hci_conn *conn, u8 status) @@ -3157,10 +3169,13 @@ static void le_pairing_complete_cb(struct hci_conn *conn, u8 status) return; cmd = find_pairing(conn); - if (!cmd) + if (!cmd) { BT_DBG("Unable to find a pending command"); - else - cmd->cmd_complete(cmd, mgmt_status(status)); + return; + } + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); } static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, @@ -3274,8 +3289,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, cmd->user_data = hci_conn_get(conn); if ((conn->state == BT_CONNECTED || conn->state == BT_CONFIG) && - hci_conn_security(conn, sec_level, auth_type, true)) - pairing_complete(cmd, 0); + hci_conn_security(conn, sec_level, auth_type, true)) { + cmd->cmd_complete(cmd, 0); + mgmt_pending_remove(cmd); + } err = 0; @@ -3317,7 +3334,8 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - pairing_complete(cmd, MGMT_STATUS_CANCELLED); + cmd->cmd_complete(cmd, MGMT_STATUS_CANCELLED); + mgmt_pending_remove(cmd); err = cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0, addr, sizeof(*addr)); @@ -3470,7 +3488,7 @@ static void update_name(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); } -static void set_name_complete(struct hci_dev *hdev, u8 status) +static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; struct pending_cmd *cmd; @@ -3618,10 +3636,16 @@ unlock: static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { + struct mgmt_addr_info *addr = data; int err; BT_DBG("%s ", hdev->name); + if (!bdaddr_type_is_valid(addr->type)) + return cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, addr, + sizeof(*addr)); + hci_dev_lock(hdev); if (len == MGMT_ADD_REMOTE_OOB_DATA_SIZE) { @@ -3648,28 +3672,53 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, status, &cp->addr, sizeof(cp->addr)); } else if (len == MGMT_ADD_REMOTE_OOB_EXT_DATA_SIZE) { struct mgmt_cp_add_remote_oob_ext_data *cp = data; - u8 *rand192, *hash192; + u8 *rand192, *hash192, *rand256, *hash256; u8 status; - if (cp->addr.type != BDADDR_BREDR) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); - goto unlock; - } - if (bdaddr_type_is_le(cp->addr.type)) { + /* Enforce zero-valued 192-bit parameters as + * long as legacy SMP OOB isn't implemented. + */ + if (memcmp(cp->rand192, ZERO_KEY, 16) || + memcmp(cp->hash192, ZERO_KEY, 16)) { + err = cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + addr, sizeof(*addr)); + goto unlock; + } + rand192 = NULL; hash192 = NULL; } else { - rand192 = cp->rand192; - hash192 = cp->hash192; + /* In case one of the P-192 values is set to zero, + * then just disable OOB data for P-192. + */ + if (!memcmp(cp->rand192, ZERO_KEY, 16) || + !memcmp(cp->hash192, ZERO_KEY, 16)) { + rand192 = NULL; + hash192 = NULL; + } else { + rand192 = cp->rand192; + hash192 = cp->hash192; + } + } + + /* In case one of the P-256 values is set to zero, then just + * disable OOB data for P-256. + */ + if (!memcmp(cp->rand256, ZERO_KEY, 16) || + !memcmp(cp->hash256, ZERO_KEY, 16)) { + rand256 = NULL; + hash256 = NULL; + } else { + rand256 = cp->rand256; + hash256 = cp->hash256; } err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->addr.type, hash192, rand192, - cp->hash256, cp->rand256); + hash256, rand256); if (err < 0) status = MGMT_STATUS_FAILED; else @@ -3791,7 +3840,7 @@ static bool trigger_discovery(struct hci_request *req, u8 *status) /* All active scans will be done with either a resolvable * private address (when privacy feature has been enabled) - * or unresolvable private address. + * or non-resolvable private address. */ err = hci_update_random_address(req, true, &own_addr_type); if (err < 0) { @@ -3821,7 +3870,8 @@ static bool trigger_discovery(struct hci_request *req, u8 *status) return true; } -static void start_discovery_complete(struct hci_dev *hdev, u8 status) +static void start_discovery_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; unsigned long timeout; @@ -3846,6 +3896,9 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) hci_discovery_set_state(hdev, DISCOVERY_FINDING); + /* If the scan involves LE scan, pick proper timeout to schedule + * hdev->le_scan_disable that will stop it. + */ switch (hdev->discovery.type) { case DISCOV_TYPE_LE: timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); @@ -3862,9 +3915,23 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) break; } - if (timeout) + if (timeout) { + /* When service discovery is used and the controller has + * a strict duplicate filter, it is important to remember + * the start and duration of the scan. This is required + * for restarting scanning during the discovery phase. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks) && + (hdev->discovery.uuid_count > 0 || + hdev->discovery.rssi != HCI_RSSI_INVALID)) { + hdev->discovery.scan_start = jiffies; + hdev->discovery.scan_duration = timeout; + } + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout); + } unlock: hci_dev_unlock(hdev); @@ -3936,9 +4003,10 @@ failed: return err; } -static void service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) +static int service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, 1); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, 1); } static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, @@ -4049,7 +4117,7 @@ failed: return err; } -static void stop_discovery_complete(struct hci_dev *hdev, u8 status) +static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -4275,16 +4343,19 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, return err; } -static void set_advertising_complete(struct hci_dev *hdev, u8 status) +static void set_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct cmd_lookup match = { NULL, hdev }; + hci_dev_lock(hdev); + if (status) { u8 mgmt_err = mgmt_status(status); mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, cmd_status_rsp, &mgmt_err); - return; + goto unlock; } if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) @@ -4299,6 +4370,9 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status) if (match.sk) sock_put(match.sk); + +unlock: + hci_dev_unlock(hdev); } static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, @@ -4477,7 +4551,8 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, return err; } -static void fast_connectable_complete(struct hci_dev *hdev, u8 status) +static void fast_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; @@ -4575,7 +4650,7 @@ unlock: return err; } -static void set_bredr_complete(struct hci_dev *hdev, u8 status) +static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -4659,6 +4734,28 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_REJECTED); goto unlock; + } else { + /* When configuring a dual-mode controller to operate + * with LE only and using a static address, then switching + * BR/EDR back on is not allowed. + * + * Dual-mode controllers shall operate with the public + * address as its identity address for BR/EDR and LE. So + * reject the attempt to create an invalid configuration. + * + * The same restrictions applies when secure connections + * has been enabled. For BR/EDR this is a controller feature + * while for LE it is a host stack feature. This means that + * switching BR/EDR back on when secure connections has been + * enabled is not a supported transaction. + */ + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + (bacmp(&hdev->static_addr, BDADDR_ANY) || + test_bit(HCI_SC_ENABLED, &hdev->dev_flags))) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); + goto unlock; + } } if (mgmt_pending_find(MGMT_OP_SET_BREDR, hdev)) { @@ -4681,7 +4778,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); /* Since only the advertising data flags will change, there * is no need to update the scan response data. @@ -4697,30 +4794,80 @@ unlock: return err; } +static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + struct mgmt_mode *cp; + + BT_DBG("%s status %u", hdev->name, status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev); + if (!cmd) + goto unlock; + + if (status) { + cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); + goto remove; + } + + cp = cmd->param; + + switch (cp->val) { + case 0x00: + clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x01: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x02: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + set_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + } + + send_settings_rsp(cmd->sk, MGMT_OP_SET_SECURE_CONN, hdev); + new_settings(hdev, cmd->sk); + +remove: + mgmt_pending_remove(cmd); +unlock: + hci_dev_unlock(hdev); +} + static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct pending_cmd *cmd; + struct hci_request req; u8 val; int err; BT_DBG("request for %s", hdev->name); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && - !lmp_sc_capable(hdev) && !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (!lmp_sc_capable(hdev) && + !test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_NOT_SUPPORTED); + if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + lmp_sc_capable(hdev) && + !test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_REJECTED); + if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); - if (!hdev_is_powered(hdev) || - (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) || + if (!hdev_is_powered(hdev) || !lmp_sc_capable(hdev) || !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { bool changed; @@ -4767,17 +4914,14 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_send_cmd(hdev, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + hci_req_init(&req, hdev); + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + err = hci_req_run(&req, sc_enable_complete); if (err < 0) { mgmt_pending_remove(cmd); goto failed; } - if (cp->val == 0x02) - set_bit(HCI_SC_ONLY, &hdev->dev_flags); - else - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - failed: hci_dev_unlock(hdev); return err; @@ -5075,10 +5219,11 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } -static void conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_conn_info rp; + int err; memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); @@ -5092,14 +5237,17 @@ static void conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) rp.max_tx_power = HCI_TX_POWER_INVALID; } - cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, + &rp, sizeof(rp)); hci_conn_drop(conn); hci_conn_put(conn); + + return err; } -static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status) +static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, + u16 opcode) { struct hci_cp_read_rssi *cp; struct pending_cmd *cmd; @@ -5270,11 +5418,12 @@ unlock: return err; } -static void clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_clock_info rp; struct hci_dev *hdev; + int err; memset(&rp, 0, sizeof(rp)); memcpy(&rp.addr, &cmd->param, sizeof(rp.addr)); @@ -5294,15 +5443,18 @@ static void clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) } complete: - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + sizeof(rp)); if (conn) { hci_conn_drop(conn); hci_conn_put(conn); } + + return err; } -static void get_clock_info_complete(struct hci_dev *hdev, u8 status) +static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_cp_read_clock *hci_cp; struct pending_cmd *cmd; @@ -5409,6 +5561,65 @@ unlock: return err; } +static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) +{ + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + if (!conn) + return false; + + if (conn->dst_type != type) + return false; + + if (conn->state != BT_CONNECTED) + return false; + + return true; +} + +/* This function requires the caller holds hdev->lock */ +static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, + u8 addr_type, u8 auto_connect) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + + params = hci_conn_params_add(hdev, addr, addr_type); + if (!params) + return -EIO; + + if (params->auto_connect == auto_connect) + return 0; + + list_del_init(¶ms->action); + + switch (auto_connect) { + case HCI_AUTO_CONN_DISABLED: + case HCI_AUTO_CONN_LINK_LOSS: + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_REPORT: + list_add(¶ms->action, &hdev->pend_le_reports); + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + if (!is_connected(hdev, addr, addr_type)) { + list_add(¶ms->action, &hdev->pend_le_conns); + __hci_update_background_scan(req); + } + break; + } + + params->auto_connect = auto_connect; + + BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, + auto_connect); + + return 0; +} + static void device_added(struct sock *sk, struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type, u8 action) { @@ -5421,10 +5632,31 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } +static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_ADD_DEVICE, hdev); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; + struct pending_cmd *cmd; + struct hci_request req; u8 auto_conn, addr_type; int err; @@ -5441,14 +5673,24 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); + hci_req_init(&req, hdev); + hci_dev_lock(hdev); + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + cmd->cmd_complete = addr_cmd_complete; + if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5457,7 +5699,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev, NULL); + __hci_update_page_scan(&req); goto added; } @@ -5477,19 +5719,25 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, /* If the connection parameters don't exist for this device, * they will be created and configured with defaults. */ - if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type, + if (hci_conn_params_set(&req, &cp->addr.bdaddr, addr_type, auto_conn) < 0) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_FAILED, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, MGMT_STATUS_FAILED); + mgmt_pending_remove(cmd); goto unlock; } added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + err = hci_req_run(&req, add_device_complete); + if (err < 0) { + /* ENODATA means no HCI commands were needed (e.g. if + * the adapter is powered off). + */ + if (err == -ENODATA) + err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); @@ -5507,24 +5755,55 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } +static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_REMOVE_DEVICE, hdev); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_device *cp = data; + struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG("%s", hdev->name); + hci_req_init(&req, hdev); + hci_dev_lock(hdev); + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + cmd->cmd_complete = addr_cmd_complete; + if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { struct hci_conn_params *params; u8 addr_type; if (!bdaddr_type_is_valid(cp->addr.type)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5533,14 +5812,13 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, &cp->addr.bdaddr, cp->addr.type); if (err) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } - hci_update_page_scan(hdev, NULL); + __hci_update_page_scan(&req); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -5555,23 +5833,23 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (!params) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } list_del(¶ms->action); list_del(¶ms->list); kfree(params); - hci_update_background_scan(hdev); + __hci_update_background_scan(&req); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); } else { @@ -5579,9 +5857,9 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, struct bdaddr_list *b, *btmp; if (cp->addr.type) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5591,7 +5869,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - hci_update_page_scan(hdev, NULL); + __hci_update_page_scan(&req); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -5604,12 +5882,19 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, BT_DBG("All LE connection parameters were removed"); - hci_update_background_scan(hdev); + __hci_update_background_scan(&req); } complete: - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + err = hci_req_run(&req, remove_device_complete); + if (err < 0) { + /* ENODATA means no HCI commands were needed (e.g. if + * the adapter is powered off). + */ + if (err == -ENODATA) + err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); @@ -6021,8 +6306,9 @@ void mgmt_index_removed(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -static void restart_le_actions(struct hci_dev *hdev) +static void restart_le_actions(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { @@ -6044,18 +6330,25 @@ static void restart_le_actions(struct hci_dev *hdev) } } - hci_update_background_scan(hdev); + __hci_update_background_scan(req); } -static void powered_complete(struct hci_dev *hdev, u8 status) +static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; BT_DBG("status 0x%02x", status); - hci_dev_lock(hdev); + if (!status) { + /* Register the available SMP channels (BR/EDR and LE) only + * when successfully powering on the controller. This late + * registration is required so that LE SMP can clearly + * decide if the public address or static address is used. + */ + smp_register(hdev); + } - restart_le_actions(hdev); + hci_dev_lock(hdev); mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -6076,9 +6369,16 @@ static int powered_update_hci(struct hci_dev *hdev) if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && !lmp_host_ssp_capable(hdev)) { - u8 ssp = 1; + u8 mode = 0x01; - hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp); + hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); + + if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { + u8 support = 0x01; + + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, + sizeof(support), &support); + } } if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && @@ -6109,6 +6409,8 @@ static int powered_update_hci(struct hci_dev *hdev) if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) enable_advertising(&req); + + restart_le_actions(&req); } link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); @@ -6118,7 +6420,7 @@ static int powered_update_hci(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); update_name(&req); update_eir(&req); @@ -6130,8 +6432,7 @@ static int powered_update_hci(struct hci_dev *hdev) int mgmt_powered(struct hci_dev *hdev, u8 powered) { struct cmd_lookup match = { NULL, hdev }; - u8 status_not_powered = MGMT_STATUS_NOT_POWERED; - u8 zero_cod[] = { 0, 0, 0 }; + u8 status, zero_cod[] = { 0, 0, 0 }; int err; if (!test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -6147,7 +6448,20 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) } mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status_not_powered); + + /* If the power off is because of hdev unregistration let + * use the appropriate INVALID_INDEX status. Otherwise use + * NOT_POWERED. We cover both scenarios here since later in + * mgmt_index_removed() any hci_conn callbacks will have already + * been triggered, potentially causing misleading DISCONNECTED + * status responses. + */ + if (test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + status = MGMT_STATUS_INVALID_INDEX; + else + status = MGMT_STATUS_NOT_POWERED; + + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, @@ -6681,8 +6995,10 @@ void mgmt_auth_failed(struct hci_conn *conn, u8 hci_status) mgmt_event(MGMT_EV_AUTH_FAILED, conn->hdev, &ev, sizeof(ev), cmd ? cmd->sk : NULL); - if (cmd) - pairing_complete(cmd, status); + if (cmd) { + cmd->cmd_complete(cmd, status); + mgmt_pending_remove(cmd); + } } void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) @@ -6782,43 +7098,6 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_run(&req, NULL); } -void mgmt_sc_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) -{ - struct cmd_lookup match = { NULL, hdev }; - bool changed = false; - - if (status) { - u8 mgmt_err = mgmt_status(status); - - if (enable) { - if (test_and_clear_bit(HCI_SC_ENABLED, - &hdev->dev_flags)) - new_settings(hdev, NULL); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - cmd_status_rsp, &mgmt_err); - return; - } - - if (enable) { - changed = !test_and_set_bit(HCI_SC_ENABLED, &hdev->dev_flags); - } else { - changed = test_and_clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - settings_rsp, &match); - - if (changed) - new_settings(hdev, match.sk); - - if (match.sk) - sock_put(match.sk); -} - static void sk_lookup(struct pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; @@ -6889,28 +7168,21 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, mgmt_status(status)); } else { - if (bredr_sc_enabled(hdev) && hash256 && rand256) { - struct mgmt_rp_read_local_oob_ext_data rp; + struct mgmt_rp_read_local_oob_data rp; + size_t rp_size = sizeof(rp); - memcpy(rp.hash192, hash192, sizeof(rp.hash192)); - memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + memcpy(rp.hash192, hash192, sizeof(rp.hash192)); + memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + if (bredr_sc_enabled(hdev) && hash256 && rand256) { memcpy(rp.hash256, hash256, sizeof(rp.hash256)); memcpy(rp.rand256, rand256, sizeof(rp.rand256)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); } else { - struct mgmt_rp_read_local_oob_data rp; - - memcpy(rp.hash, hash192, sizeof(rp.hash)); - memcpy(rp.rand, rand192, sizeof(rp.rand)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); + rp_size -= sizeof(rp.hash256) + sizeof(rp.rand256); } + + cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, 0, + &rp, rp_size); } mgmt_pending_remove(cmd); @@ -6983,6 +7255,21 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) return false; } +static void restart_le_scan(struct hci_dev *hdev) +{ + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + if (time_after(jiffies + DISCOV_LE_RESTART_DELAY, + hdev->discovery.scan_start + + hdev->discovery.scan_duration)) + return; + + queue_delayed_work(hdev->workqueue, &hdev->le_scan_restart, + DISCOV_LE_RESTART_DELAY); +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) @@ -7005,14 +7292,18 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, /* When using service discovery with a RSSI threshold, then check * if such a RSSI threshold is specified. If a RSSI threshold has - * been specified, then all results with a RSSI smaller than the - * RSSI threshold will be dropped. + * been specified, and HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, + * then all results with a RSSI smaller than the RSSI threshold will be + * dropped. If the quirk is set, let it through for further processing, + * as we might need to restart the scan. * * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, * the results are also dropped. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && - (rssi < hdev->discovery.rssi || rssi == HCI_RSSI_INVALID)) + (rssi == HCI_RSSI_INVALID || + (rssi < hdev->discovery.rssi && + !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) return; /* Make sure that the buffer is big enough. The 5 extra bytes @@ -7031,7 +7322,8 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, * However when using service discovery, the value 127 will be * returned when the RSSI is not available. */ - if (rssi == HCI_RSSI_INVALID && !hdev->discovery.report_invalid_rssi) + if (rssi == HCI_RSSI_INVALID && !hdev->discovery.report_invalid_rssi && + link_type == ACL_LINK) rssi = 0; bacpy(&ev->addr.bdaddr, bdaddr); @@ -7050,10 +7342,20 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, match = eir_has_uuids(eir, eir_len, hdev->discovery.uuid_count, hdev->discovery.uuids); - if (!match) - return; + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. + */ + if (match && test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); + } else { + match = true; } + if (!match && !scan_rsp_len) + return; + /* Copy EIR or advertising data into event */ memcpy(ev->eir, eir, eir_len); } else { @@ -7061,8 +7363,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, * provided, results with empty EIR or advertising data * should be dropped since they do not match any UUID. */ - if (hdev->discovery.uuid_count > 0) + if (hdev->discovery.uuid_count > 0 && !scan_rsp_len) return; + + match = false; } if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV)) @@ -7080,6 +7384,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, hdev->discovery.uuid_count, hdev->discovery.uuids)) return; + + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); } /* Append scan response data to event */ @@ -7093,6 +7405,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } + /* Validate the reported RSSI value against the RSSI threshold once more + * incase HCI_QUIRK_STRICT_DUPLICATE_FILTER forced a restart of LE + * scanning. + */ + if (hdev->discovery.rssi != HCI_RSSI_INVALID && + rssi < hdev->discovery.rssi) + return; + ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); ev_size = sizeof(*ev) + eir_len + scan_rsp_len; @@ -7135,7 +7455,7 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } -static void adv_enable_complete(struct hci_dev *hdev, u8 status) +static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status %u", hdev->name, status); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 73f8c75abe6e..4fea24275b17 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -771,7 +771,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bacpy(&addr.l2_bdaddr, dst); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); @@ -2038,7 +2038,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Bind socket */ bacpy(&addr.l2_bdaddr, ba); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 2348176401a0..3c6d2c8ac1a4 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -468,7 +468,7 @@ done: static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; @@ -487,8 +487,6 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -509,10 +507,11 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) @@ -1058,6 +1057,8 @@ int __init rfcomm_init_sockets(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_rc) > sizeof(struct sockaddr)); + err = proto_register(&rfcomm_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 30e5ea3f1ad3..76321b546e84 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -618,7 +618,7 @@ done: static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; long timeo; int err = 0; @@ -632,8 +632,6 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -654,10 +652,10 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) @@ -1184,6 +1182,8 @@ int __init sco_init(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_sco) > sizeof(struct sockaddr)); + err = proto_register(&sco_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c new file mode 100644 index 000000000000..378f4064952c --- /dev/null +++ b/net/bluetooth/selftest.c @@ -0,0 +1,244 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "ecc.h" +#include "smp.h" +#include "selftest.h" + +#if IS_ENABLED(CONFIG_BT_SELFTEST_ECDH) + +static const u8 priv_a_1[32] __initconst = { + 0xbd, 0x1a, 0x3c, 0xcd, 0xa6, 0xb8, 0x99, 0x58, + 0x99, 0xb7, 0x40, 0xeb, 0x7b, 0x60, 0xff, 0x4a, + 0x50, 0x3f, 0x10, 0xd2, 0xe3, 0xb3, 0xc9, 0x74, + 0x38, 0x5f, 0xc5, 0xa3, 0xd4, 0xf6, 0x49, 0x3f, +}; +static const u8 priv_b_1[32] __initconst = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55, +}; +static const u8 pub_a_1[64] __initconst = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20, + + 0x8b, 0xd2, 0x89, 0x15, 0xd0, 0x8e, 0x1c, 0x74, + 0x24, 0x30, 0xed, 0x8f, 0xc2, 0x45, 0x63, 0x76, + 0x5c, 0x15, 0x52, 0x5a, 0xbf, 0x9a, 0x32, 0x63, + 0x6d, 0xeb, 0x2a, 0x65, 0x49, 0x9c, 0x80, 0xdc, +}; +static const u8 pub_b_1[64] __initconst = { + 0x90, 0xa1, 0xaa, 0x2f, 0xb2, 0x77, 0x90, 0x55, + 0x9f, 0xa6, 0x15, 0x86, 0xfd, 0x8a, 0xb5, 0x47, + 0x00, 0x4c, 0x9e, 0xf1, 0x84, 0x22, 0x59, 0x09, + 0x96, 0x1d, 0xaf, 0x1f, 0xf0, 0xf0, 0xa1, 0x1e, + + 0x4a, 0x21, 0xb1, 0x15, 0xf9, 0xaf, 0x89, 0x5f, + 0x76, 0x36, 0x8e, 0xe2, 0x30, 0x11, 0x2d, 0x47, + 0x60, 0x51, 0xb8, 0x9a, 0x3a, 0x70, 0x56, 0x73, + 0x37, 0xad, 0x9d, 0x42, 0x3e, 0xf3, 0x55, 0x4c, +}; +static const u8 dhkey_1[32] __initconst = { + 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, + 0xf1, 0x66, 0xf8, 0xb4, 0x13, 0x6b, 0x79, 0x99, + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec, +}; + +static const u8 priv_a_2[32] __initconst = { + 0x63, 0x76, 0x45, 0xd0, 0xf7, 0x73, 0xac, 0xb7, + 0xff, 0xdd, 0x03, 0x72, 0xb9, 0x72, 0x85, 0xb4, + 0x41, 0xb6, 0x5d, 0x0c, 0x5d, 0x54, 0x84, 0x60, + 0x1a, 0xa3, 0x9a, 0x3c, 0x69, 0x16, 0xa5, 0x06, +}; +static const u8 priv_b_2[32] __initconst = { + 0xba, 0x30, 0x55, 0x50, 0x19, 0xa2, 0xca, 0xa3, + 0xa5, 0x29, 0x08, 0xc6, 0xb5, 0x03, 0x88, 0x7e, + 0x03, 0x2b, 0x50, 0x73, 0xd4, 0x2e, 0x50, 0x97, + 0x64, 0xcd, 0x72, 0x0d, 0x67, 0xa0, 0x9a, 0x52, +}; +static const u8 pub_a_2[64] __initconst = { + 0xdd, 0x78, 0x5c, 0x74, 0x03, 0x9b, 0x7e, 0x98, + 0xcb, 0x94, 0x87, 0x4a, 0xad, 0xfa, 0xf8, 0xd5, + 0x43, 0x3e, 0x5c, 0xaf, 0xea, 0xb5, 0x4c, 0xf4, + 0x9e, 0x80, 0x79, 0x57, 0x7b, 0xa4, 0x31, 0x2c, + + 0x4f, 0x5d, 0x71, 0x43, 0x77, 0x43, 0xf8, 0xea, + 0xd4, 0x3e, 0xbd, 0x17, 0x91, 0x10, 0x21, 0xd0, + 0x1f, 0x87, 0x43, 0x8e, 0x40, 0xe2, 0x52, 0xcd, + 0xbe, 0xdf, 0x98, 0x38, 0x18, 0x12, 0x95, 0x91, +}; +static const u8 pub_b_2[64] __initconst = { + 0xcc, 0x00, 0x65, 0xe1, 0xf5, 0x6c, 0x0d, 0xcf, + 0xec, 0x96, 0x47, 0x20, 0x66, 0xc9, 0xdb, 0x84, + 0x81, 0x75, 0xa8, 0x4d, 0xc0, 0xdf, 0xc7, 0x9d, + 0x1b, 0x3f, 0x3d, 0xf2, 0x3f, 0xe4, 0x65, 0xf4, + + 0x79, 0xb2, 0xec, 0xd8, 0xca, 0x55, 0xa1, 0xa8, + 0x43, 0x4d, 0x6b, 0xca, 0x10, 0xb0, 0xc2, 0x01, + 0xc2, 0x33, 0x4e, 0x16, 0x24, 0xc4, 0xef, 0xee, + 0x99, 0xd8, 0xbb, 0xbc, 0x48, 0xd0, 0x01, 0x02, +}; +static const u8 dhkey_2[32] __initconst = { + 0x69, 0xeb, 0x21, 0x32, 0xf2, 0xc6, 0x05, 0x41, + 0x60, 0x19, 0xcd, 0x5e, 0x94, 0xe1, 0xe6, 0x5f, + 0x33, 0x07, 0xe3, 0x38, 0x4b, 0x68, 0xe5, 0x62, + 0x3f, 0x88, 0x6d, 0x2f, 0x3a, 0x84, 0x85, 0xab, +}; + +static const u8 priv_a_3[32] __initconst = { + 0xbd, 0x1a, 0x3c, 0xcd, 0xa6, 0xb8, 0x99, 0x58, + 0x99, 0xb7, 0x40, 0xeb, 0x7b, 0x60, 0xff, 0x4a, + 0x50, 0x3f, 0x10, 0xd2, 0xe3, 0xb3, 0xc9, 0x74, + 0x38, 0x5f, 0xc5, 0xa3, 0xd4, 0xf6, 0x49, 0x3f, +}; +static const u8 pub_a_3[64] __initconst = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20, + + 0x8b, 0xd2, 0x89, 0x15, 0xd0, 0x8e, 0x1c, 0x74, + 0x24, 0x30, 0xed, 0x8f, 0xc2, 0x45, 0x63, 0x76, + 0x5c, 0x15, 0x52, 0x5a, 0xbf, 0x9a, 0x32, 0x63, + 0x6d, 0xeb, 0x2a, 0x65, 0x49, 0x9c, 0x80, 0xdc, +}; +static const u8 dhkey_3[32] __initconst = { + 0x2d, 0xab, 0x00, 0x48, 0xcb, 0xb3, 0x7b, 0xda, + 0x55, 0x7b, 0x8b, 0x72, 0xa8, 0x57, 0x87, 0xc3, + 0x87, 0x27, 0x99, 0x32, 0xfc, 0x79, 0x5f, 0xae, + 0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70, +}; + +static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], + const u8 pub_a[64], const u8 pub_b[64], + const u8 dhkey[32]) +{ + u8 dhkey_a[32], dhkey_b[32]; + + ecdh_shared_secret(pub_b, priv_a, dhkey_a); + ecdh_shared_secret(pub_a, priv_b, dhkey_b); + + if (memcmp(dhkey_a, dhkey, 32)) + return -EINVAL; + + if (memcmp(dhkey_b, dhkey, 32)) + return -EINVAL; + + return 0; +} + +static int __init test_ecdh(void) +{ + ktime_t calltime, delta, rettime; + unsigned long long duration; + int err; + + calltime = ktime_get(); + + err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1); + if (err) { + BT_ERR("ECDH sample 1 failed"); + return err; + } + + err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2); + if (err) { + BT_ERR("ECDH sample 2 failed"); + return err; + } + + err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3); + if (err) { + BT_ERR("ECDH sample 3 failed"); + return err; + } + + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + BT_INFO("ECDH test passed in %llu usecs", duration); + + return 0; +} + +#else + +static inline int test_ecdh(void) +{ + return 0; +} + +#endif + +static int __init run_selftest(void) +{ + int err; + + BT_INFO("Starting self testing"); + + err = test_ecdh(); + if (err) + goto done; + + err = bt_selftest_smp(); + +done: + BT_INFO("Finished self testing"); + + return err; +} + +#if IS_MODULE(CONFIG_BT) + +/* This is run when CONFIG_BT_SELFTEST=y and CONFIG_BT=m and is just a + * wrapper to allow running this at module init. + * + * If CONFIG_BT_SELFTEST=n, then this code is not compiled at all. + */ +int __init bt_selftest(void) +{ + return run_selftest(); +} + +#else + +/* This is run when CONFIG_BT_SELFTEST=y and CONFIG_BT=y and is run + * via late_initcall() as last item in the initialization sequence. + * + * If CONFIG_BT_SELFTEST=n, then this code is not compiled at all. + */ +static int __init bt_selftest_init(void) +{ + return run_selftest(); +} +late_initcall(bt_selftest_init); + +#endif diff --git a/net/bluetooth/selftest.h b/net/bluetooth/selftest.h new file mode 100644 index 000000000000..2aa0a346a913 --- /dev/null +++ b/net/bluetooth/selftest.h @@ -0,0 +1,45 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#if IS_ENABLED(CONFIG_BT_SELFTEST) && IS_MODULE(CONFIG_BT) + +/* When CONFIG_BT_SELFTEST=y and the CONFIG_BT=m, then the self testing + * is run at module loading time. + */ +int bt_selftest(void); + +#else + +/* When CONFIG_BT_SELFTEST=y and CONFIG_BT=y, then the self testing + * is run via late_initcall() to make sure that subsys_initcall() of + * the Bluetooth subsystem and device_initcall() of the Crypto subsystem + * do not clash. + * + * When CONFIG_BT_SELFTEST=n, then this turns into an empty call that + * has no impact. + */ +static inline int bt_selftest(void) +{ + return 0; +} + +#endif diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 6a46252fe66f..c09a821f381d 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -20,6 +20,7 @@ SOFTWARE IS DISCLAIMED. */ +#include <linux/debugfs.h> #include <linux/crypto.h> #include <linux/scatterlist.h> #include <crypto/b128ops.h> @@ -223,8 +224,9 @@ static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32], return err; } -static int smp_f5(struct crypto_hash *tfm_cmac, u8 w[32], u8 n1[16], u8 n2[16], - u8 a1[7], u8 a2[7], u8 mackey[16], u8 ltk[16]) +static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32], + const u8 n1[16], const u8 n2[16], const u8 a1[7], + const u8 a2[7], u8 mackey[16], u8 ltk[16]) { /* The btle, salt and length "magic" values are as defined in * the SMP section of the Bluetooth core specification. In ASCII @@ -276,7 +278,7 @@ static int smp_f5(struct crypto_hash *tfm_cmac, u8 w[32], u8 n1[16], u8 n2[16], } static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], - const u8 n1[16], u8 n2[16], const u8 r[16], + const u8 n1[16], const u8 n2[16], const u8 r[16], const u8 io_cap[3], const u8 a1[7], const u8 a2[7], u8 res[16]) { @@ -298,7 +300,7 @@ static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], if (err) return err; - BT_DBG("res %16phN", res); + SMP_DBG("res %16phN", res); return err; } @@ -618,7 +620,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, oob_data = hci_find_remote_oob_data(hdev, &hcon->dst, bdaddr_type); - if (oob_data) { + if (oob_data && oob_data->present) { set_bit(SMP_FLAG_OOB, &smp->flags); oob_flag = SMP_OOB_PRESENT; memcpy(smp->rr, oob_data->rand256, 16); @@ -1673,7 +1675,8 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) /* SMP over BR/EDR requires special treatment */ if (conn->hcon->type == ACL_LINK) { /* We must have a BR/EDR SC link */ - if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags)) + if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) && + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return SMP_CROSS_TRANSP_NOT_ALLOWED; set_bit(SMP_FLAG_SC, &smp->flags); @@ -2302,8 +2305,12 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, * implementations are not known of and in order to not over * complicate our implementation, simply pretend that we never * received an IRK for such a device. + * + * The Identity Address must also be a Static Random or Public + * Address, which hci_is_identity_address() checks for. */ - if (!bacmp(&info->bdaddr, BDADDR_ANY)) { + if (!bacmp(&info->bdaddr, BDADDR_ANY) || + !hci_is_identity_address(&info->bdaddr, info->addr_type)) { BT_ERR("Ignoring IRK with no identity address"); goto distribute; } @@ -2736,7 +2743,7 @@ static void bredr_pairing(struct l2cap_chan *chan) /* BR/EDR must use Secure Connections for SMP */ if (!test_bit(HCI_CONN_AES_CCM, &hcon->flags) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return; /* If our LE support is not enabled don't do anything */ @@ -2927,7 +2934,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, 0); if (IS_ERR(tfm_aes)) { BT_ERR("Unable to create crypto context"); - return ERR_PTR(PTR_ERR(tfm_aes)); + return ERR_CAST(tfm_aes); } create_chan: @@ -2943,11 +2950,30 @@ create_chan: l2cap_chan_set_defaults(chan); - bacpy(&chan->src, &hdev->bdaddr); - if (cid == L2CAP_CID_SMP) - chan->src_type = BDADDR_LE_PUBLIC; - else + if (cid == L2CAP_CID_SMP) { + /* If usage of static address is forced or if the devices + * does not have a public address, then listen on the static + * address. + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configued, then listen on + * the static address instead. + */ + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + bacpy(&chan->src, &hdev->static_addr); + chan->src_type = BDADDR_LE_RANDOM; + } else { + bacpy(&chan->src, &hdev->bdaddr); + chan->src_type = BDADDR_LE_PUBLIC; + } + } else { + bacpy(&chan->src, &hdev->bdaddr); chan->src_type = BDADDR_BREDR; + } + chan->state = BT_LISTEN; chan->mode = L2CAP_MODE_BASIC; chan->imtu = L2CAP_DEFAULT_MTU; @@ -2974,21 +3000,108 @@ static void smp_del_chan(struct l2cap_chan *chan) l2cap_chan_put(chan); } +static ssize_t force_bredr_smp_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_bredr_smp_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + return -EALREADY; + + if (enable) { + struct l2cap_chan *chan; + + chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); + if (IS_ERR(chan)) + return PTR_ERR(chan); + + hdev->smp_bredr_data = chan; + } else { + struct l2cap_chan *chan; + + chan = hdev->smp_bredr_data; + hdev->smp_bredr_data = NULL; + smp_del_chan(chan); + } + + change_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_bredr_smp_fops = { + .open = simple_open, + .read = force_bredr_smp_read, + .write = force_bredr_smp_write, + .llseek = default_llseek, +}; + int smp_register(struct hci_dev *hdev) { struct l2cap_chan *chan; BT_DBG("%s", hdev->name); + /* If the controller does not support Low Energy operation, then + * there is also no need to register any SMP channel. + */ + if (!lmp_le_capable(hdev)) + return 0; + + if (WARN_ON(hdev->smp_data)) { + chan = hdev->smp_data; + hdev->smp_data = NULL; + smp_del_chan(chan); + } + chan = smp_add_cid(hdev, L2CAP_CID_SMP); if (IS_ERR(chan)) return PTR_ERR(chan); hdev->smp_data = chan; - if (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + /* If the controller does not support BR/EDR Secure Connections + * feature, then the BR/EDR SMP channel shall not be present. + * + * To test this with Bluetooth 4.0 controllers, create a debugfs + * switch that allows forcing BR/EDR SMP support and accepting + * cross-transport pairing on non-AES encrypted connections. + */ + if (!lmp_sc_capable(hdev)) { + debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, + hdev, &force_bredr_smp_fops); return 0; + } + + if (WARN_ON(hdev->smp_bredr_data)) { + chan = hdev->smp_bredr_data; + hdev->smp_bredr_data = NULL; + smp_del_chan(chan); + } chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); if (IS_ERR(chan)) { @@ -3020,3 +3133,331 @@ void smp_unregister(struct hci_dev *hdev) smp_del_chan(chan); } } + +#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) + +static int __init test_ah(struct crypto_blkcipher *tfm_aes) +{ + const u8 irk[16] = { + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 r[3] = { 0x94, 0x81, 0x70 }; + const u8 exp[3] = { 0xaa, 0xfb, 0x0d }; + u8 res[3]; + int err; + + err = smp_ah(tfm_aes, irk, r, res); + if (err) + return err; + + if (memcmp(res, exp, 3)) + return -EINVAL; + + return 0; +} + +static int __init test_c1(struct crypto_blkcipher *tfm_aes) +{ + const u8 k[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + const u8 r[16] = { + 0xe0, 0x2e, 0x70, 0xc6, 0x4e, 0x27, 0x88, 0x63, + 0x0e, 0x6f, 0xad, 0x56, 0x21, 0xd5, 0x83, 0x57 }; + const u8 preq[7] = { 0x01, 0x01, 0x00, 0x00, 0x10, 0x07, 0x07 }; + const u8 pres[7] = { 0x02, 0x03, 0x00, 0x00, 0x08, 0x00, 0x05 }; + const u8 _iat = 0x01; + const u8 _rat = 0x00; + const bdaddr_t ra = { { 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1 } }; + const bdaddr_t ia = { { 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1 } }; + const u8 exp[16] = { + 0x86, 0x3b, 0xf1, 0xbe, 0xc5, 0x4d, 0xa7, 0xd2, + 0xea, 0x88, 0x89, 0x87, 0xef, 0x3f, 0x1e, 0x1e }; + u8 res[16]; + int err; + + err = smp_c1(tfm_aes, k, r, preq, pres, _iat, &ia, _rat, &ra, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_s1(struct crypto_blkcipher *tfm_aes) +{ + const u8 k[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + const u8 r1[16] = { + 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11 }; + const u8 r2[16] = { + 0x00, 0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99 }; + const u8 exp[16] = { + 0x62, 0xa0, 0x6d, 0x79, 0xae, 0x16, 0x42, 0x5b, + 0x9b, 0xf4, 0xb0, 0xe8, 0xf0, 0xe1, 0x1f, 0x9a }; + u8 res[16]; + int err; + + err = smp_s1(tfm_aes, k, r1, r2, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f4(struct crypto_hash *tfm_cmac) +{ + const u8 u[32] = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20 }; + const u8 v[32] = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55 }; + const u8 x[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 z = 0x00; + const u8 exp[16] = { + 0x2d, 0x87, 0x74, 0xa9, 0xbe, 0xa1, 0xed, 0xf1, + 0x1c, 0xbd, 0xa9, 0x07, 0xf1, 0x16, 0xc9, 0xf2 }; + u8 res[16]; + int err; + + err = smp_f4(tfm_cmac, u, v, x, z, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f5(struct crypto_hash *tfm_cmac) +{ + const u8 w[32] = { + 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, + 0xf1, 0x66, 0xf8, 0xb4, 0x13, 0x6b, 0x79, 0x99, + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 n1[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 n2[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u8 a1[7] = { 0xce, 0xbf, 0x37, 0x37, 0x12, 0x56, 0x00 }; + const u8 a2[7] = { 0xc1, 0xcf, 0x2d, 0x70, 0x13, 0xa7, 0x00 }; + const u8 exp_ltk[16] = { + 0x38, 0x0a, 0x75, 0x94, 0xb5, 0x22, 0x05, 0x98, + 0x23, 0xcd, 0xd7, 0x69, 0x11, 0x79, 0x86, 0x69 }; + const u8 exp_mackey[16] = { + 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, + 0x02, 0x4a, 0x08, 0xa1, 0x76, 0xf1, 0x65, 0x29 }; + u8 mackey[16], ltk[16]; + int err; + + err = smp_f5(tfm_cmac, w, n1, n2, a1, a2, mackey, ltk); + if (err) + return err; + + if (memcmp(mackey, exp_mackey, 16)) + return -EINVAL; + + if (memcmp(ltk, exp_ltk, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f6(struct crypto_hash *tfm_cmac) +{ + const u8 w[16] = { + 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, + 0x02, 0x4a, 0x08, 0xa1, 0x76, 0xf1, 0x65, 0x29 }; + const u8 n1[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 n2[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u8 r[16] = { + 0xc8, 0x0f, 0x2d, 0x0c, 0xd2, 0x42, 0xda, 0x08, + 0x54, 0xbb, 0x53, 0xb4, 0x3b, 0x34, 0xa3, 0x12 }; + const u8 io_cap[3] = { 0x02, 0x01, 0x01 }; + const u8 a1[7] = { 0xce, 0xbf, 0x37, 0x37, 0x12, 0x56, 0x00 }; + const u8 a2[7] = { 0xc1, 0xcf, 0x2d, 0x70, 0x13, 0xa7, 0x00 }; + const u8 exp[16] = { + 0x61, 0x8f, 0x95, 0xda, 0x09, 0x0b, 0x6c, 0xd2, + 0xc5, 0xe8, 0xd0, 0x9c, 0x98, 0x73, 0xc4, 0xe3 }; + u8 res[16]; + int err; + + err = smp_f6(tfm_cmac, w, n1, n2, r, io_cap, a1, a2, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_g2(struct crypto_hash *tfm_cmac) +{ + const u8 u[32] = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20 }; + const u8 v[32] = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55 }; + const u8 x[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 y[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u32 exp_val = 0x2f9ed5ba % 1000000; + u32 val; + int err; + + err = smp_g2(tfm_cmac, u, v, x, y, &val); + if (err) + return err; + + if (val != exp_val) + return -EINVAL; + + return 0; +} + +static int __init test_h6(struct crypto_hash *tfm_cmac) +{ + const u8 w[16] = { + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 key_id[4] = { 0x72, 0x62, 0x65, 0x6c }; + const u8 exp[16] = { + 0x99, 0x63, 0xb1, 0x80, 0xe2, 0xa9, 0xd3, 0xe8, + 0x1c, 0xc9, 0x6d, 0xe7, 0x02, 0xe1, 0x9a, 0x2d }; + u8 res[16]; + int err; + + err = smp_h6(tfm_cmac, w, key_id, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init run_selftests(struct crypto_blkcipher *tfm_aes, + struct crypto_hash *tfm_cmac) +{ + ktime_t calltime, delta, rettime; + unsigned long long duration; + int err; + + calltime = ktime_get(); + + err = test_ah(tfm_aes); + if (err) { + BT_ERR("smp_ah test failed"); + return err; + } + + err = test_c1(tfm_aes); + if (err) { + BT_ERR("smp_c1 test failed"); + return err; + } + + err = test_s1(tfm_aes); + if (err) { + BT_ERR("smp_s1 test failed"); + return err; + } + + err = test_f4(tfm_cmac); + if (err) { + BT_ERR("smp_f4 test failed"); + return err; + } + + err = test_f5(tfm_cmac); + if (err) { + BT_ERR("smp_f5 test failed"); + return err; + } + + err = test_f6(tfm_cmac); + if (err) { + BT_ERR("smp_f6 test failed"); + return err; + } + + err = test_g2(tfm_cmac); + if (err) { + BT_ERR("smp_g2 test failed"); + return err; + } + + err = test_h6(tfm_cmac); + if (err) { + BT_ERR("smp_h6 test failed"); + return err; + } + + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + BT_INFO("SMP test passed in %llu usecs", duration); + + return 0; +} + +int __init bt_selftest_smp(void) +{ + struct crypto_blkcipher *tfm_aes; + struct crypto_hash *tfm_cmac; + int err; + + tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_aes)) { + BT_ERR("Unable to create ECB crypto context"); + return PTR_ERR(tfm_aes); + } + + tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_cmac)) { + BT_ERR("Unable to create CMAC crypto context"); + crypto_free_blkcipher(tfm_aes); + return PTR_ERR(tfm_cmac); + } + + err = run_selftests(tfm_aes, tfm_cmac); + + crypto_free_hash(tfm_cmac); + crypto_free_blkcipher(tfm_aes); + + return err; +} + +#endif diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 3296bf42ae80..60c5b73fcb4b 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -192,4 +192,17 @@ int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa); int smp_register(struct hci_dev *hdev); void smp_unregister(struct hci_dev *hdev); +#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) + +int bt_selftest_smp(void); + +#else + +static inline int bt_selftest_smp(void) +{ + return 0; +} + +#endif + #endif /* __SMP_H */ diff --git a/net/bridge/br.c b/net/bridge/br.c index 44425aff7cba..fb57ab6b24f9 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -19,6 +19,7 @@ #include <linux/llc.h> #include <net/llc.h> #include <net/stp.h> +#include <net/switchdev.h> #include "br_private.h" @@ -120,6 +121,48 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; +static int br_netdev_switch_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + struct netdev_switch_notifier_fdb_info *fdb_info; + int err = NOTIFY_DONE; + + rtnl_lock(); + p = br_port_get_rtnl(dev); + if (!p) + goto out; + + br = p->br; + + switch (event) { + case NETDEV_SWITCH_FDB_ADD: + fdb_info = ptr; + err = br_fdb_external_learn_add(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + case NETDEV_SWITCH_FDB_DEL: + fdb_info = ptr; + err = br_fdb_external_learn_del(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + } + +out: + rtnl_unlock(); + return err; +} + +static struct notifier_block br_netdev_switch_notifier = { + .notifier_call = br_netdev_switch_event, +}; + static void __net_exit br_net_exit(struct net *net) { struct net_device *dev; @@ -169,10 +212,14 @@ static int __init br_init(void) if (err) goto err_out3; - err = br_netlink_init(); + err = register_netdev_switch_notifier(&br_netdev_switch_notifier); if (err) goto err_out4; + err = br_netlink_init(); + if (err) + goto err_out5; + brioctl_set(br_ioctl_deviceless_stub); #if IS_ENABLED(CONFIG_ATM_LANE) @@ -185,6 +232,8 @@ static int __init br_init(void) return 0; +err_out5: + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: @@ -202,6 +251,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); unregister_pernet_subsys(&br_net_ops); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index cc36e59db7d7..6eb94b58637a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -280,7 +280,7 @@ void br_fdb_cleanup(unsigned long _data) hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; - if (f->is_static) + if (f->is_static || f->added_by_external_learn) continue; this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) @@ -633,7 +633,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -686,6 +687,9 @@ int br_fdb_dump(struct sk_buff *skb, if (!(dev->priv_flags & IFF_EBRIDGE)) goto out; + if (!filter_dev) + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; @@ -697,7 +701,7 @@ int br_fdb_dump(struct sk_buff *skb, (!f->dst || f->dst->dev != filter_dev)) { if (filter_dev != dev) goto skip; - /* !f->dst is a speacial case for bridge + /* !f->dst is a special case for bridge * It means the MAC belongs to the bridge * Therefore need a little more filtering * we only want to dump the !f->dst case @@ -705,6 +709,8 @@ int br_fdb_dump(struct sk_buff *skb, if (f->dst) goto skip; } + if (!filter_dev && f->dst) + goto skip; if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, @@ -985,26 +991,14 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } } -int br_fdb_external_learn_add(struct net_device *dev, +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1029,33 +1023,18 @@ int br_fdb_external_learn_add(struct net_device *dev, err_unlock: spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } -EXPORT_SYMBOL(br_fdb_external_learn_add); -int br_fdb_external_learn_del(struct net_device *dev, +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1066,9 +1045,6 @@ int br_fdb_external_learn_del(struct net_device *dev, err = -ENOENT; spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } -EXPORT_SYMBOL(br_fdb_external_learn_del); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ed307db7a12b..b087d278c679 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -424,6 +424,7 @@ netdev_features_t br_features_recompute(struct net_bridge *br, features = netdev_increment_features(features, p->dev->features, mask); } + features = netdev_add_tso_features(features, mask); return features; } @@ -435,10 +436,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) int err = 0; bool changed_addr; - /* Don't allow bridging non-ethernet like devices */ + /* Don't allow bridging non-ethernet like devices, or DSA-enabled + * master network devices since the bridge layer rx_handler prevents + * the DSA fake ethertype handler to be invoked, so we do not strip off + * the DSA switch tag protocol header and the bridge layer just return + * RX_HANDLER_CONSUMED, stopping RX processing for these frames. + */ if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || - !is_valid_ether_addr(dev->dev_addr)) + !is_valid_ether_addr(dev->dev_addr) || + netdev_uses_dsa(dev)) return -EINVAL; /* No bridging of bridges */ diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1f1de715197c..e2aa7be3a847 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -154,7 +154,8 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; if (is_broadcast_ether_addr(dest)) { - if (p->flags & BR_PROXYARP && + if (IS_ENABLED(CONFIG_INET) && + p->flags & BR_PROXYARP && skb->protocol == htons(ETH_P_ARP)) br_do_proxy_arp(skb, br, vid); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 5df05269d17a..409608960899 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -190,7 +190,8 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, nla_nest_end(skb, nest2); nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; end: nla_nest_end(skb, nest); @@ -276,7 +277,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL); if (err < 0) return err; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index c190d22b6b3d..65728e0dc4ff 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -66,17 +66,17 @@ static int brnf_pass_vlan_indev __read_mostly = 0; #endif #define IS_IP(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) #define IS_IPV6(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) #define IS_ARP(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) static inline __be16 vlan_proto(const struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) return skb->protocol; else if (skb->protocol == htons(ETH_P_8021Q)) return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -436,11 +436,11 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct struct net_device *vlan, *br; br = bridge_parent(dev); - if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) + if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) return br; vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, - vlan_tx_tag_get(skb) & VLAN_VID_MASK); + skb_vlan_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9f5eb55a4d3a..b93f42c515da 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <net/switchdev.h> #include <uapi/linux/if_bridge.h> #include "br_private.h" @@ -67,6 +68,120 @@ static int br_port_fill_attrs(struct sk_buff *skb, return 0; } +static int br_fill_ifvlaninfo_range(struct sk_buff *skb, u16 vid_start, + u16 vid_end, u16 flags) +{ + struct bridge_vlan_info vinfo; + + if ((vid_end - vid_start) > 0) { + /* add range to skb */ + vinfo.vid = vid_start; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_BEGIN; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + + vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; + + vinfo.vid = vid_end; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_END; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } else { + vinfo.vid = vid_start; + vinfo.flags = flags; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, + const struct net_port_vlans *pv) +{ + u16 vid_range_start = 0, vid_range_end = 0; + u16 vid_range_flags = 0; + u16 pvid, vid, flags; + int err = 0; + + /* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan + * and mark vlan info with begin and end flags + * if vlaninfo represents a range + */ + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + flags = 0; + if (vid == pvid) + flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_range_start == 0) { + goto initvars; + } else if ((vid - vid_range_end) == 1 && + flags == vid_range_flags) { + vid_range_end = vid; + continue; + } else { + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } + +initvars: + vid_range_start = vid; + vid_range_end = vid; + vid_range_flags = flags; + } + + if (vid_range_start != 0) { + /* Call it once more to send any left over vlans */ + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } + + return 0; +} + +static int br_fill_ifvlaninfo(struct sk_buff *skb, + const struct net_port_vlans *pv) +{ + struct bridge_vlan_info vinfo; + u16 pvid, vid; + + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + vinfo.vid = vid; + vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + /* * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. @@ -121,12 +236,11 @@ static int br_fill_ifinfo(struct sk_buff *skb, } /* Check if the VID information is requested */ - if (filter_mask & RTEXT_FILTER_BRVLAN) { - struct nlattr *af; + if ((filter_mask & RTEXT_FILTER_BRVLAN) || + (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { const struct net_port_vlans *pv; - struct bridge_vlan_info vinfo; - u16 vid; - u16 pvid; + struct nlattr *af; + int err; if (port) pv = nbp_get_vlan_info(port); @@ -140,26 +254,18 @@ static int br_fill_ifinfo(struct sk_buff *skb, if (!af) goto nla_put_failure; - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - vinfo.vid = vid; - vinfo.flags = 0; - if (vid == pvid) - vinfo.flags |= BRIDGE_VLAN_INFO_PVID; - - if (test_bit(vid, pv->untagged_bitmap)) - vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; - - if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - goto nla_put_failure; - } - + if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) + err = br_fill_ifvlaninfo_compressed(skb, pv); + else + err = br_fill_ifvlaninfo(skb, pv); + if (err) + goto nla_put_failure; nla_nest_end(skb, af); } done: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -206,69 +312,99 @@ errout: int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask) { - int err = 0; struct net_bridge_port *port = br_port_get_rtnl(dev); - if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) - goto out; + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) && + !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) + return 0; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, - filter_mask, dev); -out: - return err; + return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); } -static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { - [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, - [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, - [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, - .len = sizeof(struct bridge_vlan_info), }, -}; +static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, + int cmd, struct bridge_vlan_info *vinfo) +{ + int err = 0; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else { + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + } + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else { + br_vlan_delete(br, vinfo->vid); + } + break; + } + + return err; +} static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, int cmd) { - struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + struct bridge_vlan_info *vinfo_start = NULL; + struct bridge_vlan_info *vinfo = NULL; + struct nlattr *attr; int err = 0; + int rem; - err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); - if (err) - return err; + nla_for_each_nested(attr, af_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) + continue; + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) + return -EINVAL; + vinfo = nla_data(attr); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vinfo_start) + return -EINVAL; + vinfo_start = vinfo; + continue; + } - if (tb[IFLA_BRIDGE_VLAN_INFO]) { - struct bridge_vlan_info *vinfo; + if (vinfo_start) { + struct bridge_vlan_info tmp_vinfo; + int v; - vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) + return -EINVAL; - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) - return -EINVAL; + if (vinfo->vid <= vinfo_start->vid) + return -EINVAL; - switch (cmd) { - case RTM_SETLINK: - if (p) { - err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + memcpy(&tmp_vinfo, vinfo_start, + sizeof(struct bridge_vlan_info)); + + for (v = vinfo_start->vid; v <= vinfo->vid; v++) { + tmp_vinfo.vid = v; + err = br_vlan_info(br, p, cmd, &tmp_vinfo); if (err) break; - - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid, - vinfo->flags); - } else - err = br_vlan_add(br, vinfo->vid, vinfo->flags); - - break; - - case RTM_DELLINK: - if (p) { - nbp_vlan_delete(p, vinfo->vid); - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - br_vlan_delete(p->br, vinfo->vid); - } else - br_vlan_delete(br, vinfo->vid); - break; + } + vinfo_start = NULL; + } else { + err = br_vlan_info(br, p, cmd, vinfo); } + if (err) + break; } return err; @@ -359,13 +495,13 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) } /* Change state and parameters on port. */ -int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *protinfo; struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; - int err = 0; + int err = 0, ret_offload = 0; protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); @@ -407,19 +543,28 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) afspec, RTM_SETLINK); } + if (!(flags & BRIDGE_FLAGS_SELF)) { + /* set bridge attributes in hardware if supported + */ + ret_offload = netdev_switch_port_bridge_setlink(dev, nlh, + flags); + if (ret_offload && ret_offload != -EOPNOTSUPP) + br_warn(p->br, "error setting attrs on port %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + } + if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); - out: return err; } /* Delete port information */ -int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct net_bridge_port *p; - int err; + int err = 0, ret_offload = 0; afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!afspec) @@ -432,6 +577,21 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) err = br_afspec((struct net_bridge *)netdev_priv(dev), p, afspec, RTM_DELLINK); + if (err == 0) + /* Send RTM_NEWLINK because userspace + * expects RTM_NEWLINK for vlan dels + */ + br_ifinfo_notify(RTM_NEWLINK, p); + + if (!(flags & BRIDGE_FLAGS_SELF)) { + /* del bridge attributes in hardware + */ + ret_offload = netdev_switch_port_bridge_dellink(dev, nlh, + flags); + if (ret_offload && ret_offload != -EOPNOTSUPP) + br_warn(p->br, "error deleting attrs on port %u (%s)\n", + (unsigned int)p->port_no, p->dev->name); + } return err; } @@ -561,7 +721,7 @@ static size_t br_get_link_af_size(const struct net_device *dev) return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); } -static struct rtnl_af_ops br_af_ops = { +static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size, }; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aea3d1339b3f..de0919975a25 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -402,6 +402,10 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); @@ -628,8 +632,8 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) { int err = 0; - if (vlan_tx_tag_present(skb)) - *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + if (skb_vlan_tag_present(skb)) + *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK; else { *vid = 0; err = -EINVAL; @@ -815,8 +819,8 @@ extern struct rtnl_link_ops br_link_ops; int br_netlink_init(void); void br_netlink_fini(void); void br_ifinfo_notify(int event, struct net_bridge_port *port); -int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); -int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); +int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); +int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 97b8ddf57363..13013fe8db24 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -187,7 +187,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * sent from vlan device on the bridge device, it does not have * HW accelerated vlan tag. */ - if (unlikely(!vlan_tx_tag_present(skb) && + if (unlikely(!skb_vlan_tag_present(skb) && skb->protocol == proto)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) @@ -200,7 +200,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* Protocol-mismatch, empty out vlan_tci for new tag */ skb_push(skb, ETH_HLEN); skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (unlikely(!skb)) return false; diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 8d3f8c7651f0..618568888128 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -45,8 +45,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) /* VLAN encapsulated Type/Length field, given from orig frame */ __be16 encap; - if (vlan_tx_tag_present(skb)) { - TCI = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + TCI = skb_vlan_tag_get(skb); encap = skb->protocol; } else { const struct vlan_hdr *fp; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index d9a8c05d995d..91180a7fc943 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -133,7 +133,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, __be16 ethproto; int verdict, i; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) ethproto = htons(ETH_P_8021Q); else ethproto = h->h_proto; diff --git a/net/can/gw.c b/net/can/gw.c index 295f62e62eb3..a6f448e18ea8 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -575,7 +575,8 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; cancel: nlmsg_cancel(skb, nlh); diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 7e38b729696a..ba6eb17226da 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -8,6 +8,7 @@ #include <linux/ceph/decode.h> #include <linux/ceph/auth.h> +#include <linux/ceph/messenger.h> #include "crypto.h" #include "auth_x.h" @@ -293,6 +294,11 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, dout("build_authorizer for %s %p\n", ceph_entity_type_name(th->service), au); + ceph_crypto_key_destroy(&au->session_key); + ret = ceph_crypto_key_clone(&au->session_key, &th->session_key); + if (ret) + return ret; + maxlen = sizeof(*msg_a) + sizeof(msg_b) + ceph_x_encrypt_buflen(ticket_blob_len); dout(" need len %d\n", maxlen); @@ -302,8 +308,10 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, } if (!au->buf) { au->buf = ceph_buffer_new(maxlen, GFP_NOFS); - if (!au->buf) + if (!au->buf) { + ceph_crypto_key_destroy(&au->session_key); return -ENOMEM; + } } au->service = th->service; au->secret_id = th->secret_id; @@ -329,7 +337,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, get_random_bytes(&au->nonce, sizeof(au->nonce)); msg_b.struct_v = 1; msg_b.nonce = cpu_to_le64(au->nonce); - ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b), + ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b), p, end - p); if (ret < 0) goto out_buf; @@ -560,6 +568,8 @@ static int ceph_x_create_authorizer( auth->authorizer_buf_len = au->buf->vec.iov_len; auth->authorizer_reply_buf = au->reply_buf; auth->authorizer_reply_buf_len = sizeof (au->reply_buf); + auth->sign_message = ac->ops->sign_message; + auth->check_message_signature = ac->ops->check_message_signature; return 0; } @@ -588,17 +598,13 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len) { struct ceph_x_authorizer *au = (void *)a; - struct ceph_x_ticket_handler *th; int ret = 0; struct ceph_x_authorize_reply reply; void *preply = &reply; void *p = au->reply_buf; void *end = p + sizeof(au->reply_buf); - th = get_ticket_handler(ac, au->service); - if (IS_ERR(th)) - return PTR_ERR(th); - ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply)); + ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply)); if (ret < 0) return ret; if (ret != sizeof(reply)) @@ -618,6 +624,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac, { struct ceph_x_authorizer *au = (void *)a; + ceph_crypto_key_destroy(&au->session_key); ceph_buffer_put(au->buf); kfree(au); } @@ -663,6 +670,59 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, memset(&th->validity, 0, sizeof(th->validity)); } +static int calcu_signature(struct ceph_x_authorizer *au, + struct ceph_msg *msg, __le64 *sig) +{ + int ret; + char tmp_enc[40]; + __le32 tmp[5] = { + cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc, + msg->footer.middle_crc, msg->footer.data_crc, + }; + ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp), + tmp_enc, sizeof(tmp_enc)); + if (ret < 0) + return ret; + *sig = *(__le64*)(tmp_enc + 4); + return 0; +} + +static int ceph_x_sign_message(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + int ret; + if (!auth->authorizer) + return 0; + ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &msg->footer.sig); + if (ret < 0) + return ret; + msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED; + return 0; +} + +static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth, + struct ceph_msg *msg) +{ + __le64 sig_check; + int ret; + + if (!auth->authorizer) + return 0; + ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &sig_check); + if (ret < 0) + return ret; + if (sig_check == msg->footer.sig) + return 0; + if (msg->footer.flags & CEPH_MSG_FOOTER_SIGNED) + dout("ceph_x_check_message_signature %p has signature %llx " + "expect %llx\n", msg, msg->footer.sig, sig_check); + else + dout("ceph_x_check_message_signature %p sender did not set " + "CEPH_MSG_FOOTER_SIGNED\n", msg); + return -EBADMSG; +} static const struct ceph_auth_client_ops ceph_x_ops = { .name = "x", @@ -677,6 +737,8 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .invalidate_authorizer = ceph_x_invalidate_authorizer, .reset = ceph_x_reset, .destroy = ceph_x_destroy, + .sign_message = ceph_x_sign_message, + .check_message_signature = ceph_x_check_message_signature, }; diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 65ee72082d99..e8b7c6917d47 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -26,6 +26,7 @@ struct ceph_x_ticket_handler { struct ceph_x_authorizer { + struct ceph_crypto_key session_key; struct ceph_buffer *buf; unsigned int service; u64 nonce; diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c index 621b5f65407f..add5f921a0ff 100644 --- a/net/ceph/buffer.c +++ b/net/ceph/buffer.c @@ -6,7 +6,7 @@ #include <linux/ceph/buffer.h> #include <linux/ceph/decode.h> -#include <linux/ceph/libceph.h> /* for ceph_kv{malloc,free} */ +#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) { @@ -35,7 +35,7 @@ void ceph_buffer_release(struct kref *kref) struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref); dout("buffer_release %p\n", b); - ceph_kvfree(b->vec.iov_base); + kvfree(b->vec.iov_base); kfree(b); } EXPORT_SYMBOL(ceph_buffer_release); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 58fbfe134f93..5d5ab67f516d 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -184,14 +184,6 @@ void *ceph_kvmalloc(size_t size, gfp_t flags) return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); } -void ceph_kvfree(const void *ptr) -{ - if (is_vmalloc_addr(ptr)) - vfree(ptr); - else - kfree(ptr); -} - static int parse_fsid(const char *str, struct ceph_fsid *fsid) { @@ -245,6 +237,8 @@ enum { Opt_noshare, Opt_crc, Opt_nocrc, + Opt_cephx_require_signatures, + Opt_nocephx_require_signatures, }; static match_table_t opt_tokens = { @@ -263,6 +257,8 @@ static match_table_t opt_tokens = { {Opt_noshare, "noshare"}, {Opt_crc, "crc"}, {Opt_nocrc, "nocrc"}, + {Opt_cephx_require_signatures, "cephx_require_signatures"}, + {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, {-1, NULL} }; @@ -461,6 +457,12 @@ ceph_parse_options(char *options, const char *dev_name, case Opt_nocrc: opt->flags |= CEPH_OPT_NOCRC; break; + case Opt_cephx_require_signatures: + opt->flags &= ~CEPH_OPT_NOMSGAUTH; + break; + case Opt_nocephx_require_signatures: + opt->flags |= CEPH_OPT_NOMSGAUTH; + break; default: BUG_ON(token); @@ -504,6 +506,9 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, init_waitqueue_head(&client->auth_wq); client->auth_err = 0; + if (!ceph_test_opt(client, NOMSGAUTH)) + required_features |= CEPH_FEATURE_MSG_AUTH; + client->extra_mon_dispatch = NULL; client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT | supported_features; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 8d1653caffdb..33a2f201e460 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1196,8 +1196,18 @@ static void prepare_write_message_footer(struct ceph_connection *con) dout("prepare_write_message_footer %p\n", con); con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; - con->out_kvec[v].iov_len = sizeof(m->footer); - con->out_kvec_bytes += sizeof(m->footer); + if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { + if (con->ops->sign_message) + con->ops->sign_message(con, m); + else + m->footer.sig = 0; + con->out_kvec[v].iov_len = sizeof(m->footer); + con->out_kvec_bytes += sizeof(m->footer); + } else { + m->old_footer.flags = m->footer.flags; + con->out_kvec[v].iov_len = sizeof(m->old_footer); + con->out_kvec_bytes += sizeof(m->old_footer); + } con->out_kvec_left++; con->out_more = m->more_to_follow; con->out_msg_done = true; @@ -2249,6 +2259,7 @@ static int read_partial_message(struct ceph_connection *con) int ret; unsigned int front_len, middle_len, data_len; bool do_datacrc = !con->msgr->nocrc; + bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); u64 seq; u32 crc; @@ -2361,12 +2372,21 @@ static int read_partial_message(struct ceph_connection *con) } /* footer */ - size = sizeof (m->footer); + if (need_sign) + size = sizeof(m->footer); + else + size = sizeof(m->old_footer); + end += size; ret = read_partial(con, end, size, &m->footer); if (ret <= 0) return ret; + if (!need_sign) { + m->footer.flags = m->old_footer.flags; + m->footer.sig = 0; + } + dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", m, front_len, m->footer.front_crc, middle_len, m->footer.middle_crc, data_len, m->footer.data_crc); @@ -2390,6 +2410,12 @@ static int read_partial_message(struct ceph_connection *con) return -EBADMSG; } + if (need_sign && con->ops->check_message_signature && + con->ops->check_message_signature(con, m)) { + pr_err("read_partial_message %p signature check failed\n", m); + return -EBADMSG; + } + return 1; /* done! */ } @@ -3288,7 +3314,7 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) static void ceph_msg_free(struct ceph_msg *m) { dout("%s %p\n", __func__, m); - ceph_kvfree(m->front.iov_base); + kvfree(m->front.iov_base); kmem_cache_free(ceph_msg_cache, m); } diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index a83062ceeec9..f2148e22b148 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -717,7 +717,7 @@ static int get_poolop_reply_buf(const char *src, size_t src_len, if (src_len != sizeof(u32) + dst_len) return -EINVAL; - buf_len = le32_to_cpu(*(u32 *)src); + buf_len = le32_to_cpu(*(__le32 *)src); if (buf_len != dst_len) return -EINVAL; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6f164289bde8..53299c7b0ca4 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -292,6 +292,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, ceph_osd_data_release(&op->cls.request_data); ceph_osd_data_release(&op->cls.response_data); break; + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_CMPXATTR: + ceph_osd_data_release(&op->xattr.osd_data); + break; default: break; } @@ -476,8 +480,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, size_t payload_len = 0; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && - opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO && - opcode != CEPH_OSD_OP_TRUNCATE); + opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE); op->extent.offset = offset; op->extent.length = length; @@ -545,6 +548,39 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, } EXPORT_SYMBOL(osd_req_op_cls_init); +int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, + u16 opcode, const char *name, const void *value, + size_t size, u8 cmp_op, u8 cmp_mode) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_pagelist *pagelist; + size_t payload_len; + + BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR); + + pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + if (!pagelist) + return -ENOMEM; + + ceph_pagelist_init(pagelist); + + payload_len = strlen(name); + op->xattr.name_len = payload_len; + ceph_pagelist_append(pagelist, name, payload_len); + + op->xattr.value_len = size; + ceph_pagelist_append(pagelist, value, size); + payload_len += size; + + op->xattr.cmp_op = cmp_op; + op->xattr.cmp_mode = cmp_mode; + + ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); + op->payload_len = payload_len; + return 0; +} +EXPORT_SYMBOL(osd_req_op_xattr_init); + void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag) @@ -626,7 +662,6 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, case CEPH_OSD_OP_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_ZERO: - case CEPH_OSD_OP_DELETE: case CEPH_OSD_OP_TRUNCATE: if (src->op == CEPH_OSD_OP_WRITE) request_data_len = src->extent.length; @@ -676,6 +711,19 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, dst->alloc_hint.expected_write_size = cpu_to_le64(src->alloc_hint.expected_write_size); break; + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_CMPXATTR: + dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); + dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); + dst->xattr.cmp_op = src->xattr.cmp_op; + dst->xattr.cmp_mode = src->xattr.cmp_mode; + osd_data = &src->xattr.osd_data; + ceph_osdc_msg_data_add(req->r_request, osd_data); + request_data_len = osd_data->pagelist->length; + break; + case CEPH_OSD_OP_CREATE: + case CEPH_OSD_OP_DELETE: + break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); @@ -705,7 +753,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, struct ceph_vino vino, - u64 off, u64 *plen, int num_ops, + u64 off, u64 *plen, + unsigned int which, int num_ops, int opcode, int flags, struct ceph_snap_context *snapc, u32 truncate_seq, @@ -716,13 +765,11 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, u64 objnum = 0; u64 objoff = 0; u64 objlen = 0; - u32 object_size; - u64 object_base; int r; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && - opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO && - opcode != CEPH_OSD_OP_TRUNCATE); + opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE && + opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE); req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, GFP_NOFS); @@ -738,29 +785,24 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, return ERR_PTR(r); } - object_size = le32_to_cpu(layout->fl_object_size); - object_base = off - objoff; - if (!(truncate_seq == 1 && truncate_size == -1ULL)) { - if (truncate_size <= object_base) { - truncate_size = 0; - } else { - truncate_size -= object_base; - if (truncate_size > object_size) - truncate_size = object_size; + if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { + osd_req_op_init(req, which, opcode); + } else { + u32 object_size = le32_to_cpu(layout->fl_object_size); + u32 object_base = off - objoff; + if (!(truncate_seq == 1 && truncate_size == -1ULL)) { + if (truncate_size <= object_base) { + truncate_size = 0; + } else { + truncate_size -= object_base; + if (truncate_size > object_size) + truncate_size = object_size; + } } + osd_req_op_extent_init(req, which, opcode, objoff, objlen, + truncate_size, truncate_seq); } - osd_req_op_extent_init(req, 0, opcode, objoff, objlen, - truncate_size, truncate_seq); - - /* - * A second op in the ops array means the caller wants to - * also issue a include a 'startsync' command so that the - * osd will flush data quickly. - */ - if (num_ops > 1) - osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); - req->r_base_oloc.pool = ceph_file_layout_pg_pool(*layout); snprintf(req->r_base_oid.name, sizeof(req->r_base_oid.name), @@ -2626,7 +2668,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, vino.snap, off, *plen); - req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1, + req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, truncate_seq, truncate_size, false); @@ -2669,7 +2711,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, int page_align = off & ~PAGE_MASK; BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */ - req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1, + req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, snapc, truncate_seq, truncate_size, @@ -2920,6 +2962,20 @@ static int invalidate_authorizer(struct ceph_connection *con) return ceph_monc_validate_auth(&osdc->client->monc); } +static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) +{ + struct ceph_osd *o = con->private; + struct ceph_auth_handshake *auth = &o->o_auth; + return ceph_auth_sign_message(auth, msg); +} + +static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) +{ + struct ceph_osd *o = con->private; + struct ceph_auth_handshake *auth = &o->o_auth; + return ceph_auth_check_message_signature(auth, msg); +} + static const struct ceph_connection_operations osd_con_ops = { .get = get_osd_con, .put = put_osd_con, @@ -2928,5 +2984,7 @@ static const struct ceph_connection_operations osd_con_ops = { .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .alloc_msg = alloc_msg, + .sign_message = sign_message, + .check_message_signature = check_message_signature, .fault = osd_reset, }; diff --git a/net/core/dev.c b/net/core/dev.c index f411c28d0a66..1d564d68e31a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) static inline struct list_head *ptype_head(const struct packet_type *pt) { if (pt->type == htons(ETH_P_ALL)) - return &ptype_all; + return pt->dev ? &pt->dev->ptype_all : &ptype_all; else - return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; + return pt->dev ? &pt->dev->ptype_specific : + &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; } /** @@ -1694,6 +1695,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb_scrub_packet(skb, true); skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); return 0; } @@ -1733,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } +static inline void deliver_ptype_list_skb(struct sk_buff *skb, + struct packet_type **pt, + struct net_device *dev, __be16 type, + struct list_head *ptype_list) +{ + struct packet_type *ptype, *pt_prev = *pt; + + list_for_each_entry_rcu(ptype, ptype_list, list) { + if (ptype->type != type) + continue; + if (pt_prev) + deliver_skb(skb, pt_prev, dev); + pt_prev = ptype; + } + *pt = pt_prev; +} + static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) { if (!ptype->af_packet_priv || !skb->sk) @@ -1756,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) struct packet_type *ptype; struct sk_buff *skb2 = NULL; struct packet_type *pt_prev = NULL; + struct list_head *ptype_list = &ptype_all; rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_all, list) { +again: + list_for_each_entry_rcu(ptype, ptype_list, list) { /* Never send packets back to the socket * they originated from - MvS (miquels@drinkel.ow.org) */ - if ((ptype->dev == dev || !ptype->dev) && - (!skb_loop_sk(ptype, skb))) { - if (pt_prev) { - deliver_skb(skb2, pt_prev, skb->dev); - pt_prev = ptype; - continue; - } + if (skb_loop_sk(ptype, skb)) + continue; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - break; + if (pt_prev) { + deliver_skb(skb2, pt_prev, skb->dev); + pt_prev = ptype; + continue; + } - net_timestamp_set(skb2); + /* need to clone skb, done only once */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + goto out_unlock; - /* skb->nh should be correctly - set by sender, so that the second statement is - just protection against buggy protocols. - */ - skb_reset_mac_header(skb2); - - if (skb_network_header(skb2) < skb2->data || - skb_network_header(skb2) > skb_tail_pointer(skb2)) { - net_crit_ratelimited("protocol %04x is buggy, dev %s\n", - ntohs(skb2->protocol), - dev->name); - skb_reset_network_header(skb2); - } + net_timestamp_set(skb2); - skb2->transport_header = skb2->network_header; - skb2->pkt_type = PACKET_OUTGOING; - pt_prev = ptype; + /* skb->nh should be correctly + * set by sender, so that the second statement is + * just protection against buggy protocols. + */ + skb_reset_mac_header(skb2); + + if (skb_network_header(skb2) < skb2->data || + skb_network_header(skb2) > skb_tail_pointer(skb2)) { + net_crit_ratelimited("protocol %04x is buggy, dev %s\n", + ntohs(skb2->protocol), + dev->name); + skb_reset_network_header(skb2); } + + skb2->transport_header = skb2->network_header; + skb2->pkt_type = PACKET_OUTGOING; + pt_prev = ptype; + } + + if (ptype_list == &ptype_all) { + ptype_list = &dev->ptype_all; + goto again; } +out_unlock: if (pt_prev) pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); rcu_read_unlock(); @@ -2522,7 +2550,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) /* If MPLS offload request, verify we are testing hardware MPLS features * instead of standard features for the netdev. */ -#ifdef CONFIG_NET_MPLS_GSO +#if IS_ENABLED(CONFIG_NET_MPLS_GSO) static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) @@ -2562,7 +2590,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, netdev_features_t netif_skb_features(struct sk_buff *skb) { - const struct net_device *dev = skb->dev; + struct net_device *dev = skb->dev; netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; __be16 protocol = skb->protocol; @@ -2570,11 +2598,21 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, features); + /* If encapsulation offload request, verify we are testing + * hardware encapsulation features instead of standard + * features for the netdev + */ + if (skb->encapsulation) + features &= dev->hw_enc_features; + + if (!skb_vlan_tag_present(skb)) { + if (unlikely(protocol == htons(ETH_P_8021Q) || + protocol == htons(ETH_P_8021AD))) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else { + goto finalize; + } } features = netdev_intersect_features(features, @@ -2591,6 +2629,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); +finalize: + if (dev->netdev_ops->ndo_features_check) + features &= dev->netdev_ops->ndo_features_check(skb, dev, + features); + return harmonize_features(skb, features); } EXPORT_SYMBOL(netif_skb_features); @@ -2601,7 +2644,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int len; int rc; - if (!list_empty(&ptype_all)) + if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) dev_queue_xmit_nit(skb, dev); len = skb->len; @@ -2643,7 +2686,7 @@ out: static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; @@ -2661,19 +2704,12 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (unlikely(!skb)) goto out_null; - /* If encapsulation offload request, verify we are testing - * hardware encapsulation features instead of standard - * features for the netdev - */ - if (skb->encapsulation) - features &= dev->hw_enc_features; - if (netif_needs_gso(dev, skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); if (IS_ERR(segs)) { - segs = NULL; + goto out_kfree_skb; } else if (segs) { consume_skb(skb); skb = segs; @@ -3606,7 +3642,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *null_or_dev; bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; @@ -3649,11 +3684,15 @@ another_round: goto skip_taps; list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (!ptype->dev || ptype->dev == skb->dev) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; + } + + list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) { + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; } skip_taps: @@ -3667,7 +3706,7 @@ ncls: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; @@ -3699,8 +3738,8 @@ ncls: } } - if (unlikely(vlan_tx_tag_present(skb))) { - if (vlan_tx_tag_get_id(skb)) + if (unlikely(skb_vlan_tag_present(skb))) { + if (skb_vlan_tag_get_id(skb)) skb->pkt_type = PACKET_OTHERHOST; /* Note: we might in the future use prio bits * and set skb->priority like in vlan_do_receive() @@ -3709,19 +3748,21 @@ ncls: skb->vlan_tci = 0; } + type = skb->protocol; + /* deliver only exact match when indicated */ - null_or_dev = deliver_exact ? skb->dev : NULL; + if (likely(!deliver_exact)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &ptype_base[ntohs(type) & + PTYPE_HASH_MASK]); + } - type = skb->protocol; - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && - (ptype->dev == null_or_dev || ptype->dev == skb->dev || - ptype->dev == orig_dev)) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &orig_dev->ptype_specific); + + if (unlikely(skb->dev != orig_dev)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &skb->dev->ptype_specific); } if (pt_prev) { @@ -4557,6 +4598,68 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); +static int napi_poll(struct napi_struct *n, struct list_head *repoll) +{ + void *have; + int work, weight; + + list_del_init(&n->poll_list); + + have = netpoll_poll_lock(n); + + weight = n->weight; + + /* This NAPI_STATE_SCHED test is for avoiding a race + * with netpoll's poll_napi(). Only the entity which + * obtains the lock and sees NAPI_STATE_SCHED set will + * actually make the ->poll() call. Therefore we avoid + * accidentally calling ->poll() when NAPI is not scheduled. + */ + work = 0; + if (test_bit(NAPI_STATE_SCHED, &n->state)) { + work = n->poll(n, weight); + trace_napi_poll(n); + } + + WARN_ON_ONCE(work > weight); + + if (likely(work < weight)) + goto out_unlock; + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code + * still "owns" the NAPI instance and therefore can + * move the instance around on the list at-will. + */ + if (unlikely(napi_disable_pending(n))) { + napi_complete(n); + goto out_unlock; + } + + if (n->gro_list) { + /* flush too old packets + * If HZ < 1000, flush all packets. + */ + napi_gro_flush(n, HZ >= 1000); + } + + /* Some drivers may have called napi_schedule + * prior to exhausting their budget. + */ + if (unlikely(!list_empty(&n->poll_list))) { + pr_warn_once("%s: Budget exhausted after napi rescheduled\n", + n->dev ? n->dev->name : "backlog"); + goto out_unlock; + } + + list_add_tail(&n->poll_list, repoll); + +out_unlock: + netpoll_poll_unlock(have); + + return work; +} + static void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -4564,74 +4667,34 @@ static void net_rx_action(struct softirq_action *h) int budget = netdev_budget; LIST_HEAD(list); LIST_HEAD(repoll); - void *have; local_irq_disable(); list_splice_init(&sd->poll_list, &list); local_irq_enable(); - while (!list_empty(&list)) { + for (;;) { struct napi_struct *n; - int work, weight; - - /* If softirq window is exhausted then punt. - * Allow this to run for 2 jiffies since which will allow - * an average latency of 1.5/HZ. - */ - if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) - goto softnet_break; - - n = list_first_entry(&list, struct napi_struct, poll_list); - list_del_init(&n->poll_list); - - have = netpoll_poll_lock(n); - - weight = n->weight; - - /* This NAPI_STATE_SCHED test is for avoiding a race - * with netpoll's poll_napi(). Only the entity which - * obtains the lock and sees NAPI_STATE_SCHED set will - * actually make the ->poll() call. Therefore we avoid - * accidentally calling ->poll() when NAPI is not scheduled. - */ - work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) { - work = n->poll(n, weight); - trace_napi_poll(n); + if (list_empty(&list)) { + if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) + return; + break; } - WARN_ON_ONCE(work > weight); - - budget -= work; + n = list_first_entry(&list, struct napi_struct, poll_list); + budget -= napi_poll(n, &repoll); - /* Drivers must not modify the NAPI state if they - * consume the entire weight. In such cases this code - * still "owns" the NAPI instance and therefore can - * move the instance around on the list at-will. + /* If softirq window is exhausted then punt. + * Allow this to run for 2 jiffies since which will allow + * an average latency of 1.5/HZ. */ - if (unlikely(work == weight)) { - if (unlikely(napi_disable_pending(n))) { - napi_complete(n); - } else { - if (n->gro_list) { - /* flush too old packets - * If HZ < 1000, flush all packets. - */ - napi_gro_flush(n, HZ >= 1000); - } - list_add_tail(&n->poll_list, &repoll); - } + if (unlikely(budget <= 0 || + time_after_eq(jiffies, time_limit))) { + sd->time_squeeze++; + break; } - - netpoll_poll_unlock(have); } - if (!sd_has_rps_ipi_waiting(sd) && - list_empty(&list) && - list_empty(&repoll)) - return; -out: local_irq_disable(); list_splice_tail_init(&sd->poll_list, &list); @@ -4641,12 +4704,6 @@ out: __raise_softirq_irqoff(NET_RX_SOFTIRQ); net_rps_action_and_irq_enable(sd); - - return; - -softnet_break: - sd->time_squeeze++; - goto out; } struct netdev_adjacent { @@ -6147,13 +6204,16 @@ static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; + size_t sz = count * sizeof(*rx); BUG_ON(count < 1); - rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); - if (!rx) - return -ENOMEM; - + rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!rx) { + rx = vzalloc(sz); + if (!rx) + return -ENOMEM; + } dev->_rx = rx; for (i = 0; i < count; i++) @@ -6551,6 +6611,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); + BUG_ON(!list_empty(&dev->ptype_all)); + BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); @@ -6733,6 +6795,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->adj_list.lower); INIT_LIST_HEAD(&dev->all_adj_list.upper); INIT_LIST_HEAD(&dev->all_adj_list.lower); + INIT_LIST_HEAD(&dev->ptype_all); + INIT_LIST_HEAD(&dev->ptype_specific); dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -6783,7 +6847,7 @@ void free_netdev(struct net_device *dev) netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS - kfree(dev->_rx); + kvfree(dev->_rx); #endif kfree(rcu_dereference_protected(dev->ingress_queue, 1)); @@ -7047,10 +7111,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } - /* Append NAPI poll list from offline CPU. */ - if (!list_empty(&oldsd->poll_list)) { - list_splice_init(&oldsd->poll_list, &sd->poll_list); - raise_softirq_irqoff(NET_RX_SOFTIRQ); + /* Append NAPI poll list from offline CPU, with one exception : + * process_backlog() must be called by cpu owning percpu backlog. + * We properly handle process_queue & input_pkt_queue later. + */ + while (!list_empty(&oldsd->poll_list)) { + struct napi_struct *napi = list_first_entry(&oldsd->poll_list, + struct napi_struct, + poll_list); + + list_del_init(&napi->poll_list); + if (napi->poll == process_backlog) + napi->state = 0; + else + ____napi_schedule(sd, napi); } raise_softirq_irqoff(NET_TX_SOFTIRQ); @@ -7061,7 +7135,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, netif_rx_internal(skb); input_queue_head_incr(oldsd); } - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { + while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx_internal(skb); input_queue_head_incr(oldsd); } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 550892cd6b3f..91f74f3eb204 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1597,20 +1597,31 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) return err; } +static int __ethtool_get_module_info(struct net_device *dev, + struct ethtool_modinfo *modinfo) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_info) + return phydev->drv->module_info(phydev, modinfo); + + if (ops->get_module_info) + return ops->get_module_info(dev, modinfo); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_info(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_module_info) - return -EOPNOTSUPP; if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) return -EFAULT; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; @@ -1620,21 +1631,33 @@ static int ethtool_get_module_info(struct net_device *dev, return 0; } +static int __ethtool_get_module_eeprom(struct net_device *dev, + struct ethtool_eeprom *ee, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_eeprom) + return phydev->drv->module_eeprom(phydev, ee, data); + + if (ops->get_module_eeprom) + return ops->get_module_eeprom(dev, ee, data); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_eeprom(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_module_info || !ops->get_module_eeprom) - return -EOPNOTSUPP; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; - return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom, + return ethtool_get_any_eeprom(dev, useraddr, + __ethtool_get_module_eeprom, modinfo.eeprom_len); } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 185c341fafbd..44706e81b2e0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -609,7 +609,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 45084938c403..2c35c02a931e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -178,6 +178,20 @@ ipv6: return false; } } + case htons(ETH_P_TIPC): { + struct { + __be32 pre[3]; + __be32 srcnode; + } *hdr, _hdr; + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); + if (!hdr) + return false; + flow->src = hdr->srcnode; + flow->dst = 0; + flow->n_proto = proto; + flow->thoff = (u16)nhoff; + return true; + } case htons(ETH_P_FCOE): flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ @@ -408,7 +422,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { map = rcu_dereference( - dev_maps->cpu_map[raw_smp_processor_id()]); + dev_maps->cpu_map[skb->sender_cpu - 1]); if (map) { if (map->len == 1) queue_index = map->queues[0]; @@ -454,6 +468,11 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, { int queue_index = 0; +#ifdef CONFIG_XPS + if (skb->sender_cpu == 0) + skb->sender_cpu = raw_smp_processor_id() + 1; +#endif + if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8e38f17288d3..70fe9e10ac86 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1884,7 +1884,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, goto nla_put_failure; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: read_unlock_bh(&tbl->lock); @@ -1917,7 +1918,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb, goto errout; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); @@ -2043,6 +2045,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) case NDTPA_BASE_REACHABLE_TIME: NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, nla_get_msecs(tbp[i])); + /* update reachable_time as well, otherwise, the change will + * only be effective after the next time neigh_periodic_work + * decides to recompute it (can be multiple minutes) + */ + p->reachable_time = + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); break; case NDTPA_GC_STALETIME: NEIGH_VAR_SET(p, GC_STALETIME, @@ -2120,7 +2128,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + NLM_F_MULTI) < 0) break; nidx = 0; @@ -2136,7 +2144,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + NLM_F_MULTI) < 0) goto out; next: nidx++; @@ -2196,7 +2204,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2226,7 +2235,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2264,7 +2274,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI) <= 0) { + NLM_F_MULTI) < 0) { rc = -1; goto out; } @@ -2301,7 +2311,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI, tbl) <= 0) { + NLM_F_MULTI, tbl) < 0) { read_unlock_bh(&tbl->lock); rc = -1; goto out; @@ -2921,6 +2931,31 @@ static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, return ret; } +static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + struct neigh_parms *p = ctl->extra2; + int ret; + + if (strcmp(ctl->procname, "base_reachable_time") == 0) + ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); + else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0) + ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); + else + ret = -1; + + if (write && ret == 0) { + /* update reachable_time as well, otherwise, the change will + * only be effective after the next time neigh_periodic_work + * decides to recompute it + */ + p->reachable_time = + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); + } + return ret; +} + #define NEIGH_PARMS_DATA_OFFSET(index) \ (&((struct neigh_parms *) 0)->data[index]) @@ -3047,6 +3082,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; /* ReachableTime (in milliseconds) */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; + } else { + /* Those handlers will update p->reachable_time after + * base_reachable_time(_ms) is set to ensure the new timer starts being + * applied after the next neighbour update instead of waiting for + * neigh_periodic_work to update its value (can be multiple minutes) + * So any handler that replaces them should do this as well + */ + /* ReachableTime */ + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = + neigh_proc_base_reachable_time; + /* ReachableTime (in milliseconds) */ + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = + neigh_proc_base_reachable_time; } /* Don't export sysctls to unprivileged users */ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 5d5ee8f3e4ff..cb5290b8c428 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -15,6 +15,10 @@ #include <linux/file.h> #include <linux/export.h> #include <linux/user_namespace.h> +#include <linux/net_namespace.h> +#include <linux/rtnetlink.h> +#include <net/sock.h> +#include <net/netlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -144,6 +148,78 @@ static void ops_free_list(const struct pernet_operations *ops, } } +static int alloc_netid(struct net *net, struct net *peer, int reqid) +{ + int min = 0, max = 0; + + ASSERT_RTNL(); + + if (reqid >= 0) { + min = reqid; + max = reqid + 1; + } + + return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); +} + +/* This function is used by idr_for_each(). If net is equal to peer, the + * function returns the id so that idr_for_each() stops. Because we cannot + * returns the id 0 (idr_for_each() will not stop), we return the magic value + * NET_ID_ZERO (-1) for it. + */ +#define NET_ID_ZERO -1 +static int net_eq_idr(int id, void *net, void *peer) +{ + if (net_eq(net, peer)) + return id ? : NET_ID_ZERO; + return 0; +} + +static int __peernet2id(struct net *net, struct net *peer, bool alloc) +{ + int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); + + ASSERT_RTNL(); + + /* Magic value for id 0. */ + if (id == NET_ID_ZERO) + return 0; + if (id > 0) + return id; + + if (alloc) + return alloc_netid(net, peer, -1); + + return -ENOENT; +} + +/* This function returns the id of a peer netns. If no id is assigned, one will + * be allocated and returned. + */ +int peernet2id(struct net *net, struct net *peer) +{ + int id = __peernet2id(net, peer, true); + + return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; +} +EXPORT_SYMBOL(peernet2id); + +struct net *get_net_ns_by_id(struct net *net, int id) +{ + struct net *peer; + + if (id < 0) + return NULL; + + rcu_read_lock(); + peer = idr_find(&net->netns_ids, id); + if (peer) + get_net(peer); + rcu_read_unlock(); + + return peer; +} + /* * setup_net runs the initializers for the network namespace object. */ @@ -158,6 +234,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) atomic_set(&net->passive, 1); net->dev_base_seq = 1; net->user_ns = user_ns; + idr_init(&net->netns_ids); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -288,6 +365,14 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry(net, &net_kill_list, cleanup_list) { list_del_rcu(&net->list); list_add_tail(&net->exit_list, &net_exit_list); + for_each_net(tmp) { + int id = __peernet2id(tmp, net, false); + + if (id >= 0) + idr_remove(&tmp->netns_ids, id); + } + idr_destroy(&net->netns_ids); + } rtnl_unlock(); @@ -337,17 +422,17 @@ EXPORT_SYMBOL_GPL(__put_net); struct net *get_net_ns_by_fd(int fd) { - struct proc_ns *ei; struct file *file; + struct ns_common *ns; struct net *net; file = proc_ns_fget(fd); if (IS_ERR(file)) return ERR_CAST(file); - ei = get_proc_ns(file_inode(file)); - if (ei->ns_ops == &netns_operations) - net = get_net(ei->ns); + ns = get_proc_ns(file_inode(file)); + if (ns->ops == &netns_operations) + net = get_net(container_of(ns, struct net, ns)); else net = ERR_PTR(-EINVAL); @@ -387,12 +472,15 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_pid); static __net_init int net_ns_net_init(struct net *net) { - return proc_alloc_inum(&net->proc_inum); +#ifdef CONFIG_NET_NS + net->ns.ops = &netns_operations; +#endif + return ns_alloc_inum(&net->ns); } static __net_exit void net_ns_net_exit(struct net *net) { - proc_free_inum(net->proc_inum); + ns_free_inum(&net->ns); } static struct pernet_operations __net_initdata net_ns_ops = { @@ -400,6 +488,130 @@ static struct pernet_operations __net_initdata net_ns_ops = { .exit = net_ns_net_exit, }; +static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { + [NETNSA_NONE] = { .type = NLA_UNSPEC }, + [NETNSA_NSID] = { .type = NLA_S32 }, + [NETNSA_PID] = { .type = NLA_U32 }, + [NETNSA_FD] = { .type = NLA_U32 }, +}; + +static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct net *peer; + int nsid, err; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (!tb[NETNSA_NSID]) + return -EINVAL; + nsid = nla_get_s32(tb[NETNSA_NSID]); + + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + if (IS_ERR(peer)) + return PTR_ERR(peer); + + if (__peernet2id(net, peer, false) >= 0) { + err = -EEXIST; + goto out; + } + + err = alloc_netid(net, peer, nsid); + if (err > 0) + err = 0; +out: + put_net(peer); + return err; +} + +static int rtnl_net_get_size(void) +{ + return NLMSG_ALIGN(sizeof(struct rtgenmsg)) + + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ + ; +} + +static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, + int cmd, struct net *net, struct net *peer) +{ + struct nlmsghdr *nlh; + struct rtgenmsg *rth; + int id; + + ASSERT_RTNL(); + + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); + if (!nlh) + return -EMSGSIZE; + + rth = nlmsg_data(nlh); + rth->rtgen_family = AF_UNSPEC; + + id = __peernet2id(net, peer, false); + if (id < 0) + id = NETNSA_NSID_NOT_ASSIGNED; + if (nla_put_s32(skb, NETNSA_NSID, id)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct sk_buff *msg; + int err = -ENOBUFS; + struct net *peer; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + + if (IS_ERR(peer)) + return PTR_ERR(peer); + + msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + RTM_GETNSID, net, peer); + if (err < 0) + goto err_out; + + err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid); + goto out; + +err_out: + nlmsg_free(msg); +out: + put_net(peer); + return err; +} + static int __init net_ns_init(void) { struct net_generic *ng; @@ -433,6 +645,9 @@ static int __init net_ns_init(void) register_pernet_subsys(&net_ns_ops); + rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL); + return 0; } @@ -630,7 +845,7 @@ void unregister_pernet_device(struct pernet_operations *ops) EXPORT_SYMBOL_GPL(unregister_pernet_device); #ifdef CONFIG_NET_NS -static void *netns_get(struct task_struct *task) +static struct ns_common *netns_get(struct task_struct *task) { struct net *net = NULL; struct nsproxy *nsproxy; @@ -641,17 +856,22 @@ static void *netns_get(struct task_struct *task) net = get_net(nsproxy->net_ns); task_unlock(task); - return net; + return net ? &net->ns : NULL; +} + +static inline struct net *to_net_ns(struct ns_common *ns) +{ + return container_of(ns, struct net, ns); } -static void netns_put(void *ns) +static void netns_put(struct ns_common *ns) { - put_net(ns); + put_net(to_net_ns(ns)); } -static int netns_install(struct nsproxy *nsproxy, void *ns) +static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns) { - struct net *net = ns; + struct net *net = to_net_ns(ns); if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) @@ -662,18 +882,11 @@ static int netns_install(struct nsproxy *nsproxy, void *ns) return 0; } -static unsigned int netns_inum(void *ns) -{ - struct net *net = ns; - return net->proc_inum; -} - const struct proc_ns_operations netns_operations = { .name = "net", .type = CLONE_NEWNET, .get = netns_get, .put = netns_put, .install = netns_install, - .inum = netns_inum, }; #endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e0ad5d16c9c5..c126a878c47c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -77,7 +77,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, features = netif_skb_features(skb); - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d06107d36ec8..673cb4c6f391 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -50,6 +50,7 @@ #include <net/arp.h> #include <net/route.h> #include <net/udp.h> +#include <net/tcp.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/fib_rules.h> @@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) for (i = 0; i < RTAX_MAX; i++) { if (metrics[i]) { + if (i == RTAX_CC_ALGO - 1) { + char tmp[TCP_CA_NAME_MAX], *name; + + name = tcp_ca_get_name_by_key(metrics[i], tmp); + if (!name) + continue; + if (nla_put_string(skb, i + 1, name)) + goto nla_put_failure; + } else { + if (nla_put_u32(skb, i + 1, metrics[i])) + goto nla_put_failure; + } valid++; - if (nla_put_u32(skb, i+1, metrics[i])) - goto nla_put_failure; } } @@ -864,6 +875,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + + nla_total_size(4) /* IFLA_LINK_NETNSID */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ @@ -1158,6 +1170,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (dev->rtnl_link_ops && + dev->rtnl_link_ops->get_link_net) { + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { + int id = peernet2id(dev_net(dev), link_net); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + goto nla_put_failure; + } + } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) goto nla_put_failure; @@ -1188,7 +1212,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, af_spec); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1223,6 +1248,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, + [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1315,7 +1341,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) */ WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err <= 0) + if (err < 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1996,7 +2022,7 @@ replay: struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; struct nlattr **data = NULL; struct nlattr **slave_data = NULL; - struct net *dest_net; + struct net *dest_net, *link_net = NULL; if (ops) { if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { @@ -2102,7 +2128,18 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); - dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb); + if (tb[IFLA_LINK_NETNSID]) { + int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); + + link_net = get_net_ns_by_id(dest_net, id); + if (!link_net) { + err = -EINVAL; + goto out; + } + } + + dev = rtnl_create_link(link_net ? : dest_net, ifname, + name_assign_type, ops, tb); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -2111,7 +2148,7 @@ replay: dev->ifindex = ifm->ifi_index; if (ops->newlink) { - err = ops->newlink(net, dev, tb, data); + err = ops->newlink(link_net ? : net, dev, tb, data); /* Drivers should call free_netdev() in ->destructor * and unregister it on failure after registration * so that device could be finally freed in rtnl_unlock. @@ -2130,9 +2167,19 @@ replay: } } err = rtnl_configure_link(dev, ifm); - if (err < 0) + if (err < 0) { unregister_netdevice(dev); + goto out; + } + + if (link_net) { + err = dev_change_net_namespace(dev, dest_net, ifname); + if (err < 0) + unregister_netdevice(dev); + } out: + if (link_net) + put_net(link_net); put_net(dest_net); return err; } @@ -2315,7 +2362,8 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2368,6 +2416,11 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm, return err; } + if (vid) { + pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name); + return err; + } + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) err = dev_uc_add_excl(dev, addr); else if (is_multicast_ether_addr(addr)) @@ -2693,10 +2746,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) idx); } - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev, + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, idx); + else + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); cops = NULL; } @@ -2792,7 +2846,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_nest_end(skb, protinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -2863,32 +2918,24 @@ static inline size_t bridge_nlmsg_size(void) + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ } -static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +static int rtnl_bridge_notify(struct net_device *dev) { struct net *net = dev_net(dev); - struct net_device *br_dev = netdev_master_upper_dev_get(dev); struct sk_buff *skb; int err = -EOPNOTSUPP; + if (!dev->netdev_ops->ndo_bridge_getlink) + return 0; + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); if (!skb) { err = -ENOMEM; goto errout; } - if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && - br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); - if (err < 0) - goto errout; - } - - if ((flags & BRIDGE_FLAGS_SELF) && - dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); - if (err < 0) - goto errout; - } + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); + if (err < 0) + goto errout; rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; @@ -2906,7 +2953,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; - u16 oflags, flags = 0; + u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) @@ -2936,8 +2983,6 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) } } - oflags = flags; - if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); @@ -2946,7 +2991,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags); if (err) goto out; @@ -2957,17 +3002,20 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_setlink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); - - if (!err) + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh, + flags); + if (!err) { flags &= ~BRIDGE_FLAGS_SELF; + + /* Generate event to notify upper layer of bridge + * change + */ + err = rtnl_bridge_notify(dev); + } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); - /* Generate event to notify upper layer of bridge change */ - if (!err) - err = rtnl_bridge_notify(dev, oflags); out: return err; } @@ -2979,7 +3027,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; - u16 oflags, flags = 0; + u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) @@ -3009,8 +3057,6 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) } } - oflags = flags; - if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); @@ -3019,7 +3065,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); if (err) goto out; @@ -3030,17 +3076,21 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_dellink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh, + flags); - if (!err) + if (!err) { flags &= ~BRIDGE_FLAGS_SELF; + + /* Generate event to notify upper layer of bridge + * change + */ + err = rtnl_bridge_notify(dev); + } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); - /* Generate event to notify upper layer of bridge change */ - if (!err) - err = rtnl_bridge_notify(dev, oflags); out: return err; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ae13ef6b3ea7..88c613eab142 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -74,6 +74,8 @@ #include <asm/uaccess.h> #include <trace/events/skb.h> #include <linux/highmem.h> +#include <linux/capability.h> +#include <linux/user_namespace.h> struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; @@ -677,13 +679,6 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); #endif -/* XXX: IS this still necessary? - JHS */ -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif -#endif } /* Free everything but the sk_buff shell. */ @@ -830,6 +825,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif +#ifdef CONFIG_XPS + CHECK_SKB_FIELD(sender_cpu); +#endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -3697,11 +3695,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, kfree_skb(skb); } +static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) +{ + bool ret; + + if (likely(sysctl_tstamp_allow_data || tsonly)) + return true; + + read_lock_bh(&sk->sk_callback_lock); + ret = sk->sk_socket && sk->sk_socket->file && + file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); + read_unlock_bh(&sk->sk_callback_lock); + return ret; +} + void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) { struct sock *sk = skb->sk; + if (!skb_may_tx_timestamp(sk, false)) + return; + /* take a reference to prevent skb_orphan() from freeing the socket */ sock_hold(sk); @@ -3717,19 +3732,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; + bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; - if (!sk) + if (!sk || !skb_may_tx_timestamp(sk, tsonly)) return; - if (hwtstamps) - *skb_hwtstamps(orig_skb) = *hwtstamps; + if (tsonly) + skb = alloc_skb(0, GFP_ATOMIC); else - orig_skb->tstamp = ktime_get_real(); - - skb = skb_clone(orig_skb, GFP_ATOMIC); + skb = skb_clone(orig_skb, GFP_ATOMIC); if (!skb) return; + if (tsonly) { + skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags; + skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; + } + + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + else + skb->tstamp = ktime_get_real(); + __skb_complete_tx_timestamp(skb, sk, tstype); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); @@ -4148,6 +4172,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; + skb->sender_cpu = 0; + skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); nf_reset_trace(skb); @@ -4203,7 +4229,7 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb) struct vlan_hdr *vhdr; u16 vlan_tci; - if (unlikely(vlan_tx_tag_present(skb))) { + if (unlikely(skb_vlan_tag_present(skb))) { /* vlan_tci is already set-up so leave this for another time */ return skb; } @@ -4289,7 +4315,7 @@ int skb_vlan_pop(struct sk_buff *skb) __be16 vlan_proto; int err; - if (likely(vlan_tx_tag_present(skb))) { + if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { if (unlikely((skb->protocol != htons(ETH_P_8021Q) && @@ -4319,7 +4345,7 @@ EXPORT_SYMBOL(skb_vlan_pop); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { unsigned int offset = skb->data - skb_mac_header(skb); int err; @@ -4329,7 +4355,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) */ __skb_push(skb, offset); err = __vlan_insert_tag(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (err) return err; skb->protocol = skb->vlan_proto; diff --git a/net/core/sock.c b/net/core/sock.c index 9a56b2000c3f..93c8b20c91e4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +int sysctl_tstamp_allow_data __read_mostly = 1; + struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; EXPORT_SYMBOL_GPL(memalloc_socks); @@ -840,6 +842,7 @@ set_rcvbuf: ret = -EINVAL; break; } + if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk->sk_protocol == IPPROTO_TCP) { @@ -1731,18 +1734,34 @@ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) } EXPORT_SYMBOL(sock_kmalloc); -/* - * Free an option memory block. +/* Free an option memory block. Note, we actually want the inline + * here as this allows gcc to detect the nullify and fold away the + * condition entirely. */ -void sock_kfree_s(struct sock *sk, void *mem, int size) +static inline void __sock_kfree_s(struct sock *sk, void *mem, int size, + const bool nullify) { if (WARN_ON_ONCE(!mem)) return; - kfree(mem); + if (nullify) + kzfree(mem); + else + kfree(mem); atomic_sub(size, &sk->sk_omem_alloc); } + +void sock_kfree_s(struct sock *sk, void *mem, int size) +{ + __sock_kfree_s(sk, mem, size, false); +} EXPORT_SYMBOL(sock_kfree_s); +void sock_kzfree_s(struct sock *sk, void *mem, int size) +{ + __sock_kfree_s(sk, mem, size, true); +} +EXPORT_SYMBOL(sock_kzfree_s); + /* It is almost wait_for_tcp_memory minus release_sock/lock_sock. I think, these locks should be removed for datagram sockets. */ diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 31baba2a71ce..fde21d19e61b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tstamp_allow_data", + .data = &sysctl_tstamp_allow_data, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one + }, #ifdef CONFIG_RPS { .procname = "rps_sock_flow_entries", diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4400da7739da..b2c26b081134 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -702,7 +702,8 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || nla_put_u32(skb, IFA_FLAGS, ifa_flags)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index d332aefb0846..df4803437888 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -298,7 +298,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att int type = nla_type(attr); if (type) { - if (type > RTAX_MAX || nla_len(attr) < 4) + if (type > RTAX_MAX || type == RTAX_CC_ALGO || + nla_len(attr) < 4) goto err_inval; fi->fib_metrics[type-1] = nla_get_u32(attr); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index daccc4a36d80..1d7c1256e845 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1616,7 +1616,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0) goto errout; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); @@ -1709,9 +1710,6 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rt->rt_flags |= RTCF_NOTIFY; err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - - if (err == 0) - goto out_free; if (err < 0) { err = -EMSGSIZE; goto out_free; @@ -1762,7 +1760,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) skb_dst_set(skb, dst_clone(&rt->dst)); if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, - 1, NLM_F_MULTI) <= 0) { + 1, NLM_F_MULTI) < 0) { skb_dst_drop(skb); rcu_read_unlock_bh(); goto done; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 86e3807052e9..1540b506e3e0 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -29,6 +29,7 @@ #include <linux/route.h> /* RTF_xxx */ #include <net/neighbour.h> #include <net/netlink.h> +#include <net/tcp.h> #include <net/dst.h> #include <net/flow.h> #include <net/fib_rules.h> @@ -273,7 +274,8 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(2) /* RTA_DST */ - + nla_total_size(4); /* RTA_PRIORITY */ + + nla_total_size(4) /* RTA_PRIORITY */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); @@ -365,7 +367,8 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp_head); } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 322c778487e7..37317149f918 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -879,7 +879,6 @@ static struct platform_driver dsa_driver = { .shutdown = dsa_shutdown, .driver = { .name = "dsa", - .owner = THIS_MODULE, .of_match_table = dsa_of_match_table, .pm = &dsa_pm_ops, }, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 515569ffde8a..589aafd01fc5 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -46,6 +46,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x", ds->index, ds->pd->sw_addr); ds->slave_mii_bus->parent = ds->master_dev; + ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 33a140e15834..238f38d21641 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -424,3 +424,95 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); + +struct sk_buff **eth_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff *p, **pp = NULL; + struct ethhdr *eh, *eh2; + unsigned int hlen, off_eth; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_eth = skb_gro_offset(skb); + hlen = off_eth + sizeof(*eh); + eh = skb_gro_header_fast(skb, off_eth); + if (skb_gro_header_hard(skb, hlen)) { + eh = skb_gro_header_slow(skb, hlen, off_eth); + if (unlikely(!eh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + eh2 = (struct ethhdr *)(p->data + off_eth); + if (compare_ether_header(eh, eh2)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = eh->h_proto; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, sizeof(*eh)); + skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(eth_gro_receive); + +int eth_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff); + __be16 type = eh->h_proto; + struct packet_offload *ptype; + int err = -ENOSYS; + + if (skb->encapsulation) + skb_set_inner_mac_header(skb, nhoff); + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + + sizeof(struct ethhdr)); + + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(eth_gro_complete); + +static struct packet_offload eth_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_TEB), + .callbacks = { + .gro_receive = eth_gro_receive, + .gro_complete = eth_gro_complete, + }, +}; + +static int __init eth_offload_init(void) +{ + dev_add_offload(ð_packet_offload); + + return 0; +} + +fs_initcall(eth_offload_init); diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h new file mode 100644 index 000000000000..e50f69da78eb --- /dev/null +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -0,0 +1,72 @@ +#ifndef __IEEE802154_6LOWPAN_I_H__ +#define __IEEE802154_6LOWPAN_I_H__ + +#include <linux/list.h> + +#include <net/ieee802154_netdev.h> +#include <net/inet_frag.h> + +struct lowpan_create_arg { + u16 tag; + u16 d_size; + const struct ieee802154_addr *src; + const struct ieee802154_addr *dst; +}; + +/* Equivalent of ipv4 struct ip + */ +struct lowpan_frag_queue { + struct inet_frag_queue q; + + u16 tag; + u16 d_size; + struct ieee802154_addr saddr; + struct ieee802154_addr daddr; +}; + +static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) +{ + switch (a->mode) { + case IEEE802154_ADDR_LONG: + return (((__force u64)a->extended_addr) >> 32) ^ + (((__force u64)a->extended_addr) & 0xffffffff); + case IEEE802154_ADDR_SHORT: + return (__force u32)(a->short_addr); + default: + return 0; + } +} + +struct lowpan_dev_record { + struct net_device *ldev; + struct list_head list; +}; + +/* private device info */ +struct lowpan_dev_info { + struct net_device *real_dev; /* real WPAN device ptr */ + struct mutex dev_list_mtx; /* mutex for list ops */ + u16 fragment_tag; +}; + +static inline struct +lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) +{ + return netdev_priv(dev); +} + +extern struct list_head lowpan_devices; + +int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); +void lowpan_net_frag_exit(void); +int lowpan_net_frag_init(void); + +void lowpan_rx_init(void); +void lowpan_rx_exit(void); + +int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len); +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev); + +#endif /* __IEEE802154_6LOWPAN_I_H__ */ diff --git a/net/ieee802154/6lowpan/Kconfig b/net/ieee802154/6lowpan/Kconfig new file mode 100644 index 000000000000..d24f985b0bfd --- /dev/null +++ b/net/ieee802154/6lowpan/Kconfig @@ -0,0 +1,5 @@ +config IEEE802154_6LOWPAN + tristate "6lowpan support over IEEE 802.15.4" + depends on 6LOWPAN + ---help--- + IPv6 compression over IEEE 802.15.4. diff --git a/net/ieee802154/6lowpan/Makefile b/net/ieee802154/6lowpan/Makefile new file mode 100644 index 000000000000..6bfb270a81a6 --- /dev/null +++ b/net/ieee802154/6lowpan/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o + +ieee802154_6lowpan-y := core.o rx.o reassembly.o tx.o diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c new file mode 100644 index 000000000000..055fbb71ba6f --- /dev/null +++ b/net/ieee802154/6lowpan/core.c @@ -0,0 +1,304 @@ +/* Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/ieee802154.h> + +#include <net/ipv6.h> + +#include "6lowpan_i.h" + +LIST_HEAD(lowpan_devices); +static int lowpan_open_count; + +static __le16 lowpan_get_pan_id(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); +} + +static __le16 lowpan_get_short_addr(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); +} + +static u8 lowpan_get_dsn(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); +} + +static struct header_ops lowpan_header_ops = { + .create = lowpan_header_create, +}; + +static struct lock_class_key lowpan_tx_busylock; +static struct lock_class_key lowpan_netdev_xmit_lock_key; + +static void lowpan_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &lowpan_netdev_xmit_lock_key); +} + +static int lowpan_dev_init(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); + dev->qdisc_tx_busylock = &lowpan_tx_busylock; + return 0; +} + +static const struct net_device_ops lowpan_netdev_ops = { + .ndo_init = lowpan_dev_init, + .ndo_start_xmit = lowpan_xmit, +}; + +static struct ieee802154_mlme_ops lowpan_mlme = { + .get_pan_id = lowpan_get_pan_id, + .get_short_addr = lowpan_get_short_addr, + .get_dsn = lowpan_get_dsn, +}; + +static void lowpan_setup(struct net_device *dev) +{ + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + dev->type = ARPHRD_IEEE802154; + /* Frame Control + Sequence Number + Address fields + Security Header */ + dev->hard_header_len = 2 + 1 + 20 + 14; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = IPV6_MIN_MTU; + dev->tx_queue_len = 0; + dev->flags = IFF_BROADCAST | IFF_MULTICAST; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &lowpan_netdev_ops; + dev->header_ops = &lowpan_header_ops; + dev->ml_priv = &lowpan_mlme; + dev->destructor = free_netdev; +} + +static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) + return -EINVAL; + } + return 0; +} + +static int lowpan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *real_dev; + struct lowpan_dev_record *entry; + int ret; + + ASSERT_RTNL(); + + pr_debug("adding new link\n"); + + if (!tb[IFLA_LINK]) + return -EINVAL; + /* find and hold real wpan device */ + real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + if (real_dev->type != ARPHRD_IEEE802154) { + dev_put(real_dev); + return -EINVAL; + } + + lowpan_dev_info(dev)->real_dev = real_dev; + mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + dev_put(real_dev); + lowpan_dev_info(dev)->real_dev = NULL; + return -ENOMEM; + } + + entry->ldev = dev; + + /* Set the lowpan hardware address to the wpan hardware address. */ + memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, &lowpan_devices); + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + ret = register_netdevice(dev); + if (ret >= 0) { + if (!lowpan_open_count) + lowpan_rx_init(); + lowpan_open_count++; + } + + return ret; +} + +static void lowpan_dellink(struct net_device *dev, struct list_head *head) +{ + struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); + struct net_device *real_dev = lowpan_dev->real_dev; + struct lowpan_dev_record *entry, *tmp; + + ASSERT_RTNL(); + + lowpan_open_count--; + if (!lowpan_open_count) + lowpan_rx_exit(); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (entry->ldev == dev) { + list_del(&entry->list); + kfree(entry); + } + } + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); + + unregister_netdevice_queue(dev, head); + + dev_put(real_dev); +} + +static struct rtnl_link_ops lowpan_link_ops __read_mostly = { + .kind = "lowpan", + .priv_size = sizeof(struct lowpan_dev_info), + .setup = lowpan_setup, + .newlink = lowpan_newlink, + .dellink = lowpan_dellink, + .validate = lowpan_validate, +}; + +static inline int __init lowpan_netlink_init(void) +{ + return rtnl_link_register(&lowpan_link_ops); +} + +static inline void lowpan_netlink_fini(void) +{ + rtnl_link_unregister(&lowpan_link_ops); +} + +static int lowpan_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + LIST_HEAD(del_list); + struct lowpan_dev_record *entry, *tmp; + + if (dev->type != ARPHRD_IEEE802154) + goto out; + + if (event == NETDEV_UNREGISTER) { + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (lowpan_dev_info(entry->ldev)->real_dev == dev) + lowpan_dellink(entry->ldev, &del_list); + } + + unregister_netdevice_many(&del_list); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block lowpan_dev_notifier = { + .notifier_call = lowpan_device_event, +}; + +static int __init lowpan_init_module(void) +{ + int err = 0; + + err = lowpan_net_frag_init(); + if (err < 0) + goto out; + + err = lowpan_netlink_init(); + if (err < 0) + goto out_frag; + + err = register_netdevice_notifier(&lowpan_dev_notifier); + if (err < 0) + goto out_pack; + + return 0; + +out_pack: + lowpan_netlink_fini(); +out_frag: + lowpan_net_frag_exit(); +out: + return err; +} + +static void __exit lowpan_cleanup_module(void) +{ + lowpan_netlink_fini(); + + lowpan_net_frag_exit(); + + unregister_netdevice_notifier(&lowpan_dev_notifier); +} + +module_init(lowpan_init_module); +module_exit(lowpan_cleanup_module); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 9d980ed3ffe2..f46e4d1306f2 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -28,7 +28,7 @@ #include <net/ipv6.h> #include <net/inet_frag.h> -#include "reassembly.h" +#include "6lowpan_i.h" static const char lowpan_frags_cache_name[] = "lowpan-frags"; diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c new file mode 100644 index 000000000000..4be1d289ab2d --- /dev/null +++ b/net/ieee802154/6lowpan/rx.c @@ -0,0 +1,171 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/if_arp.h> + +#include <net/6lowpan.h> +#include <net/ieee802154_netdev.h> + +#include "6lowpan_i.h" + +static int lowpan_give_skb_to_devices(struct sk_buff *skb, + struct net_device *dev) +{ + struct lowpan_dev_record *entry; + struct sk_buff *skb_cp; + int stat = NET_RX_SUCCESS; + + skb->protocol = htons(ETH_P_IPV6); + skb->pkt_type = PACKET_HOST; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) { + kfree_skb(skb); + rcu_read_unlock(); + return NET_RX_DROP; + } + + skb_cp->dev = entry->ldev; + stat = netif_rx(skb_cp); + if (stat == NET_RX_DROP) + break; + } + rcu_read_unlock(); + + consume_skb(skb); + + return stat; +} + +static int +iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) +{ + u8 iphc0, iphc1; + struct ieee802154_addr_sa sa, da; + void *sap, *dap; + + raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc0)) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc1)) + return -EINVAL; + + ieee802154_addr_to_sa(&sa, &hdr->source); + ieee802154_addr_to_sa(&da, &hdr->dest); + + if (sa.addr_type == IEEE802154_ADDR_SHORT) + sap = &sa.short_addr; + else + sap = &sa.hwaddr; + + if (da.addr_type == IEEE802154_ADDR_SHORT) + dap = &da.short_addr; + else + dap = &da.hwaddr; + + return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, + IEEE802154_ADDR_LEN, dap, da.addr_type, + IEEE802154_ADDR_LEN, iphc0, iphc1); +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct ieee802154_hdr hdr; + int ret; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto drop; + + if (!netif_running(dev)) + goto drop_skb; + + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop_skb; + + if (dev->type != ARPHRD_IEEE802154) + goto drop_skb; + + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) + goto drop_skb; + + /* check that it's our buffer */ + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Pull off the 1-byte of 6lowpan header. */ + skb_pull(skb, 1); + return lowpan_give_skb_to_devices(skb, NULL); + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + default: + break; + } + } + +drop_skb: + kfree_skb(skb); +drop: + return NET_RX_DROP; +} + +static struct packet_type lowpan_packet_type = { + .type = htons(ETH_P_IEEE802154), + .func = lowpan_rcv, +}; + +void lowpan_rx_init(void) +{ + dev_add_pack(&lowpan_packet_type); +} + +void lowpan_rx_exit(void) +{ + dev_remove_pack(&lowpan_packet_type); +} diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c new file mode 100644 index 000000000000..2349070bd534 --- /dev/null +++ b/net/ieee802154/6lowpan/tx.c @@ -0,0 +1,271 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <net/6lowpan.h> +#include <net/ieee802154_netdev.h> + +#include "6lowpan_i.h" + +/* don't save pan id, it's intra pan */ +struct lowpan_addr { + u8 mode; + union { + /* IPv6 needs big endian here */ + __be64 extended_addr; + __be16 short_addr; + } u; +}; + +struct lowpan_addr_info { + struct lowpan_addr daddr; + struct lowpan_addr saddr; +}; + +static inline struct +lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) +{ + WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); + return (struct lowpan_addr_info *)(skb->data - + sizeof(struct lowpan_addr_info)); +} + +int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) +{ + const u8 *saddr = _saddr; + const u8 *daddr = _daddr; + struct lowpan_addr_info *info; + + /* TODO: + * if this package isn't ipv6 one, where should it be routed? + */ + if (type != ETH_P_IPV6) + return 0; + + if (!saddr) + saddr = dev->dev_addr; + + raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); + + info = lowpan_skb_priv(skb); + + /* TODO: Currently we only support extended_addr */ + info->daddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->daddr.u.extended_addr, daddr, + sizeof(info->daddr.u.extended_addr)); + info->saddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->saddr.u.extended_addr, saddr, + sizeof(info->daddr.u.extended_addr)); + + return 0; +} + +static struct sk_buff* +lowpan_alloc_frag(struct sk_buff *skb, int size, + const struct ieee802154_hdr *master_hdr) +{ + struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; + struct sk_buff *frag; + int rc; + + frag = alloc_skb(real_dev->hard_header_len + + real_dev->needed_tailroom + size, + GFP_ATOMIC); + + if (likely(frag)) { + frag->dev = real_dev; + frag->priority = skb->priority; + skb_reserve(frag, real_dev->hard_header_len); + skb_reset_network_header(frag); + *mac_cb(frag) = *mac_cb(skb); + + rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, + &master_hdr->source, size); + if (rc < 0) { + kfree_skb(frag); + return ERR_PTR(rc); + } + } else { + frag = ERR_PTR(-ENOMEM); + } + + return frag; +} + +static int +lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, + u8 *frag_hdr, int frag_hdrlen, + int offset, int len) +{ + struct sk_buff *frag; + + raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); + + frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); + if (IS_ERR(frag)) + return -PTR_ERR(frag); + + memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); + memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); + + raw_dump_table(__func__, " fragment dump", frag->data, frag->len); + + return dev_queue_xmit(frag); +} + +static int +lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, + const struct ieee802154_hdr *wpan_hdr) +{ + u16 dgram_size, dgram_offset; + __be16 frag_tag; + u8 frag_hdr[5]; + int frag_cap, frag_len, payload_cap, rc; + int skb_unprocessed, skb_offset; + + dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - + skb->mac_len; + frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); + lowpan_dev_info(dev)->fragment_tag++; + + frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); + frag_hdr[1] = dgram_size & 0xff; + memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); + + payload_cap = ieee802154_max_payload(wpan_hdr); + + frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - + skb_network_header_len(skb), 8); + + skb_offset = skb_network_header_len(skb); + skb_unprocessed = skb->len - skb->mac_len - skb_offset; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAG1_HEAD_SIZE, 0, + frag_len + skb_network_header_len(skb)); + if (rc) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, ntohs(frag_tag)); + goto err; + } + + frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; + frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; + frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); + + do { + dgram_offset += frag_len; + skb_offset += frag_len; + skb_unprocessed -= frag_len; + frag_len = min(frag_cap, skb_unprocessed); + + frag_hdr[4] = dgram_offset >> 3; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAGN_HEAD_SIZE, skb_offset, + frag_len); + if (rc) { + pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n", + __func__, ntohs(frag_tag), skb_offset); + goto err; + } + } while (skb_unprocessed > frag_cap); + + consume_skb(skb); + return NET_XMIT_SUCCESS; + +err: + kfree_skb(skb); + return rc; +} + +static int lowpan_header(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_addr sa, da; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); + struct lowpan_addr_info info; + void *daddr, *saddr; + + memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); + + /* TODO: Currently we only support extended_addr */ + daddr = &info.daddr.u.extended_addr; + saddr = &info.saddr.u.extended_addr; + + lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); + + cb->type = IEEE802154_FC_TYPE_DATA; + + /* prepare wpan address data */ + sa.mode = IEEE802154_ADDR_LONG; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + sa.extended_addr = ieee802154_devaddr_from_raw(saddr); + + /* intra-PAN communications */ + da.pan_id = sa.pan_id; + + /* if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast((const u8 *)daddr)) { + da.mode = IEEE802154_ADDR_SHORT; + da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + cb->ackreq = false; + } else { + da.mode = IEEE802154_ADDR_LONG; + da.extended_addr = ieee802154_devaddr_from_raw(daddr); + cb->ackreq = true; + } + + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + ETH_P_IPV6, (void *)&da, (void *)&sa, 0); +} + +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_hdr wpan_hdr; + int max_single, ret; + + pr_debug("package xmit\n"); + + /* We must take a copy of the skb before we modify/replace the ipv6 + * header as the header could be used elsewhere + */ + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + + ret = lowpan_header(skb, dev); + if (ret < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + max_single = ieee802154_max_payload(&wpan_hdr); + + if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { + skb->dev = lowpan_dev_info(dev)->real_dev; + return dev_queue_xmit(skb); + } else { + netdev_tx_t rc; + + pr_debug("frame is too big, fragmentation is needed\n"); + rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); + + return rc < 0 ? NET_XMIT_DROP : rc; + } +} diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c deleted file mode 100644 index 27eaa65e88e1..000000000000 --- a/net/ieee802154/6lowpan_rtnl.c +++ /dev/null @@ -1,729 +0,0 @@ -/* Copyright 2011, Siemens AG - * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> - */ - -/* Based on patches from Jon Smirl <jonsmirl@gmail.com> - * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -/* Jon's code is based on 6lowpan implementation for Contiki which is: - * Copyright (c) 2008, Swedish Institute of Computer Science. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Institute nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <linux/bitops.h> -#include <linux/if_arp.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/netdevice.h> -#include <linux/ieee802154.h> -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> -#include <net/6lowpan.h> -#include <net/ipv6.h> - -#include "reassembly.h" - -static LIST_HEAD(lowpan_devices); -static int lowpan_open_count; - -/* private device info */ -struct lowpan_dev_info { - struct net_device *real_dev; /* real WPAN device ptr */ - struct mutex dev_list_mtx; /* mutex for list ops */ - u16 fragment_tag; -}; - -struct lowpan_dev_record { - struct net_device *ldev; - struct list_head list; -}; - -/* don't save pan id, it's intra pan */ -struct lowpan_addr { - u8 mode; - union { - /* IPv6 needs big endian here */ - __be64 extended_addr; - __be16 short_addr; - } u; -}; - -struct lowpan_addr_info { - struct lowpan_addr daddr; - struct lowpan_addr saddr; -}; - -static inline struct -lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) -{ - return netdev_priv(dev); -} - -static inline struct -lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) -{ - WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); - return (struct lowpan_addr_info *)(skb->data - - sizeof(struct lowpan_addr_info)); -} - -static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) -{ - const u8 *saddr = _saddr; - const u8 *daddr = _daddr; - struct lowpan_addr_info *info; - - /* TODO: - * if this package isn't ipv6 one, where should it be routed? - */ - if (type != ETH_P_IPV6) - return 0; - - if (!saddr) - saddr = dev->dev_addr; - - raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); - raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); - - info = lowpan_skb_priv(skb); - - /* TODO: Currently we only support extended_addr */ - info->daddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->daddr.u.extended_addr, daddr, - sizeof(info->daddr.u.extended_addr)); - info->saddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->saddr.u.extended_addr, saddr, - sizeof(info->daddr.u.extended_addr)); - - return 0; -} - -static int lowpan_give_skb_to_devices(struct sk_buff *skb, - struct net_device *dev) -{ - struct lowpan_dev_record *entry; - struct sk_buff *skb_cp; - int stat = NET_RX_SUCCESS; - - skb->protocol = htons(ETH_P_IPV6); - skb->pkt_type = PACKET_HOST; - - rcu_read_lock(); - list_for_each_entry_rcu(entry, &lowpan_devices, list) - if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { - skb_cp = skb_copy(skb, GFP_ATOMIC); - if (!skb_cp) { - kfree_skb(skb); - rcu_read_unlock(); - return NET_RX_DROP; - } - - skb_cp->dev = entry->ldev; - stat = netif_rx(skb_cp); - if (stat == NET_RX_DROP) - break; - } - rcu_read_unlock(); - - consume_skb(skb); - - return stat; -} - -static int -iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) -{ - u8 iphc0, iphc1; - struct ieee802154_addr_sa sa, da; - void *sap, *dap; - - raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); - /* at least two bytes will be used for the encoding */ - if (skb->len < 2) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc0)) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc1)) - return -EINVAL; - - ieee802154_addr_to_sa(&sa, &hdr->source); - ieee802154_addr_to_sa(&da, &hdr->dest); - - if (sa.addr_type == IEEE802154_ADDR_SHORT) - sap = &sa.short_addr; - else - sap = &sa.hwaddr; - - if (da.addr_type == IEEE802154_ADDR_SHORT) - dap = &da.short_addr; - else - dap = &da.hwaddr; - - return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, - IEEE802154_ADDR_LEN, dap, da.addr_type, - IEEE802154_ADDR_LEN, iphc0, iphc1); -} - -static struct sk_buff* -lowpan_alloc_frag(struct sk_buff *skb, int size, - const struct ieee802154_hdr *master_hdr) -{ - struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; - struct sk_buff *frag; - int rc; - - frag = alloc_skb(real_dev->hard_header_len + - real_dev->needed_tailroom + size, - GFP_ATOMIC); - - if (likely(frag)) { - frag->dev = real_dev; - frag->priority = skb->priority; - skb_reserve(frag, real_dev->hard_header_len); - skb_reset_network_header(frag); - *mac_cb(frag) = *mac_cb(skb); - - rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, - &master_hdr->source, size); - if (rc < 0) { - kfree_skb(frag); - return ERR_PTR(rc); - } - } else { - frag = ERR_PTR(-ENOMEM); - } - - return frag; -} - -static int -lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, - u8 *frag_hdr, int frag_hdrlen, - int offset, int len) -{ - struct sk_buff *frag; - - raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); - - frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); - if (IS_ERR(frag)) - return -PTR_ERR(frag); - - memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); - memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); - - raw_dump_table(__func__, " fragment dump", frag->data, frag->len); - - return dev_queue_xmit(frag); -} - -static int -lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, - const struct ieee802154_hdr *wpan_hdr) -{ - u16 dgram_size, dgram_offset; - __be16 frag_tag; - u8 frag_hdr[5]; - int frag_cap, frag_len, payload_cap, rc; - int skb_unprocessed, skb_offset; - - dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - - skb->mac_len; - frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); - lowpan_dev_info(dev)->fragment_tag++; - - frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); - frag_hdr[1] = dgram_size & 0xff; - memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); - - payload_cap = ieee802154_max_payload(wpan_hdr); - - frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - - skb_network_header_len(skb), 8); - - skb_offset = skb_network_header_len(skb); - skb_unprocessed = skb->len - skb->mac_len - skb_offset; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAG1_HEAD_SIZE, 0, - frag_len + skb_network_header_len(skb)); - if (rc) { - pr_debug("%s unable to send FRAG1 packet (tag: %d)", - __func__, ntohs(frag_tag)); - goto err; - } - - frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; - frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; - frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); - - do { - dgram_offset += frag_len; - skb_offset += frag_len; - skb_unprocessed -= frag_len; - frag_len = min(frag_cap, skb_unprocessed); - - frag_hdr[4] = dgram_offset >> 3; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAGN_HEAD_SIZE, skb_offset, - frag_len); - if (rc) { - pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n", - __func__, ntohs(frag_tag), skb_offset); - goto err; - } - } while (skb_unprocessed > frag_cap); - - consume_skb(skb); - return NET_XMIT_SUCCESS; - -err: - kfree_skb(skb); - return rc; -} - -static int lowpan_header(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_addr sa, da; - struct ieee802154_mac_cb *cb = mac_cb_init(skb); - struct lowpan_addr_info info; - void *daddr, *saddr; - - memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); - - /* TODO: Currently we only support extended_addr */ - daddr = &info.daddr.u.extended_addr; - saddr = &info.saddr.u.extended_addr; - - lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); - - cb->type = IEEE802154_FC_TYPE_DATA; - - /* prepare wpan address data */ - sa.mode = IEEE802154_ADDR_LONG; - sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - sa.extended_addr = ieee802154_devaddr_from_raw(saddr); - - /* intra-PAN communications */ - da.pan_id = sa.pan_id; - - /* if the destination address is the broadcast address, use the - * corresponding short address - */ - if (lowpan_is_addr_broadcast((const u8 *)daddr)) { - da.mode = IEEE802154_ADDR_SHORT; - da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); - cb->ackreq = false; - } else { - da.mode = IEEE802154_ADDR_LONG; - da.extended_addr = ieee802154_devaddr_from_raw(daddr); - cb->ackreq = true; - } - - return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - ETH_P_IPV6, (void *)&da, (void *)&sa, 0); -} - -static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_hdr wpan_hdr; - int max_single, ret; - - pr_debug("package xmit\n"); - - /* We must take a copy of the skb before we modify/replace the ipv6 - * header as the header could be used elsewhere - */ - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_XMIT_DROP; - - ret = lowpan_header(skb, dev); - if (ret < 0) { - kfree_skb(skb); - return NET_XMIT_DROP; - } - - if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { - kfree_skb(skb); - return NET_XMIT_DROP; - } - - max_single = ieee802154_max_payload(&wpan_hdr); - - if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { - skb->dev = lowpan_dev_info(dev)->real_dev; - return dev_queue_xmit(skb); - } else { - netdev_tx_t rc; - - pr_debug("frame is too big, fragmentation is needed\n"); - rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); - - return rc < 0 ? NET_XMIT_DROP : rc; - } -} - -static __le16 lowpan_get_pan_id(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); -} - -static __le16 lowpan_get_short_addr(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); -} - -static u8 lowpan_get_dsn(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); -} - -static struct header_ops lowpan_header_ops = { - .create = lowpan_header_create, -}; - -static struct lock_class_key lowpan_tx_busylock; -static struct lock_class_key lowpan_netdev_xmit_lock_key; - -static void lowpan_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, - &lowpan_netdev_xmit_lock_key); -} - -static int lowpan_dev_init(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &lowpan_tx_busylock; - return 0; -} - -static const struct net_device_ops lowpan_netdev_ops = { - .ndo_init = lowpan_dev_init, - .ndo_start_xmit = lowpan_xmit, -}; - -static struct ieee802154_mlme_ops lowpan_mlme = { - .get_pan_id = lowpan_get_pan_id, - .get_short_addr = lowpan_get_short_addr, - .get_dsn = lowpan_get_dsn, -}; - -static void lowpan_setup(struct net_device *dev) -{ - dev->addr_len = IEEE802154_ADDR_LEN; - memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); - dev->type = ARPHRD_IEEE802154; - /* Frame Control + Sequence Number + Address fields + Security Header */ - dev->hard_header_len = 2 + 1 + 20 + 14; - dev->needed_tailroom = 2; /* FCS */ - dev->mtu = IPV6_MIN_MTU; - dev->tx_queue_len = 0; - dev->flags = IFF_BROADCAST | IFF_MULTICAST; - dev->watchdog_timeo = 0; - - dev->netdev_ops = &lowpan_netdev_ops; - dev->header_ops = &lowpan_header_ops; - dev->ml_priv = &lowpan_mlme; - dev->destructor = free_netdev; -} - -static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) -{ - if (tb[IFLA_ADDRESS]) { - if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) - return -EINVAL; - } - return 0; -} - -static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct ieee802154_hdr hdr; - int ret; - - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - goto drop; - - if (!netif_running(dev)) - goto drop_skb; - - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop_skb; - - if (dev->type != ARPHRD_IEEE802154) - goto drop_skb; - - if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) - goto drop_skb; - - /* check that it's our buffer */ - if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { - /* Pull off the 1-byte of 6lowpan header. */ - skb_pull(skb, 1); - return lowpan_give_skb_to_devices(skb, NULL); - } else { - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - default: - break; - } - } - -drop_skb: - kfree_skb(skb); -drop: - return NET_RX_DROP; -} - -static struct packet_type lowpan_packet_type = { - .type = htons(ETH_P_IEEE802154), - .func = lowpan_rcv, -}; - -static int lowpan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_device *real_dev; - struct lowpan_dev_record *entry; - int ret; - - ASSERT_RTNL(); - - pr_debug("adding new link\n"); - - if (!tb[IFLA_LINK]) - return -EINVAL; - /* find and hold real wpan device */ - real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) - return -ENODEV; - if (real_dev->type != ARPHRD_IEEE802154) { - dev_put(real_dev); - return -EINVAL; - } - - lowpan_dev_info(dev)->real_dev = real_dev; - mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { - dev_put(real_dev); - lowpan_dev_info(dev)->real_dev = NULL; - return -ENOMEM; - } - - entry->ldev = dev; - - /* Set the lowpan hardware address to the wpan hardware address. */ - memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - INIT_LIST_HEAD(&entry->list); - list_add_tail(&entry->list, &lowpan_devices); - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - ret = register_netdevice(dev); - if (ret >= 0) { - if (!lowpan_open_count) - dev_add_pack(&lowpan_packet_type); - lowpan_open_count++; - } - - return ret; -} - -static void lowpan_dellink(struct net_device *dev, struct list_head *head) -{ - struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); - struct net_device *real_dev = lowpan_dev->real_dev; - struct lowpan_dev_record *entry, *tmp; - - ASSERT_RTNL(); - - lowpan_open_count--; - if (!lowpan_open_count) - dev_remove_pack(&lowpan_packet_type); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (entry->ldev == dev) { - list_del(&entry->list); - kfree(entry); - } - } - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); - - unregister_netdevice_queue(dev, head); - - dev_put(real_dev); -} - -static struct rtnl_link_ops lowpan_link_ops __read_mostly = { - .kind = "lowpan", - .priv_size = sizeof(struct lowpan_dev_info), - .setup = lowpan_setup, - .newlink = lowpan_newlink, - .dellink = lowpan_dellink, - .validate = lowpan_validate, -}; - -static inline int __init lowpan_netlink_init(void) -{ - return rtnl_link_register(&lowpan_link_ops); -} - -static inline void lowpan_netlink_fini(void) -{ - rtnl_link_unregister(&lowpan_link_ops); -} - -static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - LIST_HEAD(del_list); - struct lowpan_dev_record *entry, *tmp; - - if (dev->type != ARPHRD_IEEE802154) - goto out; - - if (event == NETDEV_UNREGISTER) { - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (lowpan_dev_info(entry->ldev)->real_dev == dev) - lowpan_dellink(entry->ldev, &del_list); - } - - unregister_netdevice_many(&del_list); - } - -out: - return NOTIFY_DONE; -} - -static struct notifier_block lowpan_dev_notifier = { - .notifier_call = lowpan_device_event, -}; - -static int __init lowpan_init_module(void) -{ - int err = 0; - - err = lowpan_net_frag_init(); - if (err < 0) - goto out; - - err = lowpan_netlink_init(); - if (err < 0) - goto out_frag; - - err = register_netdevice_notifier(&lowpan_dev_notifier); - if (err < 0) - goto out_pack; - - return 0; - -out_pack: - lowpan_netlink_fini(); -out_frag: - lowpan_net_frag_exit(); -out: - return err; -} - -static void __exit lowpan_cleanup_module(void) -{ - lowpan_netlink_fini(); - - lowpan_net_frag_exit(); - - unregister_netdevice_notifier(&lowpan_dev_notifier); -} - -module_init(lowpan_init_module); -module_exit(lowpan_cleanup_module); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index c0d4154d144f..1370d5b0041b 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -1,4 +1,4 @@ -config IEEE802154 +menuconfig IEEE802154 tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support" ---help--- IEEE Std 802.15.4 defines a low data rate, low power and low @@ -10,8 +10,16 @@ config IEEE802154 Say Y here to compile LR-WPAN support into the kernel or say M to compile it as modules. -config IEEE802154_6LOWPAN - tristate "6lowpan support over IEEE 802.15.4" - depends on IEEE802154 && 6LOWPAN +if IEEE802154 + +config IEEE802154_SOCKET + tristate "IEEE 802.15.4 socket interface" + default y ---help--- - IPv6 compression over IEEE 802.15.4. + Socket interface for IEEE 802.15.4. Contains DGRAM sockets interface + for 802.15.4 dataframes. Also RAW socket interface to build MAC + header from userspace. + +source "net/ieee802154/6lowpan/Kconfig" + +endif diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 9f6970f2a28b..05dab2957cd4 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,9 +1,9 @@ -obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o +obj-$(CONFIG_IEEE802154) += ieee802154.o +obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o +obj-y += 6lowpan/ -ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ header_ops.o sysfs.o nl802154.o -af_802154-y := af_ieee802154.o raw.o dgram.o +ieee802154_socket-y := socket.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/ieee802154/af802154.h b/net/ieee802154/af802154.h deleted file mode 100644 index 343b63e6f953..000000000000 --- a/net/ieee802154/af802154.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Internal interfaces for ieee 802.15.4 address family. - * - * Copyright 2007, 2008, 2009 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> - */ - -#ifndef AF802154_H -#define AF802154_H - -struct sk_buff; -struct net_device; -struct ieee802154_addr; -extern struct proto ieee802154_raw_prot; -extern struct proto ieee802154_dgram_prot; -void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb); -int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb); -struct net_device *ieee802154_get_dev(struct net *net, - const struct ieee802154_addr *addr); - -#endif diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c deleted file mode 100644 index d0a1282cdf43..000000000000 --- a/net/ieee802154/af_ieee802154.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - * IEEE802154.4 socket interface - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Maxim Gorbachyov <maxim.gorbachev@siemens.com> - */ - -#include <linux/net.h> -#include <linux/capability.h> -#include <linux/module.h> -#include <linux/if_arp.h> -#include <linux/if.h> -#include <linux/termios.h> /* For TIOCOUTQ/INQ */ -#include <linux/list.h> -#include <linux/slab.h> -#include <net/datalink.h> -#include <net/psnap.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <net/route.h> - -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> - -#include "af802154.h" - -/* Utility function for families */ -struct net_device* -ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) -{ - struct net_device *dev = NULL; - struct net_device *tmp; - __le16 pan_id, short_addr; - u8 hwaddr[IEEE802154_ADDR_LEN]; - - switch (addr->mode) { - case IEEE802154_ADDR_LONG: - ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); - rcu_read_lock(); - dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); - if (dev) - dev_hold(dev); - rcu_read_unlock(); - break; - case IEEE802154_ADDR_SHORT: - if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || - addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || - addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) - break; - - rtnl_lock(); - - for_each_netdev(net, tmp) { - if (tmp->type != ARPHRD_IEEE802154) - continue; - - pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); - short_addr = - ieee802154_mlme_ops(tmp)->get_short_addr(tmp); - - if (pan_id == addr->pan_id && - short_addr == addr->short_addr) { - dev = tmp; - dev_hold(dev); - break; - } - } - - rtnl_unlock(); - break; - default: - pr_warn("Unsupported ieee802154 address type: %d\n", - addr->mode); - break; - } - - return dev; -} - -static int ieee802154_sock_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (sk) { - sock->sk = NULL; - sk->sk_prot->close(sk, 0); - } - return 0; -} - -static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - - return sk->sk_prot->sendmsg(iocb, sk, msg, len); -} - -static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, - int addr_len) -{ - struct sock *sk = sock->sk; - - if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); - - return sock_no_bind(sock, uaddr, addr_len); -} - -static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) -{ - struct sock *sk = sock->sk; - - if (addr_len < sizeof(uaddr->sa_family)) - return -EINVAL; - - if (uaddr->sa_family == AF_UNSPEC) - return sk->sk_prot->disconnect(sk, flags); - - return sk->sk_prot->connect(sk, uaddr, addr_len); -} - -static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, - unsigned int cmd) -{ - struct ifreq ifr; - int ret = -ENOIOCTLCMD; - struct net_device *dev; - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; - - dev_load(sock_net(sk), ifr.ifr_name); - dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); - - if (!dev) - return -ENODEV; - - if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) - ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); - - if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) - ret = -EFAULT; - dev_put(dev); - - return ret; -} - -static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) -{ - struct sock *sk = sock->sk; - - switch (cmd) { - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec __user *)arg); - case SIOCGIFADDR: - case SIOCSIFADDR: - return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, - cmd); - default: - if (!sk->sk_prot->ioctl) - return -ENOIOCTLCMD; - return sk->sk_prot->ioctl(sk, cmd, arg); - } -} - -static const struct proto_ops ieee802154_raw_ops = { - .family = PF_IEEE802154, - .owner = THIS_MODULE, - .release = ieee802154_sock_release, - .bind = ieee802154_sock_bind, - .connect = ieee802154_sock_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = datagram_poll, - .ioctl = ieee802154_sock_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = ieee802154_sock_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -static const struct proto_ops ieee802154_dgram_ops = { - .family = PF_IEEE802154, - .owner = THIS_MODULE, - .release = ieee802154_sock_release, - .bind = ieee802154_sock_bind, - .connect = ieee802154_sock_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = datagram_poll, - .ioctl = ieee802154_sock_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = ieee802154_sock_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -/* Create a socket. Initialise the socket, blank the addresses - * set the state. - */ -static int ieee802154_create(struct net *net, struct socket *sock, - int protocol, int kern) -{ - struct sock *sk; - int rc; - struct proto *proto; - const struct proto_ops *ops; - - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - - switch (sock->type) { - case SOCK_RAW: - proto = &ieee802154_raw_prot; - ops = &ieee802154_raw_ops; - break; - case SOCK_DGRAM: - proto = &ieee802154_dgram_prot; - ops = &ieee802154_dgram_ops; - break; - default: - rc = -ESOCKTNOSUPPORT; - goto out; - } - - rc = -ENOMEM; - sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); - if (!sk) - goto out; - rc = 0; - - sock->ops = ops; - - sock_init_data(sock, sk); - /* FIXME: sk->sk_destruct */ - sk->sk_family = PF_IEEE802154; - - /* Checksums on by default */ - sock_set_flag(sk, SOCK_ZAPPED); - - if (sk->sk_prot->hash) - sk->sk_prot->hash(sk); - - if (sk->sk_prot->init) { - rc = sk->sk_prot->init(sk); - if (rc) - sk_common_release(sk); - } -out: - return rc; -} - -static const struct net_proto_family ieee802154_family_ops = { - .family = PF_IEEE802154, - .create = ieee802154_create, - .owner = THIS_MODULE, -}; - -static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - if (!netif_running(dev)) - goto drop; - pr_debug("got frame, type %d, dev %p\n", dev->type, dev); -#ifdef DEBUG - print_hex_dump_bytes("ieee802154_rcv ", - DUMP_PREFIX_NONE, skb->data, skb->len); -#endif - - if (!net_eq(dev_net(dev), &init_net)) - goto drop; - - ieee802154_raw_deliver(dev, skb); - - if (dev->type != ARPHRD_IEEE802154) - goto drop; - - if (skb->pkt_type != PACKET_OTHERHOST) - return ieee802154_dgram_deliver(dev, skb); - -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -static struct packet_type ieee802154_packet_type = { - .type = htons(ETH_P_IEEE802154), - .func = ieee802154_rcv, -}; - -static int __init af_ieee802154_init(void) -{ - int rc = -EINVAL; - - rc = proto_register(&ieee802154_raw_prot, 1); - if (rc) - goto out; - - rc = proto_register(&ieee802154_dgram_prot, 1); - if (rc) - goto err_dgram; - - /* Tell SOCKET that we are alive */ - rc = sock_register(&ieee802154_family_ops); - if (rc) - goto err_sock; - dev_add_pack(&ieee802154_packet_type); - - rc = 0; - goto out; - -err_sock: - proto_unregister(&ieee802154_dgram_prot); -err_dgram: - proto_unregister(&ieee802154_raw_prot); -out: - return rc; -} - -static void __exit af_ieee802154_remove(void) -{ - dev_remove_pack(&ieee802154_packet_type); - sock_unregister(PF_IEEE802154); - proto_unregister(&ieee802154_dgram_prot); - proto_unregister(&ieee802154_raw_prot); -} - -module_init(af_ieee802154_init); -module_exit(af_ieee802154_remove); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_IEEE802154); diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c deleted file mode 100644 index d1930b70c4aa..000000000000 --- a/net/ieee802154/dgram.c +++ /dev/null @@ -1,549 +0,0 @@ -/* - * IEEE 802.15.4 dgram socket interface - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> - */ - -#include <linux/capability.h> -#include <linux/net.h> -#include <linux/module.h> -#include <linux/if_arp.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/ieee802154.h> -#include <net/sock.h> -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> - -#include <asm/ioctls.h> - -#include "af802154.h" - -static HLIST_HEAD(dgram_head); -static DEFINE_RWLOCK(dgram_lock); - -struct dgram_sock { - struct sock sk; - - struct ieee802154_addr src_addr; - struct ieee802154_addr dst_addr; - - unsigned int bound:1; - unsigned int connected:1; - unsigned int want_ack:1; - unsigned int secen:1; - unsigned int secen_override:1; - unsigned int seclevel:3; - unsigned int seclevel_override:1; -}; - -static inline struct dgram_sock *dgram_sk(const struct sock *sk) -{ - return container_of(sk, struct dgram_sock, sk); -} - -static void dgram_hash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - sk_add_node(sk, &dgram_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&dgram_lock); -} - -static void dgram_unhash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&dgram_lock); -} - -static int dgram_init(struct sock *sk) -{ - struct dgram_sock *ro = dgram_sk(sk); - - ro->want_ack = 1; - return 0; -} - -static void dgram_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct ieee802154_addr haddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = -EINVAL; - struct net_device *dev; - - lock_sock(sk); - - ro->bound = 0; - - if (len < sizeof(*addr)) - goto out; - - if (addr->family != AF_IEEE802154) - goto out; - - ieee802154_addr_from_sa(&haddr, &addr->addr); - dev = ieee802154_get_dev(sock_net(sk), &haddr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - ro->src_addr = haddr; - - ro->bound = 1; - err = 0; -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) -{ - switch (cmd) { - case SIOCOUTQ: - { - int amount = sk_wmem_alloc_get(sk); - - return put_user(amount, (int __user *)arg); - } - - case SIOCINQ: - { - struct sk_buff *skb; - unsigned long amount; - - amount = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) { - /* We will only return the amount - * of this packet since that is all - * that will be read. - */ - amount = skb->len - ieee802154_hdr_length(skb); - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); - } - } - - return -ENOIOCTLCMD; -} - -/* FIXME: autobind */ -static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, - int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = 0; - - if (len < sizeof(*addr)) - return -EINVAL; - - if (addr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - if (!ro->bound) { - err = -ENETUNREACH; - goto out; - } - - ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); - ro->connected = 1; - -out: - release_sock(sk); - return err; -} - -static int dgram_disconnect(struct sock *sk, int flags) -{ - struct dgram_sock *ro = dgram_sk(sk); - - lock_sock(sk); - ro->connected = 0; - release_sock(sk); - - return 0; -} - -static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff *skb; - struct ieee802154_mac_cb *cb; - struct dgram_sock *ro = dgram_sk(sk); - struct ieee802154_addr dst_addr; - int hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - if (!ro->connected && !msg->msg_name) - return -EDESTADDRREQ; - else if (ro->connected && msg->msg_name) - return -EISCONN; - - if (!ro->bound) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EMSGSIZE; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, - &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_network_header(skb); - - cb = mac_cb_init(skb); - cb->type = IEEE802154_FC_TYPE_DATA; - cb->ackreq = ro->want_ack; - - if (msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_ieee802154*, - daddr, msg->msg_name); - - ieee802154_addr_from_sa(&dst_addr, &daddr->addr); - } else { - dst_addr = ro->dst_addr; - } - - cb->secen = ro->secen; - cb->secen_override = ro->secen_override; - cb->seclevel = ro->seclevel; - cb->seclevel_override = ro->seclevel_override; - - err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, - ro->bound ? &ro->src_addr : NULL, size); - if (err < 0) - goto out_skb; - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - /* FIXME: skip headers if necessary ?! */ - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, sk, skb); - - if (saddr) { - saddr->family = AF_IEEE802154; - ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); - *addr_len = sizeof(*saddr); - } - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -static inline bool -ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, - struct dgram_sock *ro) -{ - if (!ro->bound) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_LONG && - hw_addr == ro->src_addr.extended_addr) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && - pan_id == ro->src_addr.pan_id && - short_addr == ro->src_addr.short_addr) - return true; - - return false; -} - -int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk, *prev = NULL; - int ret = NET_RX_SUCCESS; - __le16 pan_id, short_addr; - __le64 hw_addr; - - /* Data frame processing */ - BUG_ON(dev->type != ARPHRD_IEEE802154); - - pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); - hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); - - read_lock(&dgram_lock); - sk_for_each(sk, &dgram_head) { - if (ieee802154_match_sock(hw_addr, pan_id, short_addr, - dgram_sk(sk))) { - if (prev) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - dgram_rcv_skb(prev, clone); - } - - prev = sk; - } - } - - if (prev) { - dgram_rcv_skb(prev, skb); - } else { - kfree_skb(skb); - ret = NET_RX_DROP; - } - read_unlock(&dgram_lock); - - return ret; -} - -static int dgram_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - - int val, len; - - if (level != SOL_IEEE802154) - return -EOPNOTSUPP; - - if (get_user(len, optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(int)); - - switch (optname) { - case WPAN_WANTACK: - val = ro->want_ack; - break; - case WPAN_SECURITY: - if (!ro->secen_override) - val = WPAN_SECURITY_DEFAULT; - else if (ro->secen) - val = WPAN_SECURITY_ON; - else - val = WPAN_SECURITY_OFF; - break; - case WPAN_SECURITY_LEVEL: - if (!ro->seclevel_override) - val = WPAN_SECURITY_LEVEL_DEFAULT; - else - val = ro->seclevel; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &val, len)) - return -EFAULT; - return 0; -} - -static int dgram_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - struct net *net = sock_net(sk); - int val; - int err = 0; - - if (optlen < sizeof(int)) - return -EINVAL; - - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case WPAN_WANTACK: - ro->want_ack = !!val; - break; - case WPAN_SECURITY: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - switch (val) { - case WPAN_SECURITY_DEFAULT: - ro->secen_override = 0; - break; - case WPAN_SECURITY_ON: - ro->secen_override = 1; - ro->secen = 1; - break; - case WPAN_SECURITY_OFF: - ro->secen_override = 1; - ro->secen = 0; - break; - default: - err = -EINVAL; - break; - } - break; - case WPAN_SECURITY_LEVEL: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - if (val < WPAN_SECURITY_LEVEL_DEFAULT || - val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { - err = -EINVAL; - } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { - ro->seclevel_override = 0; - } else { - ro->seclevel_override = 1; - ro->seclevel = val; - } - break; - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -struct proto ieee802154_dgram_prot = { - .name = "IEEE-802.15.4-MAC", - .owner = THIS_MODULE, - .obj_size = sizeof(struct dgram_sock), - .init = dgram_init, - .close = dgram_close, - .bind = dgram_bind, - .sendmsg = dgram_sendmsg, - .recvmsg = dgram_recvmsg, - .hash = dgram_hash, - .unhash = dgram_unhash, - .connect = dgram_connect, - .disconnect = dgram_disconnect, - .ioctl = dgram_ioctl, - .getsockopt = dgram_getsockopt, - .setsockopt = dgram_setsockopt, -}; - diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index fa1464762d0d..c8133c07ceee 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -63,13 +63,9 @@ int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); return genlmsg_multicast(&nl802154_family, msg, 0, group, GFP_ATOMIC); -out: - nlmsg_free(msg); - return -ENOBUFS; } struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, @@ -96,13 +92,9 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); -out: - nlmsg_free(msg); - return -ENOBUFS; } static const struct genl_ops ieee8021154_ops[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index cd919493c976..9105265920fe 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -121,7 +121,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, params.transmit_power) || nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) || nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, - params.cca_mode) || + params.cca.mode) || nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, params.cca_ed_level) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, @@ -136,7 +136,8 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, } wpan_phy_put(phy); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: wpan_phy_put(phy); @@ -516,7 +517,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); if (info->attrs[IEEE802154_ATTR_CCA_MODE]) - params.cca_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); + params.cca.mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 7baf98b14611..1b9d25f6e898 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -65,7 +65,8 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, goto nla_put_failure; mutex_unlock(&phy->pib_lock); kfree(buf); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: mutex_unlock(&phy->pib_lock); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 889647744697..a4daf91b8d0a 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -209,7 +209,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_TX_POWER] = { .type = NLA_S8, }, - [NL802154_ATTR_CCA_MODE] = { .type = NLA_U8, }, + [NL802154_ATTR_CCA_MODE] = { .type = NLA_U32, }, + [NL802154_ATTR_CCA_OPT] = { .type = NLA_U32, }, [NL802154_ATTR_SUPPORTED_CHANNEL] = { .type = NLA_U32, }, @@ -290,16 +291,23 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; /* cca mode */ - if (nla_put_u8(msg, NL802154_ATTR_CCA_MODE, - rdev->wpan_phy.cca_mode)) + if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE, + rdev->wpan_phy.cca.mode)) goto nla_put_failure; + if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT, + rdev->wpan_phy.cca.opt)) + goto nla_put_failure; + } + if (nla_put_s8(msg, NL802154_ATTR_TX_POWER, rdev->wpan_phy.transmit_power)) goto nla_put_failure; finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -482,7 +490,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u8(msg, NL802154_ATTR_LBT_MODE, wpan_dev->lbt)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -622,6 +631,31 @@ static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info) return rdev_set_channel(rdev, page, channel); } +static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct wpan_phy_cca cca; + + if (!info->attrs[NL802154_ATTR_CCA_MODE]) + return -EINVAL; + + cca.mode = nla_get_u32(info->attrs[NL802154_ATTR_CCA_MODE]); + /* checking 802.15.4 constraints */ + if (cca.mode < NL802154_CCA_ENERGY || cca.mode > NL802154_CCA_ATTR_MAX) + return -EINVAL; + + if (cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (!info->attrs[NL802154_ATTR_CCA_OPT]) + return -EINVAL; + + cca.opt = nla_get_u32(info->attrs[NL802154_ATTR_CCA_OPT]); + if (cca.opt > NL802154_CCA_OPT_ATTR_MAX) + return -EINVAL; + } + + return rdev_set_cca_mode(rdev, &cca); +} + static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; @@ -895,6 +929,14 @@ static const struct genl_ops nl802154_ops[] = { NL802154_FLAG_NEED_RTNL, }, { + .cmd = NL802154_CMD_SET_CCA_MODE, + .doit = nl802154_set_cca_mode, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | + NL802154_FLAG_NEED_RTNL, + }, + { .cmd = NL802154_CMD_SET_PAN_ID, .doit = nl802154_set_pan_id, .policy = nl802154_policy, diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c deleted file mode 100644 index 1674b115c891..000000000000 --- a/net/ieee802154/raw.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Raw IEEE 802.15.4 sockets - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> - */ - -#include <linux/net.h> -#include <linux/module.h> -#include <linux/if_arp.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <net/sock.h> -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> - -#include "af802154.h" - -static HLIST_HEAD(raw_head); -static DEFINE_RWLOCK(raw_lock); - -static void raw_hash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - sk_add_node(sk, &raw_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&raw_lock); -} - -static void raw_unhash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&raw_lock); -} - -static void raw_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) -{ - struct ieee802154_addr addr; - struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; - int err = 0; - struct net_device *dev = NULL; - - if (len < sizeof(*uaddr)) - return -EINVAL; - - uaddr = (struct sockaddr_ieee802154 *)_uaddr; - if (uaddr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - ieee802154_addr_from_sa(&addr, &uaddr->addr); - dev = ieee802154_get_dev(sock_net(sk), &addr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - sk->sk_bound_dev_if = dev->ifindex; - sk_dst_reset(sk); - -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int raw_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) -{ - return -ENOTSUPP; -} - -static int raw_disconnect(struct sock *sk, int flags) -{ - return 0; -} - -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff *skb; - int hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - lock_sock(sk); - if (!sk->sk_bound_dev_if) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); - release_sock(sk); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EINVAL; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, sk, skb); - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk; - - read_lock(&raw_lock); - sk_for_each(sk, &raw_head) { - bh_lock_sock(sk); - if (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == dev->ifindex) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - raw_rcv_skb(sk, clone); - } - bh_unlock_sock(sk); - } - read_unlock(&raw_lock); -} - -static int raw_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - return -EOPNOTSUPP; -} - -static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - return -EOPNOTSUPP; -} - -struct proto ieee802154_raw_prot = { - .name = "IEEE-802.15.4-RAW", - .owner = THIS_MODULE, - .obj_size = sizeof(struct sock), - .close = raw_close, - .bind = raw_bind, - .sendmsg = raw_sendmsg, - .recvmsg = raw_recvmsg, - .hash = raw_hash, - .unhash = raw_unhash, - .connect = raw_connect, - .disconnect = raw_disconnect, - .getsockopt = raw_getsockopt, - .setsockopt = raw_setsockopt, -}; diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index aff54fbd9264..7c46732fad2b 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -42,6 +42,13 @@ rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel) } static inline int +rdev_set_cca_mode(struct cfg802154_registered_device *rdev, + const struct wpan_phy_cca *cca) +{ + return rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); +} + +static inline int rdev_set_pan_id(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 pan_id) { diff --git a/net/ieee802154/reassembly.h b/net/ieee802154/reassembly.h deleted file mode 100644 index 836b16fa001f..000000000000 --- a/net/ieee802154/reassembly.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __IEEE802154_6LOWPAN_REASSEMBLY_H__ -#define __IEEE802154_6LOWPAN_REASSEMBLY_H__ - -#include <net/inet_frag.h> - -struct lowpan_create_arg { - u16 tag; - u16 d_size; - const struct ieee802154_addr *src; - const struct ieee802154_addr *dst; -}; - -/* Equivalent of ipv4 struct ip - */ -struct lowpan_frag_queue { - struct inet_frag_queue q; - - u16 tag; - u16 d_size; - struct ieee802154_addr saddr; - struct ieee802154_addr daddr; -}; - -static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) -{ - switch (a->mode) { - case IEEE802154_ADDR_LONG: - return (((__force u64)a->extended_addr) >> 32) ^ - (((__force u64)a->extended_addr) & 0xffffffff); - case IEEE802154_ADDR_SHORT: - return (__force u32)(a->short_addr); - default: - return 0; - } -} - -int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); -void lowpan_net_frag_exit(void); -int lowpan_net_frag_init(void); - -#endif /* __IEEE802154_6LOWPAN_REASSEMBLY_H__ */ diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c new file mode 100644 index 000000000000..2878d8ca6d3b --- /dev/null +++ b/net/ieee802154/socket.c @@ -0,0 +1,1125 @@ +/* + * IEEE802154.4 socket interface + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Written by: + * Sergey Lapin <slapin@ossfans.org> + * Maxim Gorbachyov <maxim.gorbachev@siemens.com> + */ + +#include <linux/net.h> +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/if_arp.h> +#include <linux/if.h> +#include <linux/termios.h> /* For TIOCOUTQ/INQ */ +#include <linux/list.h> +#include <linux/slab.h> +#include <net/datalink.h> +#include <net/psnap.h> +#include <net/sock.h> +#include <net/tcp_states.h> +#include <net/route.h> + +#include <net/af_ieee802154.h> +#include <net/ieee802154_netdev.h> + +/* Utility function for families */ +static struct net_device* +ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) +{ + struct net_device *dev = NULL; + struct net_device *tmp; + __le16 pan_id, short_addr; + u8 hwaddr[IEEE802154_ADDR_LEN]; + + switch (addr->mode) { + case IEEE802154_ADDR_LONG: + ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); + rcu_read_lock(); + dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + break; + case IEEE802154_ADDR_SHORT: + if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) + break; + + rtnl_lock(); + + for_each_netdev(net, tmp) { + if (tmp->type != ARPHRD_IEEE802154) + continue; + + pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); + short_addr = + ieee802154_mlme_ops(tmp)->get_short_addr(tmp); + + if (pan_id == addr->pan_id && + short_addr == addr->short_addr) { + dev = tmp; + dev_hold(dev); + break; + } + } + + rtnl_unlock(); + break; + default: + pr_warn("Unsupported ieee802154 address type: %d\n", + addr->mode); + break; + } + + return dev; +} + +static int ieee802154_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + sock->sk = NULL; + sk->sk_prot->close(sk, 0); + } + return 0; +} + +static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + + return sk->sk_prot->sendmsg(iocb, sk, msg, len); +} + +static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) +{ + struct sock *sk = sock->sk; + + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, uaddr, addr_len); + + return sock_no_bind(sock, uaddr, addr_len); +} + +static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk = sock->sk; + + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) + return sk->sk_prot->disconnect(sk, flags); + + return sk->sk_prot->connect(sk, uaddr, addr_len); +} + +static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, + unsigned int cmd) +{ + struct ifreq ifr; + int ret = -ENOIOCTLCMD; + struct net_device *dev; + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + dev_load(sock_net(sk), ifr.ifr_name); + dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); + + if (!dev) + return -ENODEV; + + if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) + ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); + + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + ret = -EFAULT; + dev_put(dev); + + return ret; +} + +static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCGSTAMP: + return sock_get_timestamp(sk, (struct timeval __user *)arg); + case SIOCGSTAMPNS: + return sock_get_timestampns(sk, (struct timespec __user *)arg); + case SIOCGIFADDR: + case SIOCSIFADDR: + return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, + cmd); + default: + if (!sk->sk_prot->ioctl) + return -ENOIOCTLCMD; + return sk->sk_prot->ioctl(sk, cmd, arg); + } +} + +/* RAW Sockets (802.15.4 created in userspace) */ +static HLIST_HEAD(raw_head); +static DEFINE_RWLOCK(raw_lock); + +static void raw_hash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + sk_add_node(sk, &raw_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&raw_lock); +} + +static void raw_unhash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&raw_lock); +} + +static void raw_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) +{ + struct ieee802154_addr addr; + struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; + int err = 0; + struct net_device *dev = NULL; + + if (len < sizeof(*uaddr)) + return -EINVAL; + + uaddr = (struct sockaddr_ieee802154 *)_uaddr; + if (uaddr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + ieee802154_addr_from_sa(&addr, &uaddr->addr); + dev = ieee802154_get_dev(sock_net(sk), &addr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + sk->sk_bound_dev_if = dev->ifindex; + sk_dst_reset(sk); + +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int raw_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + return -ENOTSUPP; +} + +static int raw_disconnect(struct sock *sk, int flags) +{ + return 0; +} + +static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + lock_sock(sk); + if (!sk->sk_bound_dev_if) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + release_sock(sk); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EINVAL; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, hlen); + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk; + + read_lock(&raw_lock); + sk_for_each(sk, &raw_head) { + bh_lock_sock(sk); + if (!sk->sk_bound_dev_if || + sk->sk_bound_dev_if == dev->ifindex) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + raw_rcv_skb(sk, clone); + } + bh_unlock_sock(sk); + } + read_unlock(&raw_lock); +} + +static int raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +static int raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + return -EOPNOTSUPP; +} + +static struct proto ieee802154_raw_prot = { + .name = "IEEE-802.15.4-RAW", + .owner = THIS_MODULE, + .obj_size = sizeof(struct sock), + .close = raw_close, + .bind = raw_bind, + .sendmsg = raw_sendmsg, + .recvmsg = raw_recvmsg, + .hash = raw_hash, + .unhash = raw_unhash, + .connect = raw_connect, + .disconnect = raw_disconnect, + .getsockopt = raw_getsockopt, + .setsockopt = raw_setsockopt, +}; + +static const struct proto_ops ieee802154_raw_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +/* DGRAM Sockets (802.15.4 dataframes) */ +static HLIST_HEAD(dgram_head); +static DEFINE_RWLOCK(dgram_lock); + +struct dgram_sock { + struct sock sk; + + struct ieee802154_addr src_addr; + struct ieee802154_addr dst_addr; + + unsigned int bound:1; + unsigned int connected:1; + unsigned int want_ack:1; + unsigned int secen:1; + unsigned int secen_override:1; + unsigned int seclevel:3; + unsigned int seclevel_override:1; +}; + +static inline struct dgram_sock *dgram_sk(const struct sock *sk) +{ + return container_of(sk, struct dgram_sock, sk); +} + +static void dgram_hash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + sk_add_node(sk, &dgram_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&dgram_lock); +} + +static void dgram_unhash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&dgram_lock); +} + +static int dgram_init(struct sock *sk) +{ + struct dgram_sock *ro = dgram_sk(sk); + + ro->want_ack = 1; + return 0; +} + +static void dgram_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct ieee802154_addr haddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = -EINVAL; + struct net_device *dev; + + lock_sock(sk); + + ro->bound = 0; + + if (len < sizeof(*addr)) + goto out; + + if (addr->family != AF_IEEE802154) + goto out; + + ieee802154_addr_from_sa(&haddr, &addr->addr); + dev = ieee802154_get_dev(sock_net(sk), &haddr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + ro->src_addr = haddr; + + ro->bound = 1; + err = 0; +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + { + int amount = sk_wmem_alloc_get(sk); + + return put_user(amount, (int __user *)arg); + } + + case SIOCINQ: + { + struct sk_buff *skb; + unsigned long amount; + + amount = 0; + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb) { + /* We will only return the amount + * of this packet since that is all + * that will be read. + */ + amount = skb->len - ieee802154_hdr_length(skb); + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } + } + + return -ENOIOCTLCMD; +} + +/* FIXME: autobind */ +static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, + int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = 0; + + if (len < sizeof(*addr)) + return -EINVAL; + + if (addr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + if (!ro->bound) { + err = -ENETUNREACH; + goto out; + } + + ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); + ro->connected = 1; + +out: + release_sock(sk); + return err; +} + +static int dgram_disconnect(struct sock *sk, int flags) +{ + struct dgram_sock *ro = dgram_sk(sk); + + lock_sock(sk); + ro->connected = 0; + release_sock(sk); + + return 0; +} + +static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + struct ieee802154_mac_cb *cb; + struct dgram_sock *ro = dgram_sk(sk); + struct ieee802154_addr dst_addr; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + if (!ro->connected && !msg->msg_name) + return -EDESTADDRREQ; + else if (ro->connected && msg->msg_name) + return -EISCONN; + + if (!ro->bound) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EMSGSIZE; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, + &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, hlen); + + skb_reset_network_header(skb); + + cb = mac_cb_init(skb); + cb->type = IEEE802154_FC_TYPE_DATA; + cb->ackreq = ro->want_ack; + + if (msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_ieee802154*, + daddr, msg->msg_name); + + ieee802154_addr_from_sa(&dst_addr, &daddr->addr); + } else { + dst_addr = ro->dst_addr; + } + + cb->secen = ro->secen; + cb->secen_override = ro->secen_override; + cb->seclevel = ro->seclevel; + cb->seclevel_override = ro->seclevel_override; + + err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, + ro->bound ? &ro->src_addr : NULL, size); + if (err < 0) + goto out_skb; + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + /* FIXME: skip headers if necessary ?! */ + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (saddr) { + saddr->family = AF_IEEE802154; + ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); + *addr_len = sizeof(*saddr); + } + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static inline bool +ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, + struct dgram_sock *ro) +{ + if (!ro->bound) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_LONG && + hw_addr == ro->src_addr.extended_addr) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && + pan_id == ro->src_addr.pan_id && + short_addr == ro->src_addr.short_addr) + return true; + + return false; +} + +static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk, *prev = NULL; + int ret = NET_RX_SUCCESS; + __le16 pan_id, short_addr; + __le64 hw_addr; + + /* Data frame processing */ + BUG_ON(dev->type != ARPHRD_IEEE802154); + + pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); + hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); + + read_lock(&dgram_lock); + sk_for_each(sk, &dgram_head) { + if (ieee802154_match_sock(hw_addr, pan_id, short_addr, + dgram_sk(sk))) { + if (prev) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + dgram_rcv_skb(prev, clone); + } + + prev = sk; + } + } + + if (prev) { + dgram_rcv_skb(prev, skb); + } else { + kfree_skb(skb); + ret = NET_RX_DROP; + } + read_unlock(&dgram_lock); + + return ret; +} + +static int dgram_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + + int val, len; + + if (level != SOL_IEEE802154) + return -EOPNOTSUPP; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + + switch (optname) { + case WPAN_WANTACK: + val = ro->want_ack; + break; + case WPAN_SECURITY: + if (!ro->secen_override) + val = WPAN_SECURITY_DEFAULT; + else if (ro->secen) + val = WPAN_SECURITY_ON; + else + val = WPAN_SECURITY_OFF; + break; + case WPAN_SECURITY_LEVEL: + if (!ro->seclevel_override) + val = WPAN_SECURITY_LEVEL_DEFAULT; + else + val = ro->seclevel; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +} + +static int dgram_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + struct net *net = sock_net(sk); + int val; + int err = 0; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case WPAN_WANTACK: + ro->want_ack = !!val; + break; + case WPAN_SECURITY: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + switch (val) { + case WPAN_SECURITY_DEFAULT: + ro->secen_override = 0; + break; + case WPAN_SECURITY_ON: + ro->secen_override = 1; + ro->secen = 1; + break; + case WPAN_SECURITY_OFF: + ro->secen_override = 1; + ro->secen = 0; + break; + default: + err = -EINVAL; + break; + } + break; + case WPAN_SECURITY_LEVEL: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + if (val < WPAN_SECURITY_LEVEL_DEFAULT || + val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { + err = -EINVAL; + } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { + ro->seclevel_override = 0; + } else { + ro->seclevel_override = 1; + ro->seclevel = val; + } + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static struct proto ieee802154_dgram_prot = { + .name = "IEEE-802.15.4-MAC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct dgram_sock), + .init = dgram_init, + .close = dgram_close, + .bind = dgram_bind, + .sendmsg = dgram_sendmsg, + .recvmsg = dgram_recvmsg, + .hash = dgram_hash, + .unhash = dgram_unhash, + .connect = dgram_connect, + .disconnect = dgram_disconnect, + .ioctl = dgram_ioctl, + .getsockopt = dgram_getsockopt, + .setsockopt = dgram_setsockopt, +}; + +static const struct proto_ops ieee802154_dgram_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +/* Create a socket. Initialise the socket, blank the addresses + * set the state. + */ +static int ieee802154_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + struct sock *sk; + int rc; + struct proto *proto; + const struct proto_ops *ops; + + if (!net_eq(net, &init_net)) + return -EAFNOSUPPORT; + + switch (sock->type) { + case SOCK_RAW: + proto = &ieee802154_raw_prot; + ops = &ieee802154_raw_ops; + break; + case SOCK_DGRAM: + proto = &ieee802154_dgram_prot; + ops = &ieee802154_dgram_ops; + break; + default: + rc = -ESOCKTNOSUPPORT; + goto out; + } + + rc = -ENOMEM; + sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); + if (!sk) + goto out; + rc = 0; + + sock->ops = ops; + + sock_init_data(sock, sk); + /* FIXME: sk->sk_destruct */ + sk->sk_family = PF_IEEE802154; + + /* Checksums on by default */ + sock_set_flag(sk, SOCK_ZAPPED); + + if (sk->sk_prot->hash) + sk->sk_prot->hash(sk); + + if (sk->sk_prot->init) { + rc = sk->sk_prot->init(sk); + if (rc) + sk_common_release(sk); + } +out: + return rc; +} + +static const struct net_proto_family ieee802154_family_ops = { + .family = PF_IEEE802154, + .create = ieee802154_create, + .owner = THIS_MODULE, +}; + +static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + if (!netif_running(dev)) + goto drop; + pr_debug("got frame, type %d, dev %p\n", dev->type, dev); +#ifdef DEBUG + print_hex_dump_bytes("ieee802154_rcv ", + DUMP_PREFIX_NONE, skb->data, skb->len); +#endif + + if (!net_eq(dev_net(dev), &init_net)) + goto drop; + + ieee802154_raw_deliver(dev, skb); + + if (dev->type != ARPHRD_IEEE802154) + goto drop; + + if (skb->pkt_type != PACKET_OTHERHOST) + return ieee802154_dgram_deliver(dev, skb); + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type ieee802154_packet_type = { + .type = htons(ETH_P_IEEE802154), + .func = ieee802154_rcv, +}; + +static int __init af_ieee802154_init(void) +{ + int rc = -EINVAL; + + rc = proto_register(&ieee802154_raw_prot, 1); + if (rc) + goto out; + + rc = proto_register(&ieee802154_dgram_prot, 1); + if (rc) + goto err_dgram; + + /* Tell SOCKET that we are alive */ + rc = sock_register(&ieee802154_family_ops); + if (rc) + goto err_sock; + dev_add_pack(&ieee802154_packet_type); + + rc = 0; + goto out; + +err_sock: + proto_unregister(&ieee802154_dgram_prot); +err_dgram: + proto_unregister(&ieee802154_raw_prot); +out: + return rc; +} + +static void __exit af_ieee802154_remove(void) +{ + dev_remove_pack(&ieee802154_packet_type); + sock_unregister(PF_IEEE802154); + proto_unregister(&ieee802154_dgram_prot); + proto_unregister(&ieee802154_raw_prot); +} + +module_init(af_ieee802154_init); +module_exit(af_ieee802154_remove); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_IEEE802154); diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c index 1613b9c65dfa..dff55c2d87f3 100644 --- a/net/ieee802154/sysfs.c +++ b/net/ieee802154/sysfs.c @@ -68,7 +68,7 @@ static DEVICE_ATTR_RO(name) MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); MASTER_SHOW(transmit_power, "%d +- 1 dB"); -MASTER_SHOW(cca_mode, "%d"); +MASTER_SHOW_COMPLEX(cca_mode, "%d", phy->cca.mode); static ssize_t channels_supported_show(struct device *dev, struct device_attribute *attr, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 214882e7d6de..f0b4a31d7bd6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1522,7 +1522,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, preferred, valid)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1566,7 +1567,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWADDR, NLM_F_MULTI) <= 0) { + RTM_NEWADDR, NLM_F_MULTI) < 0) { rcu_read_unlock(); goto done; } @@ -1749,7 +1750,8 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1881,7 +1883,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) { + -1) < 0) { rcu_read_unlock(); goto done; } @@ -1897,7 +1899,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -1908,7 +1910,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -2320,7 +2322,7 @@ static __net_initdata struct pernet_operations devinet_ops = { .exit = devinet_exit_net, }; -static struct rtnl_af_ops inet_af_ops = { +static struct rtnl_af_ops inet_af_ops __read_mostly = { .family = AF_INET, .fill_link_af = inet_fill_link_af, .get_link_af_size = inet_get_link_af_size, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 23104a3f2924..57be71dd6a9e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -67,7 +67,7 @@ static int __net_init fib4_rules_init(struct net *net) return 0; fail: - kfree(local_table); + fib_free_table(local_table); return -ENOMEM; } #else @@ -109,6 +109,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return tb; } +/* caller must hold either rtnl or rcu read lock */ struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; @@ -119,15 +120,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id) id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); - rcu_read_lock(); head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - if (tb->tb_id == id) { - rcu_read_unlock(); + if (tb->tb_id == id) return tb; - } } - rcu_read_unlock(); return NULL; } #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -167,16 +164,18 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (ipv4_is_multicast(addr)) return RTN_MULTICAST; + rcu_read_lock(); + local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; - rcu_read_lock(); if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } - rcu_read_unlock(); } + + rcu_read_unlock(); return ret; } @@ -919,7 +918,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) #undef BRD1_OK } -static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) +static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) { struct fib_result res; @@ -929,6 +928,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) .flowi4_tos = frn->fl_tos, .flowi4_scope = frn->fl_scope, }; + struct fib_table *tb; + + rcu_read_lock(); + + tb = fib_get_table(net, frn->tb_id_in); frn->err = -ENOENT; if (tb) { @@ -945,6 +949,8 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) } local_bh_enable(); } + + rcu_read_unlock(); } static void nl_fib_input(struct sk_buff *skb) @@ -952,7 +958,6 @@ static void nl_fib_input(struct sk_buff *skb) struct net *net; struct fib_result_nl *frn; struct nlmsghdr *nlh; - struct fib_table *tb; u32 portid; net = sock_net(skb->sk); @@ -967,9 +972,7 @@ static void nl_fib_input(struct sk_buff *skb) nlh = nlmsg_hdr(skb); frn = (struct fib_result_nl *) nlmsg_data(nlh); - tb = fib_get_table(net, frn->tb_id_in); - - nl_fib_lookup(frn, tb); + nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 1e4f6600b31d..825981b1049a 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -32,7 +32,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, unsigned int); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 8f7bd56955b0..d3db718be51d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -81,27 +81,25 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, break; case FR_ACT_UNREACHABLE: - err = -ENETUNREACH; - goto errout; + return -ENETUNREACH; case FR_ACT_PROHIBIT: - err = -EACCES; - goto errout; + return -EACCES; case FR_ACT_BLACKHOLE: default: - err = -EINVAL; - goto errout; + return -EINVAL; } + rcu_read_lock(); + tbl = fib_get_table(rule->fr_net, rule->table); - if (!tbl) - goto errout; + if (tbl) + err = fib_table_lookup(tbl, &flp->u.ip4, + (struct fib_result *)arg->result, + arg->flags); - err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); - if (err > 0) - err = -EAGAIN; -errout: + rcu_read_unlock(); return err; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f99f41bd15b8..1e2090ea663e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -360,7 +360,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_DST */ + nla_total_size(4) /* RTA_PRIORITY */ - + nla_total_size(4); /* RTA_PREFSRC */ + + nla_total_size(4) /* RTA_PREFSRC */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); @@ -410,24 +411,6 @@ errout: rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); } -/* Return the first fib alias matching TOS with - * priority less than or equal to PRIO. - */ -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) -{ - if (fah) { - struct fib_alias *fa; - list_for_each_entry(fa, fah, fa_list) { - if (fa->fa_tos > tos) - continue; - if (fa->fa_info->fib_priority >= prio || - fa->fa_tos < tos) - return fa; - } - } - return NULL; -} - static int fib_detect_death(struct fib_info *fi, int order, struct fib_info **last_resort, int *last_idx, int dflt) @@ -859,7 +842,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (type > RTAX_MAX) goto err_inval; - val = nla_get_u32(nla); + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err_inval; + } else { + val = nla_get_u32(nla); + } if (type == RTAX_ADVMSS && val > 65535 - 40) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) @@ -1081,7 +1073,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp); } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e9cb2588e416..3daf0224ff2e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -83,28 +83,33 @@ #define MAX_STAT_DEPTH 32 -#define KEYLENGTH (8*sizeof(t_key)) +#define KEYLENGTH (8*sizeof(t_key)) +#define KEY_MAX ((t_key)~0) typedef unsigned int t_key; -#define T_TNODE 0 -#define T_LEAF 1 -#define NODE_TYPE_MASK 0x1UL -#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) +#define IS_TNODE(n) ((n)->bits) +#define IS_LEAF(n) (!(n)->bits) -#define IS_TNODE(n) (!(n->parent & T_LEAF)) -#define IS_LEAF(n) (n->parent & T_LEAF) +#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos) -struct rt_trie_node { - unsigned long parent; - t_key key; -}; - -struct leaf { - unsigned long parent; +struct tnode { t_key key; - struct hlist_head list; + unsigned char bits; /* 2log(KEYLENGTH) bits needed */ + unsigned char pos; /* 2log(KEYLENGTH) bits needed */ + unsigned char slen; + struct tnode __rcu *parent; struct rcu_head rcu; + union { + /* The fields in this struct are valid if bits > 0 (TNODE) */ + struct { + t_key empty_children; /* KEYLENGTH bits needed */ + t_key full_children; /* KEYLENGTH bits needed */ + struct tnode __rcu *child[0]; + }; + /* This list pointer if valid if bits == 0 (LEAF) */ + struct hlist_head list; + }; }; struct leaf_info { @@ -115,20 +120,6 @@ struct leaf_info { struct rcu_head rcu; }; -struct tnode { - unsigned long parent; - t_key key; - unsigned char pos; /* 2log(KEYLENGTH) bits needed */ - unsigned char bits; /* 2log(KEYLENGTH) bits needed */ - unsigned int full_children; /* KEYLENGTH bits needed */ - unsigned int empty_children; /* KEYLENGTH bits needed */ - union { - struct rcu_head rcu; - struct tnode *tnode_free; - }; - struct rt_trie_node __rcu *child[0]; -}; - #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats { unsigned int gets; @@ -151,19 +142,13 @@ struct trie_stat { }; struct trie { - struct rt_trie_node __rcu *trie; + struct tnode __rcu *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS - struct trie_use_stats stats; + struct trie_use_stats __percpu *stats; #endif }; -static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, - int wasfull); -static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); -static struct tnode *inflate(struct trie *t, struct tnode *tn); -static struct tnode *halve(struct trie *t, struct tnode *tn); -/* tnodes to free after resize(); protected by RTNL */ -static struct tnode *tnode_free_head; +static void resize(struct trie *t, struct tnode *tn); static size_t tnode_free_size; /* @@ -176,170 +161,101 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; -/* - * caller must hold RTNL - */ -static inline struct tnode *node_parent(const struct rt_trie_node *node) -{ - unsigned long parent; - - parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); +/* caller must hold RTNL */ +#define node_parent(n) rtnl_dereference((n)->parent) - return (struct tnode *)(parent & ~NODE_TYPE_MASK); -} +/* caller must hold RCU read lock or RTNL */ +#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent) -/* - * caller must hold RCU read lock or RTNL - */ -static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) +/* wrapper for rcu_assign_pointer */ +static inline void node_set_parent(struct tnode *n, struct tnode *tp) { - unsigned long parent; - - parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || - lockdep_rtnl_is_held()); - - return (struct tnode *)(parent & ~NODE_TYPE_MASK); + if (n) + rcu_assign_pointer(n->parent, tp); } -/* Same as rcu_assign_pointer - * but that macro() assumes that value is a pointer. +#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p) + +/* This provides us with the number of children in this node, in the case of a + * leaf this will return 0 meaning none of the children are accessible. */ -static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) +static inline unsigned long tnode_child_length(const struct tnode *tn) { - smp_wmb(); - node->parent = (unsigned long)ptr | NODE_TYPE(node); + return (1ul << tn->bits) & ~(1ul); } -/* - * caller must hold RTNL - */ -static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) +/* caller must hold RTNL */ +static inline struct tnode *tnode_get_child(const struct tnode *tn, + unsigned long i) { - BUG_ON(i >= 1U << tn->bits); - return rtnl_dereference(tn->child[i]); } -/* - * caller must hold RCU read lock or RTNL - */ -static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) +/* caller must hold RCU read lock or RTNL */ +static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, + unsigned long i) { - BUG_ON(i >= 1U << tn->bits); - return rcu_dereference_rtnl(tn->child[i]); } -static inline int tnode_child_length(const struct tnode *tn) -{ - return 1 << tn->bits; -} - -static inline t_key mask_pfx(t_key k, unsigned int l) -{ - return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); -} - -static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) -{ - if (offset < KEYLENGTH) - return ((t_key)(a << offset)) >> (KEYLENGTH - bits); - else - return 0; -} - -static inline int tkey_equals(t_key a, t_key b) -{ - return a == b; -} - -static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) -{ - if (bits == 0 || offset >= KEYLENGTH) - return 1; - bits = bits > KEYLENGTH ? KEYLENGTH : bits; - return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; -} - -static inline int tkey_mismatch(t_key a, int offset, t_key b) -{ - t_key diff = a ^ b; - int i = offset; - - if (!diff) - return 0; - while ((diff << i) >> (KEYLENGTH-1) == 0) - i++; - return i; -} - -/* - To understand this stuff, an understanding of keys and all their bits is - necessary. Every node in the trie has a key associated with it, but not - all of the bits in that key are significant. - - Consider a node 'n' and its parent 'tp'. - - If n is a leaf, every bit in its key is significant. Its presence is - necessitated by path compression, since during a tree traversal (when - searching for a leaf - unless we are doing an insertion) we will completely - ignore all skipped bits we encounter. Thus we need to verify, at the end of - a potentially successful search, that we have indeed been walking the - correct key path. - - Note that we can never "miss" the correct key in the tree if present by - following the wrong path. Path compression ensures that segments of the key - that are the same for all keys with a given prefix are skipped, but the - skipped part *is* identical for each node in the subtrie below the skipped - bit! trie_insert() in this implementation takes care of that - note the - call to tkey_sub_equals() in trie_insert(). - - if n is an internal node - a 'tnode' here, the various parts of its key - have many different meanings. - - Example: - _________________________________________________________________ - | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | - ----------------------------------------------------------------- - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - - _________________________________________________________________ - | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | - ----------------------------------------------------------------- - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - - tp->pos = 7 - tp->bits = 3 - n->pos = 15 - n->bits = 4 - - First, let's just ignore the bits that come before the parent tp, that is - the bits from 0 to (tp->pos-1). They are *known* but at this point we do - not use them for anything. - - The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the - index into the parent's child array. That is, they will be used to find - 'n' among tp's children. - - The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits - for the node n. - - All the bits we have seen so far are significant to the node n. The rest - of the bits are really not needed or indeed known in n->key. - - The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into - n's child array, and will of course be different for each child. - - - The rest of the bits, from (n->pos + n->bits) onward, are completely unknown - at this point. - -*/ - -static inline void check_tnode(const struct tnode *tn) -{ - WARN_ON(tn && tn->pos+tn->bits > 32); -} +/* To understand this stuff, an understanding of keys and all their bits is + * necessary. Every node in the trie has a key associated with it, but not + * all of the bits in that key are significant. + * + * Consider a node 'n' and its parent 'tp'. + * + * If n is a leaf, every bit in its key is significant. Its presence is + * necessitated by path compression, since during a tree traversal (when + * searching for a leaf - unless we are doing an insertion) we will completely + * ignore all skipped bits we encounter. Thus we need to verify, at the end of + * a potentially successful search, that we have indeed been walking the + * correct key path. + * + * Note that we can never "miss" the correct key in the tree if present by + * following the wrong path. Path compression ensures that segments of the key + * that are the same for all keys with a given prefix are skipped, but the + * skipped part *is* identical for each node in the subtrie below the skipped + * bit! trie_insert() in this implementation takes care of that. + * + * if n is an internal node - a 'tnode' here, the various parts of its key + * have many different meanings. + * + * Example: + * _________________________________________________________________ + * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | + * ----------------------------------------------------------------- + * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 + * + * _________________________________________________________________ + * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | + * ----------------------------------------------------------------- + * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 + * + * tp->pos = 22 + * tp->bits = 3 + * n->pos = 13 + * n->bits = 4 + * + * First, let's just ignore the bits that come before the parent tp, that is + * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this + * point we do not use them for anything. + * + * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the + * index into the parent's child array. That is, they will be used to find + * 'n' among tp's children. + * + * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits + * for the node n. + * + * All the bits we have seen so far are significant to the node n. The rest + * of the bits are really not needed or indeed known in n->key. + * + * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into + * n's child array, and will of course be different for each child. + * + * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown + * at this point. + */ static const int halve_threshold = 25; static const int inflate_threshold = 50; @@ -357,17 +273,23 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) call_rcu(&fa->rcu, __alias_free_mem); } -static void __leaf_free_rcu(struct rcu_head *head) -{ - struct leaf *l = container_of(head, struct leaf, rcu); - kmem_cache_free(trie_leaf_kmem, l); -} +#define TNODE_KMALLOC_MAX \ + ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *)) -static inline void free_leaf(struct leaf *l) +static void __node_free_rcu(struct rcu_head *head) { - call_rcu(&l->rcu, __leaf_free_rcu); + struct tnode *n = container_of(head, struct tnode, rcu); + + if (IS_LEAF(n)) + kmem_cache_free(trie_leaf_kmem, n); + else if (n->bits <= TNODE_KMALLOC_MAX) + kfree(n); + else + vfree(n); } +#define node_free(n) call_rcu(&n->rcu, __node_free_rcu) + static inline void free_leaf_info(struct leaf_info *leaf) { kfree_rcu(leaf, rcu); @@ -381,56 +303,31 @@ static struct tnode *tnode_alloc(size_t size) return vzalloc(size); } -static void __tnode_free_rcu(struct rcu_head *head) -{ - struct tnode *tn = container_of(head, struct tnode, rcu); - size_t size = sizeof(struct tnode) + - (sizeof(struct rt_trie_node *) << tn->bits); - - if (size <= PAGE_SIZE) - kfree(tn); - else - vfree(tn); -} - -static inline void tnode_free(struct tnode *tn) +static inline void empty_child_inc(struct tnode *n) { - if (IS_LEAF(tn)) - free_leaf((struct leaf *) tn); - else - call_rcu(&tn->rcu, __tnode_free_rcu); + ++n->empty_children ? : ++n->full_children; } -static void tnode_free_safe(struct tnode *tn) +static inline void empty_child_dec(struct tnode *n) { - BUG_ON(IS_LEAF(tn)); - tn->tnode_free = tnode_free_head; - tnode_free_head = tn; - tnode_free_size += sizeof(struct tnode) + - (sizeof(struct rt_trie_node *) << tn->bits); + n->empty_children-- ? : n->full_children--; } -static void tnode_free_flush(void) +static struct tnode *leaf_new(t_key key) { - struct tnode *tn; - - while ((tn = tnode_free_head)) { - tnode_free_head = tn->tnode_free; - tn->tnode_free = NULL; - tnode_free(tn); - } - - if (tnode_free_size >= PAGE_SIZE * sync_pages) { - tnode_free_size = 0; - synchronize_rcu(); - } -} - -static struct leaf *leaf_new(void) -{ - struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); + struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); if (l) { - l->parent = T_LEAF; + l->parent = NULL; + /* set key and pos to reflect full key value + * any trailing zeros in the key should be ignored + * as the nodes are searched + */ + l->key = key; + l->slen = 0; + l->pos = 0; + /* set bits to 0 indicating we are not a tnode */ + l->bits = 0; + INIT_HLIST_HEAD(&l->list); } return l; @@ -449,462 +346,530 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); + size_t sz = offsetof(struct tnode, child[1ul << bits]); struct tnode *tn = tnode_alloc(sz); + unsigned int shift = pos + bits; + + /* verify bits and pos their msb bits clear and values are valid */ + BUG_ON(!bits || (shift > KEYLENGTH)); if (tn) { - tn->parent = T_TNODE; + tn->parent = NULL; + tn->slen = pos; tn->pos = pos; tn->bits = bits; - tn->key = key; - tn->full_children = 0; - tn->empty_children = 1<<bits; + tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; + if (bits == KEYLENGTH) + tn->full_children = 1; + else + tn->empty_children = 1ul << bits; } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), - sizeof(struct rt_trie_node *) << bits); + sizeof(struct tnode *) << bits); return tn; } -/* - * Check whether a tnode 'n' is "full", i.e. it is an internal node +/* Check whether a tnode 'n' is "full", i.e. it is an internal node * and no bits are skipped. See discussion in dyntree paper p. 6 */ - -static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) +static inline int tnode_full(const struct tnode *tn, const struct tnode *n) { - if (n == NULL || IS_LEAF(n)) - return 0; - - return ((struct tnode *) n)->pos == tn->pos + tn->bits; + return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); } -static inline void put_child(struct tnode *tn, int i, - struct rt_trie_node *n) -{ - tnode_put_child_reorg(tn, i, n, -1); -} - - /* - * Add a child at position i overwriting the old value. - * Update the value of full_children and empty_children. - */ - -static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, - int wasfull) +/* Add a child at position i overwriting the old value. + * Update the value of full_children and empty_children. + */ +static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) { - struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); - int isfull; + struct tnode *chi = tnode_get_child(tn, i); + int isfull, wasfull; - BUG_ON(i >= 1<<tn->bits); + BUG_ON(i >= tnode_child_length(tn)); - /* update emptyChildren */ + /* update emptyChildren, overflow into fullChildren */ if (n == NULL && chi != NULL) - tn->empty_children++; - else if (n != NULL && chi == NULL) - tn->empty_children--; + empty_child_inc(tn); + if (n != NULL && chi == NULL) + empty_child_dec(tn); /* update fullChildren */ - if (wasfull == -1) - wasfull = tnode_full(tn, chi); - + wasfull = tnode_full(tn, chi); isfull = tnode_full(tn, n); + if (wasfull && !isfull) tn->full_children--; else if (!wasfull && isfull) tn->full_children++; - if (n) - node_set_parent(n, tn); + if (n && (tn->slen < n->slen)) + tn->slen = n->slen; rcu_assign_pointer(tn->child[i], n); } -#define MAX_WORK 10 -static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) +static void update_children(struct tnode *tn) { - int i; - struct tnode *old_tn; - int inflate_threshold_use; - int halve_threshold_use; - int max_work; + unsigned long i; - if (!tn) - return NULL; + /* update all of the child parent pointers */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); - pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", - tn, inflate_threshold, halve_threshold); + if (!inode) + continue; - /* No children */ - if (tn->empty_children == tnode_child_length(tn)) { - tnode_free_safe(tn); - return NULL; + /* Either update the children of a tnode that + * already belongs to us or update the child + * to point to ourselves. + */ + if (node_parent(inode) == tn) + update_children(inode); + else + node_set_parent(inode, tn); } - /* One child */ - if (tn->empty_children == tnode_child_length(tn) - 1) - goto one_child; - /* - * Double as long as the resulting node has a number of - * nonempty nodes that are above the threshold. - */ - - /* - * From "Implementing a dynamic compressed trie" by Stefan Nilsson of - * the Helsinki University of Technology and Matti Tikkanen of Nokia - * Telecommunications, page 6: - * "A node is doubled if the ratio of non-empty children to all - * children in the *doubled* node is at least 'high'." - * - * 'high' in this instance is the variable 'inflate_threshold'. It - * is expressed as a percentage, so we multiply it with - * tnode_child_length() and instead of multiplying by 2 (since the - * child array will be doubled by inflate()) and multiplying - * the left-hand side by 100 (to handle the percentage thing) we - * multiply the left-hand side by 50. - * - * The left-hand side may look a bit weird: tnode_child_length(tn) - * - tn->empty_children is of course the number of non-null children - * in the current node. tn->full_children is the number of "full" - * children, that is non-null tnodes with a skip value of 0. - * All of those will be doubled in the resulting inflated tnode, so - * we just count them one extra time here. - * - * A clearer way to write this would be: - * - * to_be_doubled = tn->full_children; - * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - - * tn->full_children; - * - * new_child_length = tnode_child_length(tn) * 2; - * - * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / - * new_child_length; - * if (new_fill_factor >= inflate_threshold) - * - * ...and so on, tho it would mess up the while () loop. - * - * anyway, - * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= - * inflate_threshold - * - * avoid a division: - * 100 * (not_to_be_doubled + 2*to_be_doubled) >= - * inflate_threshold * new_child_length - * - * expand not_to_be_doubled and to_be_doubled, and shorten: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= inflate_threshold * new_child_length - * - * expand new_child_length: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= - * inflate_threshold * tnode_child_length(tn) * 2 - * - * shorten again: - * 50 * (tn->full_children + tnode_child_length(tn) - - * tn->empty_children) >= inflate_threshold * - * tnode_child_length(tn) - * - */ +} - check_tnode(tn); +static inline void put_child_root(struct tnode *tp, struct trie *t, + t_key key, struct tnode *n) +{ + if (tp) + put_child(tp, get_index(key, tp), n); + else + rcu_assign_pointer(t->trie, n); +} - /* Keep root node larger */ +static inline void tnode_free_init(struct tnode *tn) +{ + tn->rcu.next = NULL; +} - if (!node_parent((struct rt_trie_node *)tn)) { - inflate_threshold_use = inflate_threshold_root; - halve_threshold_use = halve_threshold_root; - } else { - inflate_threshold_use = inflate_threshold; - halve_threshold_use = halve_threshold; - } +static inline void tnode_free_append(struct tnode *tn, struct tnode *n) +{ + n->rcu.next = tn->rcu.next; + tn->rcu.next = &n->rcu; +} - max_work = MAX_WORK; - while ((tn->full_children > 0 && max_work-- && - 50 * (tn->full_children + tnode_child_length(tn) - - tn->empty_children) - >= inflate_threshold_use * tnode_child_length(tn))) { +static void tnode_free(struct tnode *tn) +{ + struct callback_head *head = &tn->rcu; - old_tn = tn; - tn = inflate(t, tn); + while (head) { + head = head->next; + tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]); + node_free(tn); - if (IS_ERR(tn)) { - tn = old_tn; -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.resize_node_skipped++; -#endif - break; - } + tn = container_of(head, struct tnode, rcu); } - check_tnode(tn); - - /* Return if at least one inflate is run */ - if (max_work != MAX_WORK) - return (struct rt_trie_node *) tn; - - /* - * Halve as long as the number of empty children in this - * node is above threshold. - */ - - max_work = MAX_WORK; - while (tn->bits > 1 && max_work-- && - 100 * (tnode_child_length(tn) - tn->empty_children) < - halve_threshold_use * tnode_child_length(tn)) { - - old_tn = tn; - tn = halve(t, tn); - if (IS_ERR(tn)) { - tn = old_tn; -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.resize_node_skipped++; -#endif - break; - } + if (tnode_free_size >= PAGE_SIZE * sync_pages) { + tnode_free_size = 0; + synchronize_rcu(); } +} +static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) +{ + struct tnode *tp = node_parent(oldtnode); + unsigned long i; - /* Only one child remains */ - if (tn->empty_children == tnode_child_length(tn) - 1) { -one_child: - for (i = 0; i < tnode_child_length(tn); i++) { - struct rt_trie_node *n; - - n = rtnl_dereference(tn->child[i]); - if (!n) - continue; - - /* compress one level */ + /* setup the parent pointer out of and back into this node */ + NODE_INIT_PARENT(tn, tp); + put_child_root(tp, t, tn->key, tn); - node_set_parent(n, NULL); - tnode_free_safe(tn); - return n; - } - } - return (struct rt_trie_node *) tn; -} + /* update all of the child parent pointers */ + update_children(tn); + /* all pointers should be clean so we are done */ + tnode_free(oldtnode); -static void tnode_clean_free(struct tnode *tn) -{ - int i; - struct tnode *tofree; + /* resize children now that oldtnode is freed */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); - for (i = 0; i < tnode_child_length(tn); i++) { - tofree = (struct tnode *)rtnl_dereference(tn->child[i]); - if (tofree) - tnode_free(tofree); + /* resize child node */ + if (tnode_full(tn, inode)) + resize(t, inode); } - tnode_free(tn); } -static struct tnode *inflate(struct trie *t, struct tnode *tn) +static int inflate(struct trie *t, struct tnode *oldtnode) { - struct tnode *oldtnode = tn; - int olen = tnode_child_length(tn); - int i; + struct tnode *tn; + unsigned long i; + t_key m; pr_debug("In inflate\n"); - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); - + tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); if (!tn) - return ERR_PTR(-ENOMEM); - - /* - * Preallocate and store tnodes before the actual work so we - * don't get into an inconsistent state if memory allocation - * fails. In case of failure we return the oldnode and inflate - * of tnode is ignored. - */ + return -ENOMEM; - for (i = 0; i < olen; i++) { - struct tnode *inode; - - inode = (struct tnode *) tnode_get_child(oldtnode, i); - if (inode && - IS_TNODE(inode) && - inode->pos == oldtnode->pos + oldtnode->bits && - inode->bits > 1) { - struct tnode *left, *right; - t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; - - left = tnode_new(inode->key&(~m), inode->pos + 1, - inode->bits - 1); - if (!left) - goto nomem; - - right = tnode_new(inode->key|m, inode->pos + 1, - inode->bits - 1); - - if (!right) { - tnode_free(left); - goto nomem; - } + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); - put_child(tn, 2*i, (struct rt_trie_node *) left); - put_child(tn, 2*i+1, (struct rt_trie_node *) right); - } - } - - for (i = 0; i < olen; i++) { - struct tnode *inode; - struct rt_trie_node *node = tnode_get_child(oldtnode, i); - struct tnode *left, *right; - int size, j; + /* Assemble all of the pointers in our cluster, in this case that + * represents all of the pointers out of our allocated nodes that + * point to existing tnodes and the links between our allocated + * nodes. + */ + for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { + struct tnode *inode = tnode_get_child(oldtnode, --i); + struct tnode *node0, *node1; + unsigned long j, k; /* An empty child */ - if (node == NULL) + if (inode == NULL) continue; /* A leaf or an internal node with skipped bits */ - - if (IS_LEAF(node) || ((struct tnode *) node)->pos > - tn->pos + tn->bits - 1) { - put_child(tn, - tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), - node); + if (!tnode_full(oldtnode, inode)) { + put_child(tn, get_index(inode->key, tn), inode); continue; } - /* An internal node with two children */ - inode = (struct tnode *) node; + /* drop the node in the old tnode free list */ + tnode_free_append(oldtnode, inode); + /* An internal node with two children */ if (inode->bits == 1) { - put_child(tn, 2*i, rtnl_dereference(inode->child[0])); - put_child(tn, 2*i+1, rtnl_dereference(inode->child[1])); - - tnode_free_safe(inode); + put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); + put_child(tn, 2 * i, tnode_get_child(inode, 0)); continue; } - /* An internal node with more than two children */ - /* We will replace this node 'inode' with two new - * ones, 'left' and 'right', each with half of the + * ones, 'node0' and 'node1', each with half of the * original children. The two new nodes will have * a position one bit further down the key and this * means that the "significant" part of their keys * (see the discussion near the top of this file) * will differ by one bit, which will be "0" in - * left's key and "1" in right's key. Since we are + * node0's key and "1" in node1's key. Since we are * moving the key position by one step, the bit that * we are moving away from - the bit at position - * (inode->pos) - is the one that will differ between - * left and right. So... we synthesize that bit in the - * two new keys. - * The mask 'm' below will be a single "one" bit at - * the position (inode->pos) + * (tn->pos) - is the one that will differ between + * node0 and node1. So... we synthesize that bit in the + * two new keys. */ + node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); + if (!node1) + goto nomem; + node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); + + tnode_free_append(tn, node1); + if (!node0) + goto nomem; + tnode_free_append(tn, node0); + + /* populate child pointers in new nodes */ + for (k = tnode_child_length(inode), j = k / 2; j;) { + put_child(node1, --j, tnode_get_child(inode, --k)); + put_child(node0, j, tnode_get_child(inode, j)); + put_child(node1, --j, tnode_get_child(inode, --k)); + put_child(node0, j, tnode_get_child(inode, j)); + } - /* Use the old key, but set the new significant - * bit to zero. - */ + /* link new nodes to parent */ + NODE_INIT_PARENT(node1, tn); + NODE_INIT_PARENT(node0, tn); + + /* link parent to nodes */ + put_child(tn, 2 * i + 1, node1); + put_child(tn, 2 * i, node0); + } + + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); + + return 0; +nomem: + /* all pointers should be clean so we are done */ + tnode_free(tn); + return -ENOMEM; +} + +static int halve(struct trie *t, struct tnode *oldtnode) +{ + struct tnode *tn; + unsigned long i; - left = (struct tnode *) tnode_get_child(tn, 2*i); - put_child(tn, 2*i, NULL); + pr_debug("In halve\n"); - BUG_ON(!left); + tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); + if (!tn) + return -ENOMEM; - right = (struct tnode *) tnode_get_child(tn, 2*i+1); - put_child(tn, 2*i+1, NULL); + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); - BUG_ON(!right); + /* Assemble all of the pointers in our cluster, in this case that + * represents all of the pointers out of our allocated nodes that + * point to existing tnodes and the links between our allocated + * nodes. + */ + for (i = tnode_child_length(oldtnode); i;) { + struct tnode *node1 = tnode_get_child(oldtnode, --i); + struct tnode *node0 = tnode_get_child(oldtnode, --i); + struct tnode *inode; - size = tnode_child_length(left); - for (j = 0; j < size; j++) { - put_child(left, j, rtnl_dereference(inode->child[j])); - put_child(right, j, rtnl_dereference(inode->child[j + size])); + /* At least one of the children is empty */ + if (!node1 || !node0) { + put_child(tn, i / 2, node1 ? : node0); + continue; + } + + /* Two nonempty children */ + inode = tnode_new(node0->key, oldtnode->pos, 1); + if (!inode) { + tnode_free(tn); + return -ENOMEM; } - put_child(tn, 2*i, resize(t, left)); - put_child(tn, 2*i+1, resize(t, right)); + tnode_free_append(tn, inode); - tnode_free_safe(inode); + /* initialize pointers out of node */ + put_child(inode, 1, node1); + put_child(inode, 0, node0); + NODE_INIT_PARENT(inode, tn); + + /* link parent to node */ + put_child(tn, i / 2, inode); } - tnode_free_safe(oldtnode); - return tn; -nomem: - tnode_clean_free(tn); - return ERR_PTR(-ENOMEM); + + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); + + return 0; } -static struct tnode *halve(struct trie *t, struct tnode *tn) +static void collapse(struct trie *t, struct tnode *oldtnode) { - struct tnode *oldtnode = tn; - struct rt_trie_node *left, *right; - int i; - int olen = tnode_child_length(tn); + struct tnode *n, *tp; + unsigned long i; - pr_debug("In halve\n"); + /* scan the tnode looking for that one child that might still exist */ + for (n = NULL, i = tnode_child_length(oldtnode); !n && i;) + n = tnode_get_child(oldtnode, --i); - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); + /* compress one level */ + tp = node_parent(oldtnode); + put_child_root(tp, t, oldtnode->key, n); + node_set_parent(n, tp); - if (!tn) - return ERR_PTR(-ENOMEM); + /* drop dead node */ + node_free(oldtnode); +} - /* - * Preallocate and store tnodes before the actual work so we - * don't get into an inconsistent state if memory allocation - * fails. In case of failure we return the oldnode and halve - * of tnode is ignored. +static unsigned char update_suffix(struct tnode *tn) +{ + unsigned char slen = tn->pos; + unsigned long stride, i; + + /* search though the list of children looking for nodes that might + * have a suffix greater than the one we currently have. This is + * why we start with a stride of 2 since a stride of 1 would + * represent the nodes with suffix length equal to tn->pos */ + for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) { + struct tnode *n = tnode_get_child(tn, i); - for (i = 0; i < olen; i += 2) { - left = tnode_get_child(oldtnode, i); - right = tnode_get_child(oldtnode, i+1); + if (!n || (n->slen <= slen)) + continue; - /* Two nonempty children */ - if (left && right) { - struct tnode *newn; + /* update stride and slen based on new value */ + stride <<= (n->slen - slen); + slen = n->slen; + i &= ~(stride - 1); - newn = tnode_new(left->key, tn->pos + tn->bits, 1); + /* if slen covers all but the last bit we can stop here + * there will be nothing longer than that since only node + * 0 and 1 << (bits - 1) could have that as their suffix + * length. + */ + if ((slen + 1) >= (tn->pos + tn->bits)) + break; + } - if (!newn) - goto nomem; + tn->slen = slen; - put_child(tn, i/2, (struct rt_trie_node *)newn); - } + return slen; +} - } +/* From "Implementing a dynamic compressed trie" by Stefan Nilsson of + * the Helsinki University of Technology and Matti Tikkanen of Nokia + * Telecommunications, page 6: + * "A node is doubled if the ratio of non-empty children to all + * children in the *doubled* node is at least 'high'." + * + * 'high' in this instance is the variable 'inflate_threshold'. It + * is expressed as a percentage, so we multiply it with + * tnode_child_length() and instead of multiplying by 2 (since the + * child array will be doubled by inflate()) and multiplying + * the left-hand side by 100 (to handle the percentage thing) we + * multiply the left-hand side by 50. + * + * The left-hand side may look a bit weird: tnode_child_length(tn) + * - tn->empty_children is of course the number of non-null children + * in the current node. tn->full_children is the number of "full" + * children, that is non-null tnodes with a skip value of 0. + * All of those will be doubled in the resulting inflated tnode, so + * we just count them one extra time here. + * + * A clearer way to write this would be: + * + * to_be_doubled = tn->full_children; + * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - + * tn->full_children; + * + * new_child_length = tnode_child_length(tn) * 2; + * + * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / + * new_child_length; + * if (new_fill_factor >= inflate_threshold) + * + * ...and so on, tho it would mess up the while () loop. + * + * anyway, + * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= + * inflate_threshold + * + * avoid a division: + * 100 * (not_to_be_doubled + 2*to_be_doubled) >= + * inflate_threshold * new_child_length + * + * expand not_to_be_doubled and to_be_doubled, and shorten: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= inflate_threshold * new_child_length + * + * expand new_child_length: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= + * inflate_threshold * tnode_child_length(tn) * 2 + * + * shorten again: + * 50 * (tn->full_children + tnode_child_length(tn) - + * tn->empty_children) >= inflate_threshold * + * tnode_child_length(tn) + * + */ +static bool should_inflate(const struct tnode *tp, const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + unsigned long threshold = used; - for (i = 0; i < olen; i += 2) { - struct tnode *newBinNode; + /* Keep root node larger */ + threshold *= tp ? inflate_threshold : inflate_threshold_root; + used -= tn->empty_children; + used += tn->full_children; - left = tnode_get_child(oldtnode, i); - right = tnode_get_child(oldtnode, i+1); + /* if bits == KEYLENGTH then pos = 0, and will fail below */ - /* At least one of the children is empty */ - if (left == NULL) { - if (right == NULL) /* Both are empty */ - continue; - put_child(tn, i/2, right); - continue; + return (used > 1) && tn->pos && ((50 * used) >= threshold); +} + +static bool should_halve(const struct tnode *tp, const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + unsigned long threshold = used; + + /* Keep root node larger */ + threshold *= tp ? halve_threshold : halve_threshold_root; + used -= tn->empty_children; + + /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ + + return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); +} + +static bool should_collapse(const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + + used -= tn->empty_children; + + /* account for bits == KEYLENGTH case */ + if ((tn->bits == KEYLENGTH) && tn->full_children) + used -= KEY_MAX; + + /* One child or none, time to drop us from the trie */ + return used < 2; +} + +#define MAX_WORK 10 +static void resize(struct trie *t, struct tnode *tn) +{ + struct tnode *tp = node_parent(tn); + struct tnode __rcu **cptr; + int max_work = MAX_WORK; + + pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", + tn, inflate_threshold, halve_threshold); + + /* track the tnode via the pointer from the parent instead of + * doing it ourselves. This way we can let RCU fully do its + * thing without us interfering + */ + cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; + BUG_ON(tn != rtnl_dereference(*cptr)); + + /* Double as long as the resulting node has a number of + * nonempty nodes that are above the threshold. + */ + while (should_inflate(tp, tn) && max_work) { + if (inflate(t, tn)) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(t->stats->resize_node_skipped); +#endif + break; } - if (right == NULL) { - put_child(tn, i/2, left); - continue; + max_work--; + tn = rtnl_dereference(*cptr); + } + + /* Return if at least one inflate is run */ + if (max_work != MAX_WORK) + return; + + /* Halve as long as the number of empty children in this + * node is above threshold. + */ + while (should_halve(tp, tn) && max_work) { + if (halve(t, tn)) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(t->stats->resize_node_skipped); +#endif + break; } - /* Two nonempty children */ - newBinNode = (struct tnode *) tnode_get_child(tn, i/2); - put_child(tn, i/2, NULL); - put_child(newBinNode, 0, left); - put_child(newBinNode, 1, right); - put_child(tn, i/2, resize(t, newBinNode)); + max_work--; + tn = rtnl_dereference(*cptr); + } + + /* Only one child remains */ + if (should_collapse(tn)) { + collapse(t, tn); + return; + } + + /* Return if at least one deflate was run */ + if (max_work != MAX_WORK) + return; + + /* push the suffix length to the parent node */ + if (tn->slen > tn->pos) { + unsigned char slen = update_suffix(tn); + + if (tp && (slen > tp->slen)) + tp->slen = slen; } - tnode_free_safe(oldtnode); - return tn; -nomem: - tnode_clean_free(tn); - return ERR_PTR(-ENOMEM); } /* readside must use rcu_read_lock currently dump routines via get_fa_head and dump */ -static struct leaf_info *find_leaf_info(struct leaf *l, int plen) +static struct leaf_info *find_leaf_info(struct tnode *l, int plen) { struct hlist_head *head = &l->list; struct leaf_info *li; @@ -916,7 +881,7 @@ static struct leaf_info *find_leaf_info(struct leaf *l, int plen) return NULL; } -static inline struct list_head *get_fa_head(struct leaf *l, int plen) +static inline struct list_head *get_fa_head(struct tnode *l, int plen) { struct leaf_info *li = find_leaf_info(l, plen); @@ -926,8 +891,51 @@ static inline struct list_head *get_fa_head(struct leaf *l, int plen) return &li->falh; } -static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) +static void leaf_pull_suffix(struct tnode *l) +{ + struct tnode *tp = node_parent(l); + + while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) { + if (update_suffix(tp) > l->slen) + break; + tp = node_parent(tp); + } +} + +static void leaf_push_suffix(struct tnode *l) +{ + struct tnode *tn = node_parent(l); + + /* if this is a new leaf then tn will be NULL and we can sort + * out parent suffix lengths as a part of trie_rebalance + */ + while (tn && (tn->slen < l->slen)) { + tn->slen = l->slen; + tn = node_parent(tn); + } +} + +static void remove_leaf_info(struct tnode *l, struct leaf_info *old) { + /* record the location of the previous list_info entry */ + struct hlist_node **pprev = old->hlist.pprev; + struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); + + /* remove the leaf info from the list */ + hlist_del_rcu(&old->hlist); + + /* only access li if it is pointing at the last valid hlist_node */ + if (hlist_empty(&l->list) || (*pprev)) + return; + + /* update the trie with the latest suffix length */ + l->slen = KEYLENGTH - li->plen; + leaf_pull_suffix(l); +} + +static void insert_leaf_info(struct tnode *l, struct leaf_info *new) +{ + struct hlist_head *head = &l->list; struct leaf_info *li = NULL, *last = NULL; if (hlist_empty(head)) { @@ -944,217 +952,174 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) else hlist_add_before_rcu(&new->hlist, &li->hlist); } + + /* if we added to the tail node then we need to update slen */ + if (l->slen < (KEYLENGTH - new->plen)) { + l->slen = KEYLENGTH - new->plen; + leaf_push_suffix(l); + } } /* rcu_read_lock needs to be hold by caller from readside */ +static struct tnode *fib_find_node(struct trie *t, u32 key) +{ + struct tnode *n = rcu_dereference_rtnl(t->trie); + + while (n) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the bits in the cindex. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. + * if (index & (~0ul << bits)) + * we have a mismatch in skip bits and failed + * else + * we know the value is cindex + */ + if (index & (~0ul << n->bits)) + return NULL; -static struct leaf * -fib_find_node(struct trie *t, u32 key) -{ - int pos; - struct tnode *tn; - struct rt_trie_node *n; + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) + break; - pos = 0; - n = rcu_dereference_rtnl(t->trie); + n = tnode_get_child_rcu(n, index); + } - while (n != NULL && NODE_TYPE(n) == T_TNODE) { - tn = (struct tnode *) n; + return n; +} - check_tnode(tn); +/* Return the first fib alias matching TOS with + * priority less than or equal to PRIO. + */ +static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) +{ + struct fib_alias *fa; - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { - pos = tn->pos + tn->bits; - n = tnode_get_child_rcu(tn, - tkey_extract_bits(key, - tn->pos, - tn->bits)); - } else - break; - } - /* Case we have found a leaf. Compare prefixes */ + if (!fah) + return NULL; - if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) - return (struct leaf *)n; + list_for_each_entry(fa, fah, fa_list) { + if (fa->fa_tos > tos) + continue; + if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) + return fa; + } return NULL; } static void trie_rebalance(struct trie *t, struct tnode *tn) { - int wasfull; - t_key cindex, key; struct tnode *tp; - key = tn->key; - - while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); - tn = (struct tnode *)resize(t, tn); - - tnode_put_child_reorg(tp, cindex, - (struct rt_trie_node *)tn, wasfull); - - tp = node_parent((struct rt_trie_node *) tn); - if (!tp) - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); - - tnode_free_flush(); - if (!tp) - break; + while ((tp = node_parent(tn)) != NULL) { + resize(t, tn); tn = tp; } /* Handle last (top) tnode */ if (IS_TNODE(tn)) - tn = (struct tnode *)resize(t, tn); - - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); - tnode_free_flush(); + resize(t, tn); } /* only used from updater-side */ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) { - int pos, newpos; - struct tnode *tp = NULL, *tn = NULL; - struct rt_trie_node *n; - struct leaf *l; - int missbit; struct list_head *fa_head = NULL; + struct tnode *l, *n, *tp = NULL; struct leaf_info *li; - t_key cindex; - pos = 0; + li = leaf_info_new(plen); + if (!li) + return NULL; + fa_head = &li->falh; + n = rtnl_dereference(t->trie); /* If we point to NULL, stop. Either the tree is empty and we should * just put a new leaf in if, or we have reached an empty child slot, * and we should just put our new leaf in that. - * If we point to a T_TNODE, check if it matches our key. Note that - * a T_TNODE might be skipping any number of bits - its 'pos' need - * not be the parent's 'pos'+'bits'! - * - * If it does match the current key, get pos/bits from it, extract - * the index from our key, push the T_TNODE and walk the tree. * - * If it doesn't, we have to replace it with a new T_TNODE. - * - * If we point to a T_LEAF, it might or might not have the same key - * as we do. If it does, just change the value, update the T_LEAF's - * value, and return it. - * If it doesn't, we need to replace it with a T_TNODE. + * If we hit a node with a key that does't match then we should stop + * and create a new tnode to replace that node and insert ourselves + * and the other node into the new tnode. */ - - while (n != NULL && NODE_TYPE(n) == T_TNODE) { - tn = (struct tnode *) n; - - check_tnode(tn); - - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { - tp = tn; - pos = tn->pos + tn->bits; - n = tnode_get_child(tn, - tkey_extract_bits(key, - tn->pos, - tn->bits)); - - BUG_ON(n && node_parent(n) != tn); - } else + while (n) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the "bits" in the prefix. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. + * if !(index >> bits) + * we know the value is child index + * else + * we have a mismatch in skip bits and failed + */ + if (index >> n->bits) break; - } - - /* - * n ----> NULL, LEAF or TNODE - * - * tp is n's (parent) ----> NULL or TNODE - */ - BUG_ON(tp && IS_LEAF(tp)); - - /* Case 1: n is a leaf. Compare prefixes */ - - if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { - l = (struct leaf *) n; - li = leaf_info_new(plen); - - if (!li) - return NULL; + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) { + /* Case 1: n is a leaf, and prefixes match*/ + insert_leaf_info(n, li); + return fa_head; + } - fa_head = &li->falh; - insert_leaf_info(&l->list, li); - goto done; + tp = n; + n = tnode_get_child_rcu(n, index); } - l = leaf_new(); - - if (!l) - return NULL; - l->key = key; - li = leaf_info_new(plen); - - if (!li) { - free_leaf(l); + l = leaf_new(key); + if (!l) { + free_leaf_info(li); return NULL; } - fa_head = &li->falh; - insert_leaf_info(&l->list, li); - - if (t->trie && n == NULL) { - /* Case 2: n is NULL, and will just insert a new leaf */ - - node_set_parent((struct rt_trie_node *)l, tp); - - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, (struct rt_trie_node *)l); - } else { - /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ - /* - * Add a new tnode here - * first tnode need some special handling - */ + insert_leaf_info(l, li); - if (n) { - pos = tp ? tp->pos+tp->bits : 0; - newpos = tkey_mismatch(key, pos, n->key); - tn = tnode_new(n->key, newpos, 1); - } else { - newpos = 0; - tn = tnode_new(key, newpos, 1); /* First tnode */ - } + /* Case 2: n is a LEAF or a TNODE and the key doesn't match. + * + * Add a new tnode here + * first tnode need some special handling + * leaves us in position for handling as case 3 + */ + if (n) { + struct tnode *tn; + tn = tnode_new(key, __fls(key ^ n->key), 1); if (!tn) { free_leaf_info(li); - free_leaf(l); + node_free(l); return NULL; } - node_set_parent((struct rt_trie_node *)tn, tp); + /* initialize routes out of node */ + NODE_INIT_PARENT(tn, tp); + put_child(tn, get_index(key, tn) ^ 1, n); - missbit = tkey_extract_bits(key, newpos, 1); - put_child(tn, missbit, (struct rt_trie_node *)l); - put_child(tn, 1-missbit, n); + /* start adding routes into the node */ + put_child_root(tp, t, key, tn); + node_set_parent(n, tn); - if (tp) { - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, (struct rt_trie_node *)tn); - } else { - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); - tp = tn; - } + /* parent now has a NULL spot where the leaf can go */ + tp = tn; } - if (tp && tp->pos + tp->bits > 32) - pr_warn("fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", - tp, tp->pos, tp->bits, key, plen); - - /* Rebalance the trie */ + /* Case 3: n is NULL, and will just insert a new leaf */ + if (tp) { + NODE_INIT_PARENT(l, tp); + put_child(tp, get_index(key, tp), l); + trie_rebalance(t, tp); + } else { + rcu_assign_pointer(t->trie, l); + } - trie_rebalance(t, tp); -done: return fa_head; } @@ -1171,7 +1136,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) u8 tos = cfg->fc_tos; u32 key, mask; int err; - struct leaf *l; + struct tnode *l; if (plen > 32) return -EINVAL; @@ -1328,18 +1293,130 @@ err: return err; } +static inline t_key prefix_mismatch(t_key key, struct tnode *n) +{ + t_key prefix = n->key; + + return (key ^ prefix) & (prefix | -prefix); +} + /* should be called with rcu_read_lock */ -static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, - t_key key, const struct flowi4 *flp, - struct fib_result *res, int fib_flags) +int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, + struct fib_result *res, int fib_flags) { + struct trie *t = (struct trie *)tb->tb_data; +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie_use_stats __percpu *stats = t->stats; +#endif + const t_key key = ntohl(flp->daddr); + struct tnode *n, *pn; struct leaf_info *li; - struct hlist_head *hhead = &l->list; + t_key cindex; + + n = rcu_dereference(t->trie); + if (!n) + return -EAGAIN; + +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->gets); +#endif + + pn = n; + cindex = 0; + + /* Step 1: Travel to the longest prefix match in the trie */ + for (;;) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the "bits" in the prefix. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. + * if (index & (~0ul << bits)) + * we have a mismatch in skip bits and failed + * else + * we know the value is cindex + */ + if (index & (~0ul << n->bits)) + break; + + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) + goto found; + + /* only record pn and cindex if we are going to be chopping + * bits later. Otherwise we are just wasting cycles. + */ + if (n->slen > n->pos) { + pn = n; + cindex = index; + } + + n = tnode_get_child_rcu(n, index); + if (unlikely(!n)) + goto backtrace; + } + + /* Step 2: Sort out leaves and begin backtracing for longest prefix */ + for (;;) { + /* record the pointer where our next node pointer is stored */ + struct tnode __rcu **cptr = n->child; + + /* This test verifies that none of the bits that differ + * between the key and the prefix exist in the region of + * the lsb and higher in the prefix. + */ + if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) + goto backtrace; + + /* exit out and process leaf */ + if (unlikely(IS_LEAF(n))) + break; + + /* Don't bother recording parent info. Since we are in + * prefix match mode we will have to come back to wherever + * we started this traversal anyway + */ + + while ((n = rcu_dereference(*cptr)) == NULL) { +backtrace: +#ifdef CONFIG_IP_FIB_TRIE_STATS + if (!n) + this_cpu_inc(stats->null_node_hit); +#endif + /* If we are at cindex 0 there are no more bits for + * us to strip at this level so we must ascend back + * up one level to see if there are any more bits to + * be stripped there. + */ + while (!cindex) { + t_key pkey = pn->key; + + pn = node_parent_rcu(pn); + if (unlikely(!pn)) + return -EAGAIN; +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->backtrack); +#endif + /* Get Child's index */ + cindex = get_index(pkey, pn); + } + + /* strip the least significant bit from the cindex */ + cindex &= cindex - 1; + + /* grab pointer for next child node */ + cptr = &pn->child[cindex]; + } + } - hlist_for_each_entry_rcu(li, hhead, hlist) { +found: + /* Step 3: Process the leaf, if that fails fall back to backtracing */ + hlist_for_each_entry_rcu(li, &n->list, hlist) { struct fib_alias *fa; - if (l->key != (key & li->mask_plen)) + if ((key ^ n->key) & li->mask_plen) continue; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -1354,9 +1431,9 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; - if (err) { + if (unlikely(err < 0)) { #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_passed++; + this_cpu_inc(stats->semantic_match_passed); #endif return err; } @@ -1370,241 +1447,48 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_passed++; -#endif + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&fi->fib_clntref); + res->prefixlen = li->plen; res->nh_sel = nhsel; res->type = fa->fa_type; - res->scope = fa->fa_info->fib_scope; + res->scope = fi->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &li->falh; - if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&fi->fib_clntref); - return 0; - } - } - -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_miss++; -#endif - } - - return 1; -} - -int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, - struct fib_result *res, int fib_flags) -{ - struct trie *t = (struct trie *) tb->tb_data; - int ret; - struct rt_trie_node *n; - struct tnode *pn; - unsigned int pos, bits; - t_key key = ntohl(flp->daddr); - unsigned int chopped_off; - t_key cindex = 0; - unsigned int current_prefix_length = KEYLENGTH; - struct tnode *cn; - t_key pref_mismatch; - - rcu_read_lock(); - - n = rcu_dereference(t->trie); - if (!n) - goto failed; - #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.gets++; + this_cpu_inc(stats->semantic_match_passed); #endif - - /* Just a leaf? */ - if (IS_LEAF(n)) { - ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); - goto found; - } - - pn = (struct tnode *) n; - chopped_off = 0; - - while (pn) { - pos = pn->pos; - bits = pn->bits; - - if (!chopped_off) - cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), - pos, bits); - - n = tnode_get_child_rcu(pn, cindex); - - if (n == NULL) { -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.null_node_hit++; -#endif - goto backtrace; - } - - if (IS_LEAF(n)) { - ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); - if (ret > 0) - goto backtrace; - goto found; - } - - cn = (struct tnode *)n; - - /* - * It's a tnode, and we can do some extra checks here if we - * like, to avoid descending into a dead-end branch. - * This tnode is in the parent's child array at index - * key[p_pos..p_pos+p_bits] but potentially with some bits - * chopped off, so in reality the index may be just a - * subprefix, padded with zero at the end. - * We can also take a look at any skipped bits in this - * tnode - everything up to p_pos is supposed to be ok, - * and the non-chopped bits of the index (se previous - * paragraph) are also guaranteed ok, but the rest is - * considered unknown. - * - * The skipped bits are key[pos+bits..cn->pos]. - */ - - /* If current_prefix_length < pos+bits, we are already doing - * actual prefix matching, which means everything from - * pos+(bits-chopped_off) onward must be zero along some - * branch of this subtree - otherwise there is *no* valid - * prefix present. Here we can only check the skipped - * bits. Remember, since we have already indexed into the - * parent's child array, we know that the bits we chopped of - * *are* zero. - */ - - /* NOTA BENE: Checking only skipped bits - for the new node here */ - - if (current_prefix_length < pos+bits) { - if (tkey_extract_bits(cn->key, current_prefix_length, - cn->pos - current_prefix_length) - || !(cn->child[0])) - goto backtrace; - } - - /* - * If chopped_off=0, the index is fully validated and we - * only need to look at the skipped bits for this, the new, - * tnode. What we actually want to do is to find out if - * these skipped bits match our key perfectly, or if we will - * have to count on finding a matching prefix further down, - * because if we do, we would like to have some way of - * verifying the existence of such a prefix at this point. - */ - - /* The only thing we can do at this point is to verify that - * any such matching prefix can indeed be a prefix to our - * key, and if the bits in the node we are inspecting that - * do not match our key are not ZERO, this cannot be true. - * Thus, find out where there is a mismatch (before cn->pos) - * and verify that all the mismatching bits are zero in the - * new tnode's key. - */ - - /* - * Note: We aren't very concerned about the piece of - * the key that precede pn->pos+pn->bits, since these - * have already been checked. The bits after cn->pos - * aren't checked since these are by definition - * "unknown" at this point. Thus, what we want to see - * is if we are about to enter the "prefix matching" - * state, and in that case verify that the skipped - * bits that will prevail throughout this subtree are - * zero, as they have to be if we are to find a - * matching prefix. - */ - - pref_mismatch = mask_pfx(cn->key ^ key, cn->pos); - - /* - * In short: If skipped bits in this node do not match - * the search key, enter the "prefix matching" - * state.directly. - */ - if (pref_mismatch) { - /* fls(x) = __fls(x) + 1 */ - int mp = KEYLENGTH - __fls(pref_mismatch) - 1; - - if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) - goto backtrace; - - if (current_prefix_length >= cn->pos) - current_prefix_length = mp; + return err; + } } - pn = (struct tnode *)n; /* Descend */ - chopped_off = 0; - continue; - -backtrace: - chopped_off++; - - /* As zero don't change the child key (cindex) */ - while ((chopped_off <= pn->bits) - && !(cindex & (1<<(chopped_off-1)))) - chopped_off++; - - /* Decrease current_... with bits chopped off */ - if (current_prefix_length > pn->pos + pn->bits - chopped_off) - current_prefix_length = pn->pos + pn->bits - - chopped_off; - - /* - * Either we do the actual chop off according or if we have - * chopped off all bits in this tnode walk up to our parent. - */ - - if (chopped_off <= pn->bits) { - cindex &= ~(1 << (chopped_off-1)); - } else { - struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); - if (!parent) - goto failed; - - /* Get Child's index */ - cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); - pn = parent; - chopped_off = 0; - #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.backtrack++; + this_cpu_inc(stats->semantic_match_miss); #endif - goto backtrace; - } } -failed: - ret = 1; -found: - rcu_read_unlock(); - return ret; + goto backtrace; } EXPORT_SYMBOL_GPL(fib_table_lookup); /* * Remove the leaf and return parent. */ -static void trie_leaf_remove(struct trie *t, struct leaf *l) +static void trie_leaf_remove(struct trie *t, struct tnode *l) { - struct tnode *tp = node_parent((struct rt_trie_node *) l); + struct tnode *tp = node_parent(l); pr_debug("entering trie_leaf_remove(%p)\n", l); if (tp) { - t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); - put_child(tp, cindex, NULL); + put_child(tp, get_index(l->key, tp), NULL); trie_rebalance(t, tp); - } else + } else { RCU_INIT_POINTER(t->trie, NULL); + } - free_leaf(l); + node_free(l); } /* @@ -1618,7 +1502,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; - struct leaf *l; + struct tnode *l; struct leaf_info *li; if (plen > 32) @@ -1683,7 +1567,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) tb->tb_num_default--; if (list_empty(fa_head)) { - hlist_del_rcu(&li->hlist); + remove_leaf_info(l, li); free_leaf_info(li); } @@ -1716,12 +1600,13 @@ static int trie_flush_list(struct list_head *head) return found; } -static int trie_flush_leaf(struct leaf *l) +static int trie_flush_leaf(struct tnode *l) { int found = 0; struct hlist_head *lih = &l->list; struct hlist_node *tmp; struct leaf_info *li = NULL; + unsigned char plen = KEYLENGTH; hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += trie_flush_list(&li->falh); @@ -1729,8 +1614,14 @@ static int trie_flush_leaf(struct leaf *l) if (list_empty(&li->falh)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); + continue; } + + plen = li->plen; } + + l->slen = KEYLENGTH - plen; + return found; } @@ -1738,63 +1629,57 @@ static int trie_flush_leaf(struct leaf *l) * Scan for the next right leaf starting at node p->child[idx] * Since we have back pointer, no recursion necessary. */ -static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) +static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) { do { - t_key idx; + unsigned long idx = c ? idx = get_index(c->key, p) + 1 : 0; - if (c) - idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1; - else - idx = 0; - - while (idx < 1u << p->bits) { + while (idx < tnode_child_length(p)) { c = tnode_get_child_rcu(p, idx++); if (!c) continue; if (IS_LEAF(c)) - return (struct leaf *) c; + return c; /* Rescan start scanning in new node */ - p = (struct tnode *) c; + p = c; idx = 0; } /* Node empty, walk back up to parent */ - c = (struct rt_trie_node *) p; + c = p; } while ((p = node_parent_rcu(c)) != NULL); return NULL; /* Root of trie */ } -static struct leaf *trie_firstleaf(struct trie *t) +static struct tnode *trie_firstleaf(struct trie *t) { - struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie); + struct tnode *n = rcu_dereference_rtnl(t->trie); if (!n) return NULL; if (IS_LEAF(n)) /* trie is just a leaf */ - return (struct leaf *) n; + return n; return leaf_walk_rcu(n, NULL); } -static struct leaf *trie_nextleaf(struct leaf *l) +static struct tnode *trie_nextleaf(struct tnode *l) { - struct rt_trie_node *c = (struct rt_trie_node *) l; - struct tnode *p = node_parent_rcu(c); + struct tnode *p = node_parent_rcu(l); if (!p) return NULL; /* trie with just one leaf */ - return leaf_walk_rcu(p, c); + return leaf_walk_rcu(p, l); } -static struct leaf *trie_leafindex(struct trie *t, int index) +static struct tnode *trie_leafindex(struct trie *t, int index) { - struct leaf *l = trie_firstleaf(t); + struct tnode *l = trie_firstleaf(t); while (l && index-- > 0) l = trie_nextleaf(l); @@ -1809,19 +1694,28 @@ static struct leaf *trie_leafindex(struct trie *t, int index) int fib_table_flush(struct fib_table *tb) { struct trie *t = (struct trie *) tb->tb_data; - struct leaf *l, *ll = NULL; + struct tnode *l, *ll = NULL; int found = 0; for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { found += trie_flush_leaf(l); - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } + ll = l; } - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } pr_debug("trie_flush found=%d\n", found); return found; @@ -1829,6 +1723,11 @@ int fib_table_flush(struct fib_table *tb) void fib_free_table(struct fib_table *tb) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie *t = (struct trie *)tb->tb_data; + + free_percpu(t->stats); +#endif /* CONFIG_IP_FIB_TRIE_STATS */ kfree(tb); } @@ -1869,7 +1768,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, return skb->len; } -static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, +static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { struct leaf_info *li; @@ -1905,7 +1804,7 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { - struct leaf *l; + struct tnode *l; struct trie *t = (struct trie *) tb->tb_data; t_key key = cb->args[2]; int count = cb->args[3]; @@ -1951,7 +1850,7 @@ void __init fib_trie_init(void) 0, SLAB_PANIC, NULL); trie_leaf_kmem = kmem_cache_create("ip_fib_trie", - max(sizeof(struct leaf), + max(sizeof(struct tnode), sizeof(struct leaf_info)), 0, SLAB_PANIC, NULL); } @@ -1972,7 +1871,14 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_num_default = 0; t = (struct trie *) tb->tb_data; - memset(t, 0, sizeof(*t)); + RCU_INIT_POINTER(t->trie, NULL); +#ifdef CONFIG_IP_FIB_TRIE_STATS + t->stats = alloc_percpu(struct trie_use_stats); + if (!t->stats) { + kfree(tb); + tb = NULL; + } +#endif return tb; } @@ -1987,10 +1893,10 @@ struct fib_trie_iter { unsigned int depth; }; -static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) +static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter) { + unsigned long cindex = iter->index; struct tnode *tn = iter->tnode; - unsigned int cindex = iter->index; struct tnode *p; /* A single entry routing table */ @@ -2000,8 +1906,8 @@ static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) pr_debug("get_next iter={node=%p index=%d depth=%d}\n", iter->tnode, iter->index, iter->depth); rescan: - while (cindex < (1<<tn->bits)) { - struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); + while (cindex < tnode_child_length(tn)) { + struct tnode *n = tnode_get_child_rcu(tn, cindex); if (n) { if (IS_LEAF(n)) { @@ -2009,7 +1915,7 @@ rescan: iter->index = cindex + 1; } else { /* push down one level */ - iter->tnode = (struct tnode *) n; + iter->tnode = n; iter->index = 0; ++iter->depth; } @@ -2020,9 +1926,9 @@ rescan: } /* Current node exhausted, pop back up */ - p = node_parent_rcu((struct rt_trie_node *)tn); + p = node_parent_rcu(tn); if (p) { - cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; + cindex = get_index(tn->key, p) + 1; tn = p; --iter->depth; goto rescan; @@ -2032,10 +1938,10 @@ rescan: return NULL; } -static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, +static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t) { - struct rt_trie_node *n; + struct tnode *n; if (!t) return NULL; @@ -2045,7 +1951,7 @@ static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, return NULL; if (IS_TNODE(n)) { - iter->tnode = (struct tnode *) n; + iter->tnode = n; iter->index = 0; iter->depth = 1; } else { @@ -2059,7 +1965,7 @@ static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - struct rt_trie_node *n; + struct tnode *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); @@ -2067,7 +1973,6 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) rcu_read_lock(); for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { if (IS_LEAF(n)) { - struct leaf *l = (struct leaf *)n; struct leaf_info *li; s->leaves++; @@ -2075,19 +1980,13 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, &l->list, hlist) + hlist_for_each_entry_rcu(li, &n->list, hlist) ++s->prefixes; } else { - const struct tnode *tn = (const struct tnode *) n; - int i; - s->tnodes++; - if (tn->bits < MAX_STAT_DEPTH) - s->nodesizes[tn->bits]++; - - for (i = 0; i < (1<<tn->bits); i++) - if (!tn->child[i]) - s->nullpointers++; + if (n->bits < MAX_STAT_DEPTH) + s->nodesizes[n->bits]++; + s->nullpointers += n->empty_children; } } rcu_read_unlock(); @@ -2110,7 +2009,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - bytes = sizeof(struct leaf) * stat->leaves; + bytes = sizeof(struct tnode) * stat->leaves; seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); bytes += sizeof(struct leaf_info) * stat->prefixes; @@ -2131,25 +2030,38 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); - bytes += sizeof(struct rt_trie_node *) * pointers; + bytes += sizeof(struct tnode *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } #ifdef CONFIG_IP_FIB_TRIE_STATS static void trie_show_usage(struct seq_file *seq, - const struct trie_use_stats *stats) + const struct trie_use_stats __percpu *stats) { + struct trie_use_stats s = { 0 }; + int cpu; + + /* loop through all of the CPUs and gather up the stats */ + for_each_possible_cpu(cpu) { + const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); + + s.gets += pcpu->gets; + s.backtrack += pcpu->backtrack; + s.semantic_match_passed += pcpu->semantic_match_passed; + s.semantic_match_miss += pcpu->semantic_match_miss; + s.null_node_hit += pcpu->null_node_hit; + s.resize_node_skipped += pcpu->resize_node_skipped; + } + seq_printf(seq, "\nCounters:\n---------\n"); - seq_printf(seq, "gets = %u\n", stats->gets); - seq_printf(seq, "backtracks = %u\n", stats->backtrack); + seq_printf(seq, "gets = %u\n", s.gets); + seq_printf(seq, "backtracks = %u\n", s.backtrack); seq_printf(seq, "semantic match passed = %u\n", - stats->semantic_match_passed); - seq_printf(seq, "semantic match miss = %u\n", - stats->semantic_match_miss); - seq_printf(seq, "null node hit= %u\n", stats->null_node_hit); - seq_printf(seq, "skipped node resize = %u\n\n", - stats->resize_node_skipped); + s.semantic_match_passed); + seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); + seq_printf(seq, "null node hit= %u\n", s.null_node_hit); + seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); } #endif /* CONFIG_IP_FIB_TRIE_STATS */ @@ -2172,7 +2084,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "Basic info: size of leaf:" " %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct leaf), sizeof(struct tnode)); + sizeof(struct tnode), sizeof(struct tnode)); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; @@ -2190,7 +2102,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) trie_collect_stats(t, &stat); trie_show_stats(seq, &stat); #ifdef CONFIG_IP_FIB_TRIE_STATS - trie_show_usage(seq, &t->stats); + trie_show_usage(seq, t->stats); #endif } } @@ -2211,7 +2123,7 @@ static const struct file_operations fib_triestat_fops = { .release = single_release_net, }; -static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) +static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2223,7 +2135,7 @@ static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - struct rt_trie_node *n; + struct tnode *n; for (n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); @@ -2252,7 +2164,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; - struct rt_trie_node *n; + struct tnode *n; ++*pos; /* next node in same table */ @@ -2338,29 +2250,26 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; - struct rt_trie_node *n = v; + struct tnode *n = v; if (!node_parent_rcu(n)) fib_table_print(seq, iter->tb); if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *) n; - __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); + __be32 prf = htonl(n->key); seq_indent(seq, iter->depth-1); - seq_printf(seq, " +-- %pI4/%d %d %d %d\n", - &prf, tn->pos, tn->bits, tn->full_children, - tn->empty_children); - + seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", + &prf, KEYLENGTH - n->pos - n->bits, n->bits, + n->full_children, n->empty_children); } else { - struct leaf *l = (struct leaf *) n; struct leaf_info *li; - __be32 val = htonl(l->key); + __be32 val = htonl(n->key); seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &n->list, hlist) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2410,9 +2319,9 @@ struct fib_route_iter { t_key key; }; -static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) +static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) { - struct leaf *l = NULL; + struct tnode *l = NULL; struct trie *t = iter->main_trie; /* use cache location of last found key */ @@ -2457,7 +2366,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; - struct leaf *l = v; + struct tnode *l = v; ++*pos; if (v == SEQ_START_TOKEN) { @@ -2503,7 +2412,7 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info */ static int fib_route_seq_show(struct seq_file *seq, void *v) { - struct leaf *l = v; + struct tnode *l = v; struct leaf_info *li; if (v == SEQ_START_TOKEN) { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index b986298a7ba3..92ddea1e6457 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -70,7 +70,6 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) { /* Already processed in GRO path */ @@ -82,14 +81,7 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) - __skb_checksum_complete(skb); - - delta = remcsum_adjust((void *)guehdr + hdrlen, - skb->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); + skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); return guehdr; } @@ -174,7 +166,8 @@ drop: } static struct sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload *ops; struct sk_buff **pp = NULL; @@ -195,7 +188,8 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff) +static int fou_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload *ops; u8 proto = NAPI_GRO_CB(skb)->proto; @@ -226,7 +220,6 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) return guehdr; @@ -241,12 +234,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return NULL; } - delta = remcsum_adjust((void *)guehdr + hdrlen, - NAPI_GRO_CB(skb)->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); skb->remcsum_offload = 1; @@ -254,7 +242,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, } static struct sk_buff **gue_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload **offloads; const struct net_offload *ops; @@ -360,7 +349,8 @@ out: return pp; } -static int gue_gro_complete(struct sk_buff *skb, int nhoff) +static int gue_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload **offloads; struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); @@ -490,7 +480,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_user_data = fou; fou->sock = sock; - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); sk->sk_allocation = GFP_ATOMIC; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index a457232f0131..5a4828ba05ad 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -17,7 +17,7 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/skbuff.h> -#include <linux/rculist.h> +#include <linux/list.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/ip.h> @@ -26,8 +26,8 @@ #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> -#include <linux/hash.h> #include <linux/ethtool.h> +#include <linux/mutex.h> #include <net/arp.h> #include <net/ndisc.h> #include <net/ip.h> @@ -50,38 +50,30 @@ #include <net/ip6_checksum.h> #endif -#define PORT_HASH_BITS 8 -#define PORT_HASH_SIZE (1<<PORT_HASH_BITS) +/* Protects sock_list and refcounts. */ +static DEFINE_MUTEX(geneve_mutex); /* per-network namespace private data for this module */ struct geneve_net { - struct hlist_head sock_list[PORT_HASH_SIZE]; - spinlock_t sock_lock; /* Protects sock_list */ + struct list_head sock_list; }; static int geneve_net_id; -static struct workqueue_struct *geneve_wq; - static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); } -static struct hlist_head *gs_head(struct net *net, __be16 port) +static struct geneve_sock *geneve_find_sock(struct net *net, + sa_family_t family, __be16 port) { struct geneve_net *gn = net_generic(net, geneve_net_id); - - return &gn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; -} - -/* Find geneve socket based on network namespace and UDP port */ -static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) -{ struct geneve_sock *gs; - hlist_for_each_entry_rcu(gs, gs_head(net, port), hlist) { - if (inet_sk(gs->sock->sk)->inet_sport == port) + list_for_each_entry(gs, &gn->sock_list, list) { + if (inet_sk(gs->sock->sk)->inet_sport == port && + inet_sk(gs->sock->sk)->sk.sk_family == family) return gs; } @@ -115,21 +107,25 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool xnet) + bool csum, bool xnet) { struct genevehdr *gnvh; int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); + skb = udp_tunnel_handle_offloads(skb, csum); + if (IS_ERR(skb)) + return PTR_ERR(skb); min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) + if (unlikely(err)) { + kfree_skb(skb); return err; + } skb = vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) @@ -140,11 +136,107 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, - tos, ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, + tos, ttl, df, src_port, dst_port, xnet, + !csum); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); +static int geneve_hlen(struct genevehdr *gh) +{ + return sizeof(*gh) + gh->opt_len * 4; +} + +static struct sk_buff **geneve_gro_receive(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff) +{ + struct sk_buff *p, **pp = NULL; + struct genevehdr *gh, *gh2; + unsigned int hlen, gh_len, off_gnv; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_gnv = skb_gro_offset(skb); + hlen = off_gnv + sizeof(*gh); + gh = skb_gro_header_fast(skb, off_gnv); + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + if (gh->ver != GENEVE_VER || gh->oam) + goto out; + gh_len = geneve_hlen(gh); + + hlen = off_gnv + gh_len; + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + gh2 = (struct genevehdr *)(p->data + off_gnv); + if (gh->opt_len != gh2->opt_len || + memcmp(gh, gh2, gh_len)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, gh_len); + skb_gro_postpull_rcsum(skb, gh, gh_len); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int geneve_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) +{ + struct genevehdr *gh; + struct packet_offload *ptype; + __be16 type; + int gh_len; + int err = -ENOSYS; + + udp_tunnel_gro_complete(skb, nhoff); + + gh = (struct genevehdr *)(skb->data + nhoff); + gh_len = geneve_hlen(gh); + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); + + rcu_read_unlock(); + return err; +} + static void geneve_notify_add_rx_port(struct geneve_sock *gs) { struct sock *sk = gs->sock->sk; @@ -159,6 +251,15 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs) } } +static void geneve_notify_del_rx_port(struct geneve_sock *gs) +{ + struct sock *sk = gs->sock->sk; + sa_family_t sa_family = sk->sk_family; + + if (sa_family == AF_INET) + udp_del_offload(&gs->udp_offloads); +} + /* Callback from net/ipv4/udp.c to receive packets */ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { @@ -201,15 +302,6 @@ error: return 1; } -static void geneve_del_work(struct work_struct *work) -{ - struct geneve_sock *gs = container_of(work, struct geneve_sock, - del_work); - - udp_tunnel_sock_release(gs->sock); - kfree_rcu(gs, rcu); -} - static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port) { @@ -250,8 +342,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, if (!gs) return ERR_PTR(-ENOMEM); - INIT_WORK(&gs->del_work, geneve_del_work); - sock = geneve_create_sock(net, ipv6, port); if (IS_ERR(sock)) { kfree(gs); @@ -259,19 +349,15 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, } gs->sock = sock; - atomic_set(&gs->refcnt, 1); + gs->refcnt = 1; gs->rcv = rcv; gs->rcv_data = data; /* Initialize the geneve udp offloads structure */ gs->udp_offloads.port = port; - gs->udp_offloads.callbacks.gro_receive = NULL; - gs->udp_offloads.callbacks.gro_complete = NULL; - - spin_lock(&gn->sock_lock); - hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); + gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; + gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; geneve_notify_add_rx_port(gs); - spin_unlock(&gn->sock_lock); /* Mark socket as an encapsulation socket */ tunnel_cfg.sk_user_data = gs; @@ -280,6 +366,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); + list_add(&gs->list, &gn->sock_list); + return gs; } @@ -289,52 +377,52 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, { struct geneve_sock *gs; - gs = geneve_socket_create(net, port, rcv, data, ipv6); - if (!IS_ERR(gs)) - return gs; - - if (no_share) /* Return error if sharing is not allowed. */ - return ERR_PTR(-EINVAL); + mutex_lock(&geneve_mutex); - gs = geneve_find_sock(net, port); + gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); if (gs) { - if (gs->rcv == rcv) - atomic_inc(&gs->refcnt); + if (!no_share && gs->rcv == rcv) + gs->refcnt++; else gs = ERR_PTR(-EBUSY); } else { - gs = ERR_PTR(-EINVAL); + gs = geneve_socket_create(net, port, rcv, data, ipv6); } + mutex_unlock(&geneve_mutex); + return gs; } EXPORT_SYMBOL_GPL(geneve_sock_add); void geneve_sock_release(struct geneve_sock *gs) { - if (!atomic_dec_and_test(&gs->refcnt)) - return; + mutex_lock(&geneve_mutex); - queue_work(geneve_wq, &gs->del_work); + if (--gs->refcnt) + goto unlock; + + list_del(&gs->list); + geneve_notify_del_rx_port(gs); + udp_tunnel_sock_release(gs->sock); + kfree_rcu(gs, rcu); + +unlock: + mutex_unlock(&geneve_mutex); } EXPORT_SYMBOL_GPL(geneve_sock_release); static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); - unsigned int h; - spin_lock_init(&gn->sock_lock); - - for (h = 0; h < PORT_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&gn->sock_list[h]); + INIT_LIST_HEAD(&gn->sock_list); return 0; } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit = NULL, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; @@ -343,10 +431,6 @@ static int __init geneve_init_module(void) { int rc; - geneve_wq = alloc_workqueue("geneve", 0, 0); - if (!geneve_wq) - return -ENOMEM; - rc = register_pernet_subsys(&geneve_net_ops); if (rc) return rc; @@ -355,11 +439,10 @@ static int __init geneve_init_module(void) return 0; } -late_initcall(geneve_init_module); +module_init(geneve_init_module); static void __exit geneve_cleanup_module(void) { - destroy_workqueue(geneve_wq); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 36f5584d93c5..5e564014a0b7 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -205,7 +205,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; */ static struct sock *icmp_sk(struct net *net) { - return net->ipv4.icmp_sk[smp_processor_id()]; + return *this_cpu_ptr(net->ipv4.icmp_sk); } static inline struct sock *icmp_xmit_lock(struct net *net) @@ -1140,8 +1140,8 @@ static void __net_exit icmp_sk_exit(struct net *net) int i; for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); net->ipv4.icmp_sk = NULL; } @@ -1149,9 +1149,8 @@ static int __net_init icmp_sk_init(struct net *net) { int i, err; - net->ipv4.icmp_sk = - kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv4.icmp_sk == NULL) + net->ipv4.icmp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { @@ -1162,7 +1161,7 @@ static int __net_init icmp_sk_init(struct net *net) if (err < 0) goto fail; - net->ipv4.icmp_sk[i] = sk; + *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff/skb_shared_info struct overhead. @@ -1203,8 +1202,8 @@ static int __net_init icmp_sk_init(struct net *net) fail: for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); return err; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e34dccbc4d70..81751f12645f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -203,7 +203,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, icsk->icsk_ca_ops->get_info(sk, ext, skb); out: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); @@ -271,7 +272,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, @@ -758,7 +760,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 3a83ce5efa80..787b3c294ce6 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -129,7 +129,8 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) + if (IPCB(skb)->flags & IPSKB_DOREDIRECT && !opt->srr && + !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ac8491245e5b..6e7727f27393 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -252,10 +252,6 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; - skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); - if (IS_ERR(skb)) - goto out; - if (dev->header_ops) { /* Need space for new headers */ if (skb_cow_head(skb, dev->needed_headroom - @@ -268,6 +264,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, * to gre header. */ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + skb_reset_mac_header(skb); } else { if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; @@ -275,6 +272,10 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, tnl_params = &tunnel->parms.iph; } + skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); + if (IS_ERR(skb)) + goto out; + __gre_xmit(skb, dev, tnl_params, skb->protocol); return NETDEV_TX_OK; @@ -672,6 +673,7 @@ static bool ipgre_netlink_encap_parms(struct nlattr *data[], static int gre_tap_init(struct net_device *dev) { __gre_tunnel_init(dev); + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; return ip_tunnel_init(dev); } @@ -827,6 +829,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { @@ -841,6 +844,7 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __net_init ipgre_tap_init_net(struct net *net) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8a89c738b7a3..31d8c71986b4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -37,6 +37,7 @@ #include <net/route.h> #include <net/xfrm.h> #include <net/compat.h> +#include <net/checksum.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/transp_v6.h> #endif @@ -45,14 +46,6 @@ #include <linux/errqueue.h> #include <asm/uaccess.h> -#define IP_CMSG_PKTINFO 1 -#define IP_CMSG_TTL 2 -#define IP_CMSG_TOS 4 -#define IP_CMSG_RECVOPTS 8 -#define IP_CMSG_RETOPTS 16 -#define IP_CMSG_PASSSEC 32 -#define IP_CMSG_ORIGDSTADDR 64 - /* * SOL_IP control messages. */ @@ -104,6 +97,20 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, + int offset) +{ + __wsum csum = skb->csum; + + if (skb->ip_summed != CHECKSUM_COMPLETE) + return; + + if (offset != 0) + csum = csum_sub(csum, csum_partial(skb->data, offset, 0)); + + put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); +} + static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; @@ -144,47 +151,73 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); } -void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, + int offset) { struct inet_sock *inet = inet_sk(skb->sk); unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ - if (flags & 1) + if (flags & IP_CMSG_PKTINFO) { ip_cmsg_recv_pktinfo(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PKTINFO; + if (!flags) + return; + } + + if (flags & IP_CMSG_TTL) { ip_cmsg_recv_ttl(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TTL; + if (!flags) + return; + } + + if (flags & IP_CMSG_TOS) { ip_cmsg_recv_tos(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TOS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RECVOPTS) { ip_cmsg_recv_opts(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RECVOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RETOPTS) { ip_cmsg_recv_retopts(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RETOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_PASSSEC) { ip_cmsg_recv_security(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PASSSEC; + if (!flags) + return; + } + + if (flags & IP_CMSG_ORIGDSTADDR) { ip_cmsg_recv_dstaddr(msg, skb); + flags &= ~IP_CMSG_ORIGDSTADDR; + if (!flags) + return; + } + + if (flags & IP_CMSG_CHECKSUM) + ip_cmsg_recv_checksum(msg, skb, offset); } -EXPORT_SYMBOL(ip_cmsg_recv); +EXPORT_SYMBOL(ip_cmsg_recv_offset); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6) @@ -450,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); @@ -461,17 +494,14 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; - sin->sin_family = AF_UNSPEC; - - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || - ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { - struct inet_sock *inet = inet_sk(sk); + memset(sin, 0, sizeof(*sin)); + if (skb->len && + (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - sin->sin_port = 0; - memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - if (inet->cmsg_flags) + if (inet_sk(sk)->cmsg_flags) ip_cmsg_recv(msg, skb); } @@ -522,6 +552,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_MULTICAST_ALL: case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: + case IP_CHECKSUM: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -619,6 +650,19 @@ static int do_ip_setsockopt(struct sock *sk, int level, else inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; break; + case IP_CHECKSUM: + if (val) { + if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) { + inet_inc_convert_csum(sk); + inet->cmsg_flags |= IP_CMSG_CHECKSUM; + } + } else { + if (inet->cmsg_flags & IP_CMSG_CHECKSUM) { + inet_dec_convert_csum(sk); + inet->cmsg_flags &= ~IP_CMSG_CHECKSUM; + } + } + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -1222,6 +1266,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_RECVORIGDSTADDR: val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; break; + case IP_CHECKSUM: + val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; + break; case IP_TOS: val = inet->tos; break; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 63e745aadab6..2cd08280c77b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -514,6 +514,9 @@ const struct ip_tunnel_encap_ops __rcu * int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, unsigned int num) { + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + return !cmpxchg((const struct ip_tunnel_encap_ops **) &iptun_encaps[num], NULL, ops) ? 0 : -1; @@ -525,6 +528,9 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops, { int ret; + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + ret = (cmpxchg((const struct ip_tunnel_encap_ops **) &iptun_encaps[num], ops, NULL) == ops) ? 0 : -1; @@ -567,6 +573,9 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, if (t->encap.type == TUNNEL_ENCAP_NONE) return 0; + if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + rcu_read_lock(); ops = rcu_dereference(iptun_encaps[t->encap.type]); if (likely(ops && ops->build_header)) @@ -963,6 +972,14 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL_GPL(ip_tunnel_dellink); +struct net *ip_tunnel_get_link_net(const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip_tunnel_get_link_net); + int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1a7e979e80ba..94efe148181c 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -531,6 +531,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = vti_get_size, .fill_info = vti_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __init vti_init(void) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 7fa18bc7e47f..b26376ef87f6 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -209,9 +209,9 @@ static int __init ic_open_devs(void) last = &ic_first_dev; rtnl_lock(); - /* bring loopback device up first */ + /* bring loopback and DSA master network devices up first */ for_each_netdev(&init_net, dev) { - if (!(dev->flags & IFF_LOOPBACK)) + if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev)) continue; if (dev_change_flags(dev, dev->flags | IFF_UP) < 0) pr_err("IP-Config: Failed to open %s\n", dev->name); @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev) { + if (dev != ic_dev && !netdev_uses_dsa(dev)) { DBG(("IP-Config: Downing %s\n", dev->name)); dev_change_flags(dev, d->flags); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 40403114f00a..b58d6689874c 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -498,6 +498,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipip_get_size, .fill_info = ipip_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel ipip_handler __read_mostly = { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c8034587859d..9d78427652d2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2290,7 +2290,8 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index ff2d23d8c87a..6ecfce63201a 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -27,10 +27,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { - mr.range[0].min.all = (__force __be16) - data[priv->sreg_proto_min].data[0]; - mr.range[0].max.all = (__force __be16) - data[priv->sreg_proto_max].data[0]; + mr.range[0].min.all = + *(__be16 *)&data[priv->sreg_proto_min].data[0]; + mr.range[0].max.all = + *(__be16 *)&data[priv->sreg_proto_max].data[0]; mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index c0d82f78d364..2a3720fb5a5f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -966,8 +966,11 @@ bool ping_rcv(struct sk_buff *skb) sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk != NULL) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + pr_debug("rcv on socket %p\n", sk); - ping_queue_rcv_skb(sk, skb_get(skb)); + if (skb2) + ping_queue_rcv_skb(sk, skb2); sock_put(sk); return true; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a2155b02602..0c63b2abd873 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1325,14 +1325,22 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) return ret; } -static DEFINE_SPINLOCK(rt_uncached_lock); -static LIST_HEAD(rt_uncached_list); +struct uncached_list { + spinlock_t lock; + struct list_head head; +}; + +static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); static void rt_add_uncached_list(struct rtable *rt) { - spin_lock_bh(&rt_uncached_lock); - list_add_tail(&rt->rt_uncached, &rt_uncached_list); - spin_unlock_bh(&rt_uncached_lock); + struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); + + rt->rt_uncached_list = ul; + + spin_lock_bh(&ul->lock); + list_add_tail(&rt->rt_uncached, &ul->head); + spin_unlock_bh(&ul->lock); } static void ipv4_dst_destroy(struct dst_entry *dst) @@ -1340,27 +1348,32 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; if (!list_empty(&rt->rt_uncached)) { - spin_lock_bh(&rt_uncached_lock); + struct uncached_list *ul = rt->rt_uncached_list; + + spin_lock_bh(&ul->lock); list_del(&rt->rt_uncached); - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } void rt_flush_dev(struct net_device *dev) { - if (!list_empty(&rt_uncached_list)) { - struct net *net = dev_net(dev); - struct rtable *rt; + struct net *net = dev_net(dev); + struct rtable *rt; + int cpu; - spin_lock_bh(&rt_uncached_lock); - list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); + + spin_lock_bh(&ul->lock); + list_for_each_entry(rt, &ul->head, rt_uncached) { if (rt->dst.dev != dev) continue; rt->dst.dev = net->loopback_dev; dev_hold(rt->dst.dev); dev_put(dev); } - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } @@ -1554,11 +1567,10 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && + skb->protocol == htons(ETH_P_IP) && (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { - flags |= RTCF_DOREDIRECT; - do_cache = false; - } + inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) + IPCB(skb)->flags |= IPSKB_DOREDIRECT; if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -2303,6 +2315,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; + if (IPCB(skb)->flags & IPSKB_DOREDIRECT) + r->rtm_flags |= RTCF_DOREDIRECT; if (nla_put_be32(skb, RTA_DST, dst)) goto nla_put_failure; @@ -2377,7 +2391,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2469,7 +2484,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) err = rt_fill_info(net, dst, src, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (err <= 0) + if (err < 0) goto errout_free; err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); @@ -2717,6 +2732,7 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; int __init ip_rt_init(void) { int rc = 0; + int cpu; ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); if (!ip_idents) @@ -2724,6 +2740,12 @@ int __init ip_rt_init(void) prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); + + INIT_LIST_HEAD(&ul->head); + spin_lock_init(&ul->lock); + } #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 27ead0dd16bc..63c29dba68a8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/list.h> #include <linux/gfp.h> +#include <linux/jhash.h> #include <net/tcp.h> static DEFINE_SPINLOCK(tcp_cong_list_lock); @@ -31,6 +32,34 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) return NULL; } +/* Must be called with rcu lock held */ +static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) +{ + const struct tcp_congestion_ops *ca = tcp_ca_find(name); +#ifdef CONFIG_MODULES + if (!ca && capable(CAP_NET_ADMIN)) { + rcu_read_unlock(); + request_module("tcp_%s", name); + rcu_read_lock(); + ca = tcp_ca_find(name); + } +#endif + return ca; +} + +/* Simple linear search, not much in here. */ +struct tcp_congestion_ops *tcp_ca_find_key(u32 key) +{ + struct tcp_congestion_ops *e; + + list_for_each_entry_rcu(e, &tcp_cong_list, list) { + if (e->key == key) + return e; + } + + return NULL; +} + /* * Attach new congestion control algorithm to the list * of available options. @@ -45,9 +74,12 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) return -EINVAL; } + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); + spin_lock(&tcp_cong_list_lock); - if (tcp_ca_find(ca->name)) { - pr_notice("%s already registered\n", ca->name); + if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { + pr_notice("%s already registered or non-unique key\n", + ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); @@ -70,9 +102,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) spin_lock(&tcp_cong_list_lock); list_del_rcu(&ca->list); spin_unlock(&tcp_cong_list_lock); + + /* Wait for outstanding readers to complete before the + * module gets removed entirely. + * + * A try_module_get() should fail by now as our module is + * in "going" state since no refs are held anymore and + * module_exit() handler being called. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); +u32 tcp_ca_get_key_by_name(const char *name) +{ + const struct tcp_congestion_ops *ca; + u32 key; + + might_sleep(); + + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + key = ca ? ca->key : TCP_CA_UNSPEC; + rcu_read_unlock(); + + return key; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name); + +char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + const struct tcp_congestion_ops *ca; + char *ret = NULL; + + rcu_read_lock(); + ca = tcp_ca_find_key(key); + if (ca) + ret = strncpy(buffer, ca->name, + TCP_CA_NAME_MAX); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); + /* Assign choice of congestion control. */ void tcp_assign_congestion_control(struct sock *sk) { @@ -107,6 +180,18 @@ void tcp_init_congestion_control(struct sock *sk) icsk->icsk_ca_ops->init(sk); } +static void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + + if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); +} + /* Manage refcounts on socket close. */ void tcp_cleanup_congestion_control(struct sock *sk) { @@ -241,42 +326,26 @@ out: int tcp_set_congestion_control(struct sock *sk, const char *name) { struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_congestion_ops *ca; + const struct tcp_congestion_ops *ca; int err = 0; - rcu_read_lock(); - ca = tcp_ca_find(name); + if (icsk->icsk_ca_dst_locked) + return -EPERM; - /* no change asking for existing value */ + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) goto out; - -#ifdef CONFIG_MODULES - /* not found attempt to autoload module */ - if (!ca && capable(CAP_NET_ADMIN)) { - rcu_read_unlock(); - request_module("tcp_%s", name); - rcu_read_lock(); - ca = tcp_ca_find(name); - } -#endif if (!ca) err = -ENOENT; - else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) err = -EPERM; - else if (!try_module_get(ca->owner)) err = -EBUSY; - - else { - tcp_cleanup_congestion_control(sk); - icsk->icsk_ca_ops = ca; - - if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); - } + else + tcp_reinit_congestion_control(sk, ca); out: rcu_read_unlock(); return err; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075ab4d5af5e..d3dfff78fa19 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3183,8 +3183,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) - ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); + if (ca_ops->pkts_acked) { + long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); + ca_ops->pkts_acked(sk, pkts_acked, rtt_us); + } } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { @@ -3358,34 +3360,34 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } /* This routine deals with acks during a TLP episode. + * We mark the end of a TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { struct tcp_sock *tp = tcp_sk(sk); - bool is_tlp_dupack = (ack == tp->tlp_high_seq) && - !(flag & (FLAG_SND_UNA_ADVANCED | - FLAG_NOT_DUP | FLAG_DATA_SACKED)); - /* Mark the end of TLP episode on receiving TLP dupack or when - * ack is after tlp_high_seq. - */ - if (is_tlp_dupack) { - tp->tlp_high_seq = 0; + if (before(ack, tp->tlp_high_seq)) return; - } - if (after(ack, tp->tlp_high_seq)) { + if (flag & FLAG_DSACKING_ACK) { + /* This DSACK means original and TLP probe arrived; no loss */ + tp->tlp_high_seq = 0; + } else if (after(ack, tp->tlp_high_seq)) { + /* ACK advances: there was a loss, so reduce cwnd. Reset + * tlp_high_seq in tcp_init_cwnd_reduction() + */ + tcp_init_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_try_keep_open(sk); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } else if (!(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED))) { + /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; - /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ - if (!(flag & FLAG_DSACKING_ACK)) { - tcp_init_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - tcp_end_cwnd_reduction(sk); - tcp_try_keep_open(sk); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSSPROBERECOVERY); - } } } @@ -5870,10 +5872,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) * TCP ECN negotiation. * * Exception: tcp_ca wants ECN. This is required for DCTCP - * congestion control; it requires setting ECT on all packets, - * including SYN. We inverse the test in this case: If our - * local socket wants ECN, but peer only set ece/cwr (but not - * ECT in IP header) its probably a non-DCTCP aware sender. + * congestion control: Linux DCTCP asserts ECT on all packets, + * including SYN, which is most optimal solution; however, + * others, such as FreeBSD do not. */ static void tcp_ecn_create_request(struct request_sock *req, const struct sk_buff *skb, @@ -5883,18 +5884,15 @@ static void tcp_ecn_create_request(struct request_sock *req, const struct tcphdr *th = tcp_hdr(skb); const struct net *net = sock_net(listen_sk); bool th_ecn = th->ece && th->cwr; - bool ect, need_ecn, ecn_ok; + bool ect, ecn_ok; if (!th_ecn) return; ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); - need_ecn = tcp_ca_needs_ecn(listen_sk); ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); - if (!ect && !need_ecn && ecn_ok) - inet_rsk(req)->ecn_ok = 1; - else if (ect && need_ecn) + if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) inet_rsk(req)->ecn_ok = 1; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a3f72d7fc06c..ad3e65bdd368 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1340,6 +1340,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index ed9c9a91851c..e5f41bd5ec1b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -886,7 +886,8 @@ static int tcp_metrics_dump_info(struct sk_buff *skb, if (tcp_metrics_fill_info(skb, tm) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d2680b65db..bc9216dc9de1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -399,6 +399,32 @@ static void tcp_ecn_openreq_child(struct tcp_sock *tp, tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + bool ca_got_dst = false; + + if (ca_key != TCP_CA_UNSPEC) { + const struct tcp_congestion_ops *ca; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + ca_got_dst = true; + } + rcu_read_unlock(); + } + + if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + tcp_assign_congestion_control(sk); + + tcp_set_ca_state(sk, TCP_CA_Open); +} +EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -451,10 +477,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - if (!try_module_get(newicsk->icsk_ca_ops->owner)) - tcp_assign_congestion_control(newsk); - - tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7f18262e2326..20ab06b228ac 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2019,7 +2019,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; - if (tso_segs == 1) { + if (tso_segs == 1 || !max_segs) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)))) @@ -2032,7 +2032,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) + if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, min_t(unsigned int, cwnd_quota, @@ -2939,6 +2939,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } EXPORT_SYMBOL(tcp_make_synack); +static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_congestion_ops *ca; + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + + if (ca_key == TCP_CA_UNSPEC) + return; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + module_put(icsk->icsk_ca_ops->owner); + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + } + rcu_read_unlock(); +} + /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { @@ -2964,6 +2983,8 @@ static void tcp_connect_init(struct sock *sk) tcp_mtup_init(sk); tcp_sync_mss(sk, dst_mtu(dst)); + tcp_ca_dst_init(sk, dst); + if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric_advmss(dst); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 13b4dcf86ef6..97ef1f8b7be8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1329,7 +1329,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); err = copied; if (flags & MSG_TRUNC) @@ -1806,7 +1806,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, inet_compute_pseudo); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 7927db0a9279..4a000f1dd757 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -99,11 +99,13 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin s_slot = cb->args[0]; num = s_num = cb->args[1]; - for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) { + for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) { struct sock *sk; struct hlist_nulls_node *node; struct udp_hslot *hslot = &table->hash[slot]; + num = 0; + if (hlist_nulls_empty(&hslot->head)) continue; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d3e537ef6b7f..d10f6f4ead27 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -339,7 +339,8 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb); + pp = uo_priv->offload->callbacks.gro_receive(head, skb, + uo_priv->offload); out_unlock: rcu_read_unlock(); @@ -395,7 +396,9 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) if (uo_priv != NULL) { NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr)); + err = uo_priv->offload->callbacks.gro_complete(skb, + nhoff + sizeof(struct udphdr), + uo_priv->offload); } rcu_read_unlock(); diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 1671263e5fa0..c83b35485056 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -63,7 +63,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, inet_sk(sk)->mc_loop = 0; /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); rcu_assign_sk_user_data(sk, cfg->sk_user_data); @@ -75,10 +75,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, - __u8 tos, __u8 ttl, __be16 df, __be16 src_port, - __be16 dst_port, bool xnet) +int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck) { struct udphdr *uh; @@ -90,9 +90,9 @@ int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, uh->source = src_port; uh->len = htons(skb->len); - udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len); + udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP, + return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f7c8bbeb27b7..8623118cb2bb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -201,6 +201,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -238,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; /* Check if a valid qdisc is available */ @@ -489,7 +491,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -619,7 +622,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) { + -1) < 0) { rcu_read_unlock(); goto done; } @@ -635,7 +638,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -646,7 +649,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -4047,7 +4050,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) goto error; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; error: nlmsg_cancel(skb, nlh); @@ -4076,7 +4080,8 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, @@ -4101,7 +4106,8 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } enum addr_type_t { @@ -4134,7 +4140,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } @@ -4151,7 +4157,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4166,7 +4172,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4209,7 +4215,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, goto cont; if (in6_dump_addrs(idev, skb, cb, type, - s_ip_idx, &ip_idx) <= 0) + s_ip_idx, &ip_idx) < 0) goto done; cont: idx++; @@ -4376,6 +4382,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; + array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; } static inline size_t inet6_ifla6_size(void) @@ -4638,7 +4645,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, goto nla_put_failure; nla_nest_end(skb, protoinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -4670,7 +4678,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWLINK, NLM_F_MULTI) <= 0) + RTM_NEWLINK, NLM_F_MULTI) < 0) goto out; cont: idx++; @@ -4747,7 +4755,8 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, ci.valid_time = ntohl(pinfo->valid); if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -5253,6 +5262,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { + .procname = "accept_ra_mtu", + .data = &ipv6_devconf.accept_ra_mtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }, @@ -5389,7 +5405,7 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -static struct rtnl_af_ops inet6_ops = { +static struct rtnl_af_ops inet6_ops __read_mostly = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index fd0dc47f471d..e43e79d0a612 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -490,7 +490,8 @@ static int ip6addrlbl_fill(struct sk_buff *skb, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -510,7 +511,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } idx++; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 100c589a2a6c..c215be70cac0 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -393,11 +393,9 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; - sin->sin6_family = AF_UNSPEC; - if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { + memset(sin, 0, sizeof(*sin)); + if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) { sin->sin6_family = AF_INET6; - sin->sin6_flowinfo = 0; - sin->sin6_port = 0; if (np->rxopt.all) { if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) @@ -412,12 +410,9 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) ipv6_iface_scope_id(&sin->sin6_addr, IP6CB(skb)->iif); } else { - struct inet_sock *inet = inet_sk(sk); - ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin->sin6_addr); - sin->sin6_scope_id = 0; - if (inet->cmsg_flags) + if (inet_sk(sk)->cmsg_flags) ip_cmsg_recv(msg, skb); } } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index d674152b6ede..a5e95199585e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -427,7 +427,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Dest addr check */ - if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) { + if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) { if (type != ICMPV6_PKT_TOOBIG && !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b2d1838897c9..263ef4143bff 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -277,7 +277,6 @@ static int fib6_dump_node(struct fib6_walker *w) w->leaf = rt; return 1; } - WARN_ON(res == 0); } w->leaf = NULL; return 0; @@ -630,33 +629,59 @@ static bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } -static int fib6_commit_metrics(struct dst_entry *dst, - struct nlattr *mx, int mx_len) +static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) { - struct nlattr *nla; - int remaining; - u32 *mp; + int i; + + for (i = 0; i < RTAX_MAX; i++) { + if (test_bit(i, mxc->mx_valid)) + mp[i] = mxc->mx[i]; + } +} + +static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) +{ + if (!mxc->mx) + return 0; if (dst->flags & DST_HOST) { - mp = dst_metrics_write_ptr(dst); - } else { - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); - if (!mp) + u32 *mp = dst_metrics_write_ptr(dst); + + if (unlikely(!mp)) return -ENOMEM; - dst_init_metrics(dst, mp, 0); - } - nla_for_each_attr(nla, mx, mx_len, remaining) { - int type = nla_type(nla); + fib6_copy_metrics(mp, mxc); + } else { + dst_init_metrics(dst, mxc->mx, false); - if (type) { - if (type > RTAX_MAX) - return -EINVAL; + /* We've stolen mx now. */ + mxc->mx = NULL; + } - mp[type - 1] = nla_get_u32(nla); + return 0; +} + +static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, + struct net *net) +{ + if (atomic_read(&rt->rt6i_ref) != 1) { + /* This route is used as dummy address holder in some split + * nodes. It is not leaked, but it still holds other resources, + * which must be released in time. So, scan ascendant nodes + * and replace dummy references to this route with references + * to still alive ones. + */ + while (fn) { + if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { + fn->leaf = fib6_find_prefix(net, fn); + atomic_inc(&fn->leaf->rt6i_ref); + rt6_release(rt); + } + fn = fn->parent; } + /* No more references are possible at this point. */ + BUG_ON(atomic_read(&rt->rt6i_ref) != 1); } - return 0; } /* @@ -664,7 +689,7 @@ static int fib6_commit_metrics(struct dst_entry *dst, */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct nlattr *mx, int mx_len) + struct nl_info *info, struct mx6_config *mxc) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -773,11 +798,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; @@ -797,21 +821,22 @@ add: pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + *ins = rt; rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info); - rt6_release(iter); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } + fib6_purge_rt(iter, fn, info->nl_net); + rt6_release(iter); } return 0; @@ -838,8 +863,8 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nl_info *info, struct mx6_config *mxc) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -934,7 +959,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, } #endif - err = fib6_add_rt2node(fn, rt, info, mx, mx_len); + err = fib6_add_rt2node(fn, rt, info, mxc); if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) @@ -1322,24 +1347,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, fn = fib6_repair_tree(net, fn); } - if (atomic_read(&rt->rt6i_ref) != 1) { - /* This route is used as dummy address holder in some split - * nodes. It is not leaked, but it still holds other resources, - * which must be released in time. So, scan ascendant nodes - * and replace dummy references to this route with references - * to still alive ones. - */ - while (fn) { - if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { - fn->leaf = fib6_find_prefix(net, fn); - atomic_inc(&fn->leaf->rt6i_ref); - rt6_release(rt); - } - fn = fn->parent; - } - /* No more references are possible at this point. */ - BUG_ON(atomic_read(&rt->rt6i_ref) != 1); - } + fib6_purge_rt(rt, fn, net); inet6_rt_notify(RTM_DELROUTE, rt, info); rt6_release(rt); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 13cda4c6313b..6dee2a8ca0a9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1662,6 +1662,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { @@ -1675,6 +1676,7 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { .changelink = ip6gre_changelink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; /* diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ce69a12ae48c..1a036f35d833 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1041,6 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); static inline int ip6_ufo_append_data(struct sock *sk, + struct sk_buff_head *queue, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, @@ -1056,7 +1057,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - skb = skb_peek_tail(&sk->sk_write_queue); + skb = skb_peek_tail(queue); if (skb == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, @@ -1079,7 +1080,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->csum = 0; - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; } @@ -1135,99 +1136,106 @@ static void ip6_append_data_mtu(unsigned int *mtu, } } -int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, - int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag) +static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, + struct inet6_cork *v6_cork, + int hlimit, int tclass, struct ipv6_txoptions *opt, + struct rt6_info *rt, struct flowi6 *fl6) { - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct inet_cork *cork; + unsigned int mtu; + + /* + * setup for corking + */ + if (opt) { + if (WARN_ON(v6_cork->opt)) + return -EINVAL; + + v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); + if (unlikely(v6_cork->opt == NULL)) + return -ENOBUFS; + + v6_cork->opt->tot_len = opt->tot_len; + v6_cork->opt->opt_flen = opt->opt_flen; + v6_cork->opt->opt_nflen = opt->opt_nflen; + + v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, + sk->sk_allocation); + if (opt->dst0opt && !v6_cork->opt->dst0opt) + return -ENOBUFS; + + v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, + sk->sk_allocation); + if (opt->dst1opt && !v6_cork->opt->dst1opt) + return -ENOBUFS; + + v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, + sk->sk_allocation); + if (opt->hopopt && !v6_cork->opt->hopopt) + return -ENOBUFS; + + v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, + sk->sk_allocation); + if (opt->srcrt && !v6_cork->opt->srcrt) + return -ENOBUFS; + + /* need source address above miyazawa*/ + } + dst_hold(&rt->dst); + cork->base.dst = &rt->dst; + cork->fl.u.ip6 = *fl6; + v6_cork->hop_limit = hlimit; + v6_cork->tclass = tclass; + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } + cork->base.fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->base.flags |= IPCORK_ALLFRAG; + cork->base.length = 0; + + return 0; +} + +static int __ip6_append_data(struct sock *sk, + struct flowi6 *fl6, + struct sk_buff_head *queue, + struct inet_cork *cork, + struct inet6_cork *v6_cork, + struct page_frag *pfrag, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + unsigned int flags, int dontfrag) +{ struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; - int exthdrlen; - int dst_exthdrlen; + int exthdrlen = 0; + int dst_exthdrlen = 0; int hh_len; int copy; int err; int offset = 0; __u8 tx_flags = 0; u32 tskey = 0; + struct rt6_info *rt = (struct rt6_info *)cork->dst; + struct ipv6_txoptions *opt = v6_cork->opt; + int csummode = CHECKSUM_NONE; - if (flags&MSG_PROBE) - return 0; - cork = &inet->cork.base; - if (skb_queue_empty(&sk->sk_write_queue)) { - /* - * setup for corking - */ - if (opt) { - if (WARN_ON(np->cork.opt)) - return -EINVAL; - - np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); - if (unlikely(np->cork.opt == NULL)) - return -ENOBUFS; - - np->cork.opt->tot_len = opt->tot_len; - np->cork.opt->opt_flen = opt->opt_flen; - np->cork.opt->opt_nflen = opt->opt_nflen; - - np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, - sk->sk_allocation); - if (opt->dst0opt && !np->cork.opt->dst0opt) - return -ENOBUFS; - - np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, - sk->sk_allocation); - if (opt->dst1opt && !np->cork.opt->dst1opt) - return -ENOBUFS; - - np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, - sk->sk_allocation); - if (opt->hopopt && !np->cork.opt->hopopt) - return -ENOBUFS; - - np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, - sk->sk_allocation); - if (opt->srcrt && !np->cork.opt->srcrt) - return -ENOBUFS; - - /* need source address above miyazawa*/ - } - dst_hold(&rt->dst); - cork->dst = &rt->dst; - inet->cork.fl.u.ip6 = *fl6; - np->cork.hop_limit = hlimit; - np->cork.tclass = tclass; - if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); - else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } - cork->fragsize = mtu; - if (dst_allfrag(rt->dst.path)) - cork->flags |= IPCORK_ALLFRAG; - cork->length = 0; - exthdrlen = (opt ? opt->opt_flen : 0); - length += exthdrlen; - transhdrlen += exthdrlen; + skb = skb_peek_tail(queue); + if (!skb) { + exthdrlen = opt ? opt->opt_flen : 0; dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; - } else { - rt = (struct rt6_info *)cork->dst; - fl6 = &inet->cork.fl.u.ip6; - opt = np->cork.opt; - transhdrlen = 0; - exthdrlen = 0; - dst_exthdrlen = 0; - mtu = cork->fragsize; } + + mtu = cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1276,6 +1284,14 @@ emsgsize: tskey = sk->sk_tskey++; } + /* If this is the first and only packet and device + * supports checksum offloading, let's use it. + */ + if (!skb && + length + fragheaderlen < mtu && + rt->dst.dev->features & NETIF_F_V6_CSUM && + !exthdrlen) + csummode = CHECKSUM_PARTIAL; /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. @@ -1292,13 +1308,12 @@ emsgsize: * --yoshfuji */ - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { - err = ip6_ufo_append_data(sk, getfrag, from, length, + err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); if (err) @@ -1389,7 +1404,7 @@ alloc_new_skb: * Fill in the control structures */ skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = csummode; skb->csum = 0; /* reserve for fragmentation and ipsec header */ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + @@ -1439,7 +1454,7 @@ alloc_new_skb: /* * Put the packet on the pending queue */ - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); continue; } @@ -1458,7 +1473,6 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); err = -ENOMEM; if (!sk_page_frag_refill(sk, pfrag)) @@ -1501,43 +1515,81 @@ error: IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } + +int ip6_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, int hlimit, + int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, int dontfrag) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + int exthdrlen; + int err; + + if (flags&MSG_PROBE) + return 0; + if (skb_queue_empty(&sk->sk_write_queue)) { + /* + * setup for corking + */ + err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, + tclass, opt, rt, fl6); + if (err) + return err; + + exthdrlen = (opt ? opt->opt_flen : 0); + length += exthdrlen; + transhdrlen += exthdrlen; + } else { + fl6 = &inet->cork.fl.u.ip6; + transhdrlen = 0; + } + + return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, + &np->cork, sk_page_frag(sk), getfrag, + from, length, transhdrlen, flags, dontfrag); +} EXPORT_SYMBOL_GPL(ip6_append_data); -static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) +static void ip6_cork_release(struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { - if (np->cork.opt) { - kfree(np->cork.opt->dst0opt); - kfree(np->cork.opt->dst1opt); - kfree(np->cork.opt->hopopt); - kfree(np->cork.opt->srcrt); - kfree(np->cork.opt); - np->cork.opt = NULL; + if (v6_cork->opt) { + kfree(v6_cork->opt->dst0opt); + kfree(v6_cork->opt->dst1opt); + kfree(v6_cork->opt->hopopt); + kfree(v6_cork->opt->srcrt); + kfree(v6_cork->opt); + v6_cork->opt = NULL; } - if (inet->cork.base.dst) { - dst_release(inet->cork.base.dst); - inet->cork.base.dst = NULL; - inet->cork.base.flags &= ~IPCORK_ALLFRAG; + if (cork->base.dst) { + dst_release(cork->base.dst); + cork->base.dst = NULL; + cork->base.flags &= ~IPCORK_ALLFRAG; } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + memset(&cork->fl, 0, sizeof(cork->fl)); } -int ip6_push_pending_frames(struct sock *sk) +struct sk_buff *__ip6_make_skb(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; - struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct ipv6_txoptions *opt = v6_cork->opt; + struct rt6_info *rt = (struct rt6_info *)cork->base.dst; + struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; - int err = 0; - skb = __skb_dequeue(&sk->sk_write_queue); + skb = __skb_dequeue(queue); if (skb == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); @@ -1545,7 +1597,7 @@ int ip6_push_pending_frames(struct sock *sk) /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); - while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); @@ -1570,10 +1622,10 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, + ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, np->autoflowlabel)); - hdr->hop_limit = np->cork.hop_limit; + hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; @@ -1590,34 +1642,104 @@ int ip6_push_pending_frames(struct sock *sk) ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } + ip6_cork_release(cork, v6_cork); +out: + return skb; +} + +int ip6_send_skb(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + int err; + err = ip6_local_out(skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) - goto error; + IP6_INC_STATS(net, rt->rt6i_idev, + IPSTATS_MIB_OUTDISCARDS); } -out: - ip6_cork_release(inet, np); return err; -error: - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); - goto out; +} + +int ip6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + + skb = ip6_finish_skb(sk); + if (!skb) + return 0; + + return ip6_send_skb(skb); } EXPORT_SYMBOL_GPL(ip6_push_pending_frames); -void ip6_flush_pending_frames(struct sock *sk) +static void __ip6_flush_pending_frames(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { + while ((skb = __skb_dequeue_tail(queue)) != NULL) { if (skb_dst(skb)) IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } - ip6_cork_release(inet_sk(sk), inet6_sk(sk)); + ip6_cork_release(cork, v6_cork); +} + +void ip6_flush_pending_frames(struct sock *sk) +{ + __ip6_flush_pending_frames(sk, &sk->sk_write_queue, + &inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); + +struct sk_buff *ip6_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, + struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, + int dontfrag) +{ + struct inet_cork_full cork; + struct inet6_cork v6_cork; + struct sk_buff_head queue; + int exthdrlen = (opt ? opt->opt_flen : 0); + int err; + + if (flags & MSG_PROBE) + return NULL; + + __skb_queue_head_init(&queue); + + cork.base.flags = 0; + cork.base.addr = 0; + cork.base.opt = NULL; + v6_cork.opt = NULL; + err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); + if (err) + return ERR_PTR(err); + + if (dontfrag < 0) + dontfrag = inet6_sk(sk)->dontfrag; + + err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, + ¤t->task_frag, getfrag, from, + length + exthdrlen, transhdrlen + exthdrlen, + flags, dontfrag); + if (err) { + __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); + return ERR_PTR(err); + } + + return __ip6_make_skb(sk, &queue, &cork, &v6_cork); +} diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 92b3da571980..266a264ec212 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1760,6 +1760,14 @@ nla_put_failure: return -EMSGSIZE; } +struct net *ip6_tnl_get_link_net(const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip6_tnl_get_link_net); + static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) }, @@ -1783,6 +1791,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = { .dellink = ip6_tnl_dellink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 8db6c98fe218..32d9b268e7d8 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -62,14 +62,14 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, - struct sk_buff *skb, struct net_device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port) +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, + __be16 dst_port, bool nocheck) { struct udphdr *uh; struct ipv6hdr *ip6h; - struct sock *sk = sock->sk; __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); @@ -85,7 +85,7 @@ int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, | IPSKB_REROUTED); skb_dst_set(skb, dst); - udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr, skb->len); + udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ace10d0b3aac..5fb9e212eca8 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1016,6 +1016,7 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .changelink = vti6_changelink, .get_size = vti6_get_size, .fill_info = vti6_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 722669754bbf..34b682617f50 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2388,7 +2388,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 66980d8d98d1..8d766d9100cb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -996,13 +996,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, lock_sock(sk); skb = np->pktoptions; if (skb) - atomic_inc(&skb->users); - release_sock(sk); - - if (skb) { ip6_datagram_recv_ctl(sk, &msg, skb); - kfree_skb(skb); - } else { + release_sock(sk); + if (!skb) { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 682866777d53..8a9d7c19e247 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1348,7 +1348,7 @@ skip_routeinfo: } } - if (ndopts.nd_opts_mtu) { + if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) { __be32 n; u32 mtu; diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index 2433a6bfb191..11820b6b3613 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -27,10 +27,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16) - data[priv->sreg_proto_min].data[0]; - range.max_proto.all = (__force __be16) - data[priv->sreg_proto_max].data[0]; + range.min_proto.all = + *(__be16 *)&data[priv->sreg_proto_min].data[0]; + range.max_proto.all = + *(__be16 *)&data[priv->sreg_proto_max].data[0]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c91083156edb..7622951e5fbb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -853,14 +853,14 @@ EXPORT_SYMBOL(rt6_lookup); */ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) + struct mx6_config *mxc) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); + err = fib6_add(&table->tb6_root, rt, info, mxc); write_unlock_bh(&table->tb6_lock); return err; @@ -868,10 +868,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, int ip6_ins_rt(struct rt6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; - return __ip6_ins_rt(rt, &info, NULL, 0); + struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; + struct mx6_config mxc = { .mx = NULL, }; + + return __ip6_ins_rt(rt, &info, &mxc); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -1160,12 +1160,9 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct net *net = dev_net(dst->dev); rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) { - u32 features = dst_metric(dst, RTAX_FEATURES); + if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(dst, RTAX_FEATURES, features); - } + dst_metric_set(dst, RTAX_MTU, mtu); rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } @@ -1245,12 +1242,16 @@ restart: rt = net->ipv6.ip6_null_entry; else if (rt->dst.error) { rt = net->ipv6.ip6_null_entry; - } else if (rt == net->ipv6.ip6_null_entry) { + goto out; + } + + if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } +out: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); @@ -1470,9 +1471,51 @@ out: return entries > rt_max_size; } -/* - * - */ +static int ip6_convert_metrics(struct mx6_config *mxc, + const struct fib6_config *cfg) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (cfg->fc_mx == NULL) + return 0; + + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (unlikely(!mp)) + return -ENOMEM; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + u32 val; + + if (unlikely(type > RTAX_MAX)) + goto err; + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err; + } else { + val = nla_get_u32(nla); + } + + mp[type - 1] = val; + __set_bit(type - 1, mxc->mx_valid); + } + } + + mxc->mx = mp; + + return 0; + err: + kfree(mp); + return -EINVAL; +} int ip6_route_add(struct fib6_config *cfg) { @@ -1482,6 +1525,7 @@ int ip6_route_add(struct fib6_config *cfg) struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; + struct mx6_config mxc = { .mx = NULL, }; int addr_type; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) @@ -1677,8 +1721,14 @@ install_route: cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); + err = ip6_convert_metrics(&mxc, cfg); + if (err) + goto out; + + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); + kfree(mxc.mx); + return err; out: if (dev) dev_put(dev); @@ -2534,7 +2584,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_OIF */ + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ - + nla_total_size(sizeof(struct rta_cacheinfo)); + + nla_total_size(sizeof(struct rta_cacheinfo)) + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ } static int rt6_fill_node(struct net *net, @@ -2675,7 +2726,8 @@ static int rt6_fill_node(struct net *net, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 213546bd6d5d..3cc197c72b59 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1763,6 +1763,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, .dellink = ipip6_dellink, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel sit_handler __read_mostly = { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5ff87805258e..5d46832c6f72 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1199,6 +1199,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && @@ -1387,6 +1389,28 @@ ipv6_pktoptions: return 0; } +static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, + const struct tcphdr *th) +{ + /* This is tricky: we move IP6CB at its correct location into + * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because + * _decode_session6() uses IP6CB(). + * barrier() makes sure compiler won't play aliasing games. + */ + memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), + sizeof(struct inet6_skb_parm)); + barrier(); + + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + skb->len - th->doff*4); + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_tw_isn = 0; + TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); + TCP_SKB_CB(skb)->sacked = 0; +} + static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; @@ -1418,24 +1442,9 @@ static int tcp_v6_rcv(struct sk_buff *skb) th = tcp_hdr(skb); hdr = ipv6_hdr(skb); - /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() - * barrier() makes sure compiler wont play fool^Waliasing games. - */ - memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), - sizeof(struct inet6_skb_parm)); - barrier(); - - TCP_SKB_CB(skb)->seq = ntohl(th->seq); - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff*4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); - TCP_SKB_CB(skb)->tcp_tw_isn = 0; - TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); - TCP_SKB_CB(skb)->sacked = 0; sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, - tcp_v6_iif(skb)); + inet6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1451,6 +1460,8 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + tcp_v6_fill_cb(skb, hdr, th); + #ifdef CONFIG_TCP_MD5SIG if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; @@ -1482,6 +1493,8 @@ no_tcp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; + tcp_v6_fill_cb(skb, hdr, th); + if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { csum_error: TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); @@ -1505,6 +1518,8 @@ do_time_wait: goto discard_it; } + tcp_v6_fill_cb(skb, hdr, th); + if (skb->len < (th->doff<<2)) { inet_twsk_put(inet_twsk(sk)); goto bad_packet; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 189dc4ae3eca..d048d46779fc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -909,7 +909,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, ip6_compute_pseudo); @@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, { unsigned int offset; struct udphdr *uh = udp_hdr(skb); + struct sk_buff *frags = skb_shinfo(skb)->frag_list; __wsum csum = 0; - if (skb_queue_len(&sk->sk_write_queue) == 1) { + if (!frags) { /* Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); @@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } + do { + csum = csum_add(csum, frags->csum); + } while ((frags = frags->next)); uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, csum); @@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, * Sending */ -static int udp_v6_push_pending_frames(struct sock *sk) +static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) { - struct sk_buff *skb; + struct sock *sk = skb->sk; struct udphdr *uh; - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; - - if (up->pending == AF_INET) - return udp_push_pending_frames(sk); - - fl6 = &inet->cork.fl.u.ip6; - - /* Grab the skbuff where UDP header space exists. */ - skb = skb_peek(&sk->sk_write_queue); - if (skb == NULL) - goto out; + int offset = skb_transport_offset(skb); + int len = skb->len - offset; /* * Create a UDP header @@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk) uh = udp_hdr(skb); uh->source = fl6->fl6_sport; uh->dest = fl6->fl6_dport; - uh->len = htons(up->len); + uh->len = htons(len); uh->check = 0; if (is_udplite) - csum = udplite_csum_outgoing(sk, skb); - else if (up->no_check6_tx) { /* UDP csum disabled */ + csum = udplite_csum(skb); + else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, - up->len); + udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len); goto send; } else - csum = udp_csum_outgoing(sk, skb); + csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, - up->len, fl6->flowi6_proto, csum); + len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: - err = ip6_push_pending_frames(sk); + err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { UDP6_INC_STATS_USER(sock_net(sk), @@ -1082,6 +1071,30 @@ send: } else UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +static int udp_v6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + struct udp_sock *up = udp_sk(sk); + struct flowi6 fl6; + int err = 0; + + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + /* ip6_finish_skb will release the cork, so make a copy of + * fl6 here. + */ + fl6 = inet_sk(sk)->cork.fl.u.ip6; + + skb = ip6_finish_skb(sk); + if (!skb) + goto out; + + err = udp_v6_send_skb(skb, &fl6); + out: up->len = 0; up->pending = 0; @@ -1164,6 +1177,7 @@ do_udp_sendmsg: if (len > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (up->pending) { /* * There are pending frames. @@ -1294,6 +1308,20 @@ do_udp_sendmsg: goto do_confirm; back_from_confirm: + /* Lockless fast path for the non-corking case */ + if (!corkreq) { + struct sk_buff *skb; + + skb = ip6_make_skb(sk, getfrag, msg, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, + &fl6, (struct rt6_info *)dst, + msg->msg_flags, dontfrag); + err = PTR_ERR(skb); + if (!IS_ERR_OR_NULL(skb)) + err = udp_v6_send_skb(skb, &fl6); + goto release_dst; + } + lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ @@ -1311,7 +1339,6 @@ do_append_data: if (dontfrag < 0) dontfrag = np->dontfrag; up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, @@ -1323,6 +1350,11 @@ do_append_data: else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; + if (err > 0) + err = np->recverr ? net_xmit_errno(err) : 0; + release_sock(sk); + +release_dst: if (dst) { if (connected) { ip6_dst_store(sk, dst, @@ -1339,9 +1371,6 @@ do_append_data: dst = NULL; } - if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; - release_sock(sk); out: dst_release(dst); fl6_sock_release(flowlabel); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 5f983644373a..48bf5a06847b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -130,12 +130,18 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) { struct flowi6 *fl6 = &fl->u.ip6; int onlyproto = 0; - u16 offset = skb_network_header_len(skb); const struct ipv6hdr *hdr = ipv6_hdr(skb); + u16 offset = sizeof(*hdr); struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); - u8 nexthdr = nh[IP6CB(skb)->nhoff]; + u16 nhoff = IP6CB(skb)->nhoff; int oif = 0; + u8 nexthdr; + + if (!nhoff) + nhoff = offsetof(struct ipv6hdr, nexthdr); + + nexthdr = nh[nhoff]; if (skb_dst(skb)) oif = skb_dst(skb)->dev->ifindex; diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 7f2cafddfb6e..1cde711bcab5 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -533,7 +533,7 @@ void irlap_discovery_request(struct irlap_cb *self, discovery_t *discovery) info.discovery = discovery; /* sysctl_slot_timeout bounds are checked in irsysctl.c - Jean II */ - self->slot_timeout = sysctl_slot_timeout * HZ / 1000; + self->slot_timeout = msecs_to_jiffies(sysctl_slot_timeout); irlap_do_event(self, DISCOVERY_REQUEST, NULL, &info); } @@ -1015,13 +1015,15 @@ void irlap_apply_connection_parameters(struct irlap_cb *self, int now) * Or, this is how much we can keep the pf bit in primary mode. * Therefore, it must be lower or equal than our *OWN* max turn around. * Jean II */ - self->poll_timeout = self->qos_tx.max_turn_time.value * HZ / 1000; + self->poll_timeout = msecs_to_jiffies( + self->qos_tx.max_turn_time.value); /* The Final timeout applies only to the primary station. * It defines the maximum time the primary wait (mostly in RECV mode) * for an answer from the secondary station before polling it again. * Therefore, it must be greater or equal than our *PARTNER* * max turn around time - Jean II */ - self->final_timeout = self->qos_rx.max_turn_time.value * HZ / 1000; + self->final_timeout = msecs_to_jiffies( + self->qos_rx.max_turn_time.value); /* The Watchdog Bit timeout applies only to the secondary station. * It defines the maximum time the secondary wait (mostly in RECV mode) * for poll from the primary station before getting annoyed. diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0ac907adb2f4..b4e923f77954 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -40,6 +40,18 @@ static struct genl_family l2tp_nl_family = { .netnsok = true, }; +static const struct genl_multicast_group l2tp_multicast_group[] = { + { + .name = L2TP_GENL_MCGROUP, + }, +}; + +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_tunnel *tunnel, u8 cmd); +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_session *session, + u8 cmd); + /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; @@ -97,6 +109,52 @@ out: return ret; } +static int l2tp_tunnel_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_tunnel *tunnel, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, tunnel, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + +static int l2tp_session_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_session *session, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, session, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info) { u32 tunnel_id; @@ -188,6 +246,9 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info break; } + if (ret >= 0) + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_CREATE); out: return ret; } @@ -211,6 +272,9 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info goto out; } + l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_DELETE); + (void) l2tp_tunnel_delete(tunnel); out: @@ -239,12 +303,15 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info if (info->attrs[L2TP_ATTR_DEBUG]) tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_MODIFY); + out: return ret; } static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_tunnel *tunnel) + struct l2tp_tunnel *tunnel, u8 cmd) { void *hdr; struct nlattr *nest; @@ -254,8 +321,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla struct ipv6_pinfo *np = NULL; #endif - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, - L2TP_CMD_TUNNEL_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -324,7 +390,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla } out: - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -359,7 +426,7 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, - NLM_F_ACK, tunnel); + NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET); if (ret < 0) goto err_out; @@ -385,7 +452,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - tunnel) <= 0) + tunnel, L2TP_CMD_TUNNEL_GET) < 0) goto out; ti++; @@ -539,6 +606,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id, session_id, peer_session_id, &cfg); + if (ret >= 0) { + session = l2tp_session_find(net, tunnel, session_id); + if (session) + ret = l2tp_session_notify(&l2tp_nl_family, info, session, + L2TP_CMD_SESSION_CREATE); + } + out: return ret; } @@ -555,6 +629,9 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf goto out; } + l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_DELETE); + pw_type = session->pwtype; if (pw_type < __L2TP_PWTYPE_MAX) if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) @@ -601,12 +678,15 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_MRU]) session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); + ret = l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_MODIFY); + out: return ret; } static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_session *session) + struct l2tp_session *session, u8 cmd) { void *hdr; struct nlattr *nest; @@ -615,7 +695,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl sk = tunnel->sock; - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -673,7 +753,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl goto nla_put_failure; nla_nest_end(skb, nest); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -699,7 +780,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, - 0, session); + 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) goto err_out; @@ -737,7 +818,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session) <= 0) + session, L2TP_CMD_SESSION_GET) < 0) break; si++; @@ -896,7 +977,9 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); static int l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); - return genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops); + return genl_register_family_with_ops_groups(&l2tp_nl_family, + l2tp_nl_ops, + l2tp_multicast_group); } static void l2tp_nl_cleanup(void) diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 612a5ddaf93b..799bafc2af39 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -18,28 +18,28 @@ static struct ctl_table llc2_timeout_table[] = { { .procname = "ack", .data = &sysctl_llc2_ack_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_ack_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "busy", .data = &sysctl_llc2_busy_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_busy_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "p", .data = &sysctl_llc2_p_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_p_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "rej", .data = &sysctl_llc2_rej_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_rej_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5cce6055f3ae..10ac6324c1d0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1651,7 +1651,7 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - bool ret; + bool ret = false; int ac; if (local->hw.queues < IEEE80211_NUM_ACS) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 8c8c67819072..ca405b6b686d 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -86,20 +86,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } } - /* tear down aggregation sessions and remove STAs */ - mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) { - if (sta->uploaded) { - enum ieee80211_sta_state state; - - state = sta->sta_state; - for (; state > IEEE80211_STA_NOTEXIST; state--) - WARN_ON(drv_sta_state(local, sta->sdata, sta, - state, state - 1)); - } - } - mutex_unlock(&local->sta_mtx); - /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) @@ -111,6 +97,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) case NL80211_IFTYPE_STATION: ieee80211_mgd_quiesce(sdata); break; + case NL80211_IFTYPE_WDS: + /* tear down aggregation sessions and remove STAs */ + mutex_lock(&local->sta_mtx); + sta = sdata->u.wds.sta; + if (sta && sta->uploaded) { + enum ieee80211_sta_state state; + + state = sta->sta_state; + for (; state > IEEE80211_STA_NOTEXIST; state--) + WARN_ON(drv_sta_state(local, sta->sdata, + sta, state, + state - 1)); + } + mutex_unlock(&local->sta_mtx); + break; default: break; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ed38d8302659..1101563357ea 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -272,7 +272,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (rate && rate->flags & IEEE80211_RATE_ERP_G) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; else if (rate) - channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; + channel_flags |= IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ; else channel_flags |= IEEE80211_CHAN_2GHZ; put_unaligned_le16(channel_flags, pos); diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index c035708ada16..5d9f68c75e5f 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -51,10 +51,7 @@ ieee802154_add_iface(struct wpan_phy *phy, const char *name, struct net_device *err; err = ieee802154_if_add(local, name, type, extended_addr); - if (IS_ERR(err)) - return PTR_ERR(err); - - return 0; + return PTR_ERR_OR_ZERO(err); } static int @@ -87,6 +84,26 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel) } static int +ieee802154_set_cca_mode(struct wpan_phy *wpan_phy, + const struct wpan_phy_cca *cca) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + int ret; + + ASSERT_RTNL(); + + /* check if phy support this setting */ + if (!(local->hw.flags & IEEE802154_HW_CCA_MODE)) + return -EOPNOTSUPP; + + ret = drv_set_cca_mode(local, cca); + if (!ret) + wpan_phy->cca = *cca; + + return ret; +} + +static int ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id) { @@ -201,6 +218,7 @@ const struct cfg802154_ops mac802154_config_ops = { .add_virtual_intf = ieee802154_add_iface, .del_virtual_intf = ieee802154_del_iface, .set_channel = ieee802154_set_channel, + .set_cca_mode = ieee802154_set_cca_mode, .set_pan_id = ieee802154_set_pan_id, .set_short_addr = ieee802154_set_short_addr, .set_backoff_exponent = ieee802154_set_backoff_exponent, diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index f21e864613d0..98180a9fff4a 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -70,7 +70,8 @@ static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm) return local->ops->set_txpower(&local->hw, dbm); } -static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) +static inline int drv_set_cca_mode(struct ieee802154_local *local, + const struct wpan_phy_cca *cca) { might_sleep(); @@ -79,7 +80,7 @@ static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) return -EOPNOTSUPP; } - return local->ops->set_cca_mode(&local->hw, cca_mode); + return local->ops->set_cca_mode(&local->hw, cca); } static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 9ae893057dd7..6fb6bdf9868c 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -137,25 +137,11 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) static int mac802154_slave_open(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); - struct ieee802154_sub_if_data *subif; struct ieee802154_local *local = sdata->local; int res = 0; ASSERT_RTNL(); - if (sdata->vif.type == NL802154_IFTYPE_NODE) { - mutex_lock(&sdata->local->iflist_mtx); - list_for_each_entry(subif, &sdata->local->interfaces, list) { - if (subif != sdata && - subif->vif.type == sdata->vif.type && - ieee802154_sdata_running(subif)) { - mutex_unlock(&sdata->local->iflist_mtx); - return -EBUSY; - } - } - mutex_unlock(&sdata->local->iflist_mtx); - } - set_bit(SDATA_STATE_RUNNING, &sdata->state); if (!local->open_count) { @@ -175,6 +161,88 @@ err: return res; } +static int +ieee802154_check_mac_settings(struct ieee802154_local *local, + struct wpan_dev *wpan_dev, + struct wpan_dev *nwpan_dev) +{ + ASSERT_RTNL(); + + if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { + if (wpan_dev->promiscuous_mode != nwpan_dev->promiscuous_mode) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_AFILT) { + if (wpan_dev->pan_id != nwpan_dev->pan_id) + return -EBUSY; + + if (wpan_dev->short_addr != nwpan_dev->short_addr) + return -EBUSY; + + if (wpan_dev->extended_addr != nwpan_dev->extended_addr) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { + if (wpan_dev->min_be != nwpan_dev->min_be) + return -EBUSY; + + if (wpan_dev->max_be != nwpan_dev->max_be) + return -EBUSY; + + if (wpan_dev->csma_retries != nwpan_dev->csma_retries) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) { + if (wpan_dev->frame_retries != nwpan_dev->frame_retries) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_LBT) { + if (wpan_dev->lbt != nwpan_dev->lbt) + return -EBUSY; + } + + return 0; +} + +static int +ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, + enum nl802154_iftype iftype) +{ + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_sub_if_data *nsdata; + + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(nsdata, &local->interfaces, list) { + if (nsdata != sdata && ieee802154_sdata_running(nsdata)) { + int ret; + + /* TODO currently we don't support multiple node types + * we need to run skb_clone at rx path. Check if there + * exist really an use case if we need to support + * multiple node types at the same time. + */ + if (sdata->vif.type == NL802154_IFTYPE_NODE && + nsdata->vif.type == NL802154_IFTYPE_NODE) + return -EBUSY; + + /* check all phy mac sublayer settings are the same. + * We have only one phy, different values makes trouble. + */ + ret = ieee802154_check_mac_settings(local, wpan_dev, + &nsdata->wpan_dev); + if (ret < 0) + return ret; + } + } + + return 0; +} + static int mac802154_wpan_open(struct net_device *dev) { int rc; @@ -183,6 +251,10 @@ static int mac802154_wpan_open(struct net_device *dev) struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct wpan_phy *phy = sdata->local->phy; + rc = ieee802154_check_concurrent_iface(sdata, sdata->vif.type); + if (rc < 0) + return rc; + rc = mac802154_slave_open(dev); if (rc < 0) return rc; diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 6aacb1816889..bdccb4ecd30f 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -81,7 +81,7 @@ static int mac802154_set_mac_params(struct net_device *dev, /* PHY */ wpan_dev->wpan_phy->transmit_power = params->transmit_power; - wpan_dev->wpan_phy->cca_mode = params->cca_mode; + wpan_dev->wpan_phy->cca = params->cca; wpan_dev->wpan_phy->cca_ed_level = params->cca_ed_level; /* MAC */ @@ -98,7 +98,7 @@ static int mac802154_set_mac_params(struct net_device *dev, } if (local->hw.flags & IEEE802154_HW_CCA_MODE) { - ret = drv_set_cca_mode(local, params->cca_mode); + ret = drv_set_cca_mode(local, ¶ms->cca); if (ret < 0) return ret; } @@ -122,7 +122,7 @@ static void mac802154_get_mac_params(struct net_device *dev, /* PHY */ params->transmit_power = wpan_dev->wpan_phy->transmit_power; - params->cca_mode = wpan_dev->wpan_phy->cca_mode; + params->cca = wpan_dev->wpan_phy->cca; params->cca_ed_level = wpan_dev->wpan_phy->cca_ed_level; /* MAC */ diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index ca27837974fe..809df534a720 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -31,10 +31,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP | SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP))) + SKB_GSO_TCP_ECN))) goto out; /* Setup inner SKB. */ @@ -63,14 +60,14 @@ out: return segs; } -static struct packet_offload mpls_mc_offload = { +static struct packet_offload mpls_mc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_MC), .callbacks = { .gso_segment = mpls_gso_segment, }, }; -static struct packet_offload mpls_uc_offload = { +static struct packet_offload mpls_uc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_UC), .callbacks = { .gso_segment = mpls_gso_segment, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b8295a430a56..e55759056361 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2887,7 +2887,8 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, if (ip_vs_genl_fill_service(skb, svc) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3079,7 +3080,8 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, if (ip_vs_genl_fill_dest(skb, dest) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3215,7 +3217,8 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 1d5341f3761d..5d3daae98bf0 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -183,6 +183,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct nf_conn *ct; struct net *net; + *diff = 0; + #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, * so turn this into a no-op for IPv6 packets @@ -191,8 +193,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; #endif - *diff = 0; - /* Only useful for established sessions */ if (cp->state != IP_VS_TCP_S_ESTABLISHED) return 1; @@ -322,6 +322,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct ip_vs_conn *n_cp; struct net *net; + /* no diff required for incoming packets */ + *diff = 0; + #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, * so turn this into a no-op for IPv6 packets @@ -330,9 +333,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; #endif - /* no diff required for incoming packets */ - *diff = 0; - /* Only useful for established sessions */ if (cp->state != IP_VS_TCP_S_ESTABLISHED) return 1; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a11674806707..13fad8668f83 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,16 +611,15 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - /* We have to check the DYING flag inside the lock to prevent - a race against nf_ct_get_next_corpse() possibly called from - user context, else we insert an already 'dead' hash, blocking - further use of that particular connection -JM */ + /* We have to check the DYING flag after unlink to prevent + * a race against nf_ct_get_next_corpse() possibly called from + * user context, else we insert an already 'dead' hash, blocking + * further use of that particular connection -JM. + */ + nf_ct_del_from_dying_or_unconfirmed_list(ct); - if (unlikely(nf_ct_is_dying(ct))) { - nf_conntrack_double_unlock(hash, reply_hash); - local_bh_enable(); - return NF_ACCEPT; - } + if (unlikely(nf_ct_is_dying(ct))) + goto out; /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're @@ -636,8 +635,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; - nf_ct_del_from_dying_or_unconfirmed_list(ct); - /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -673,6 +670,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_ACCEPT; out: + nf_ct_add_to_dying_list(ct); nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert_failed); local_bh_enable(); @@ -1426,12 +1424,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 portid, int report) -{ - nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report); -} -EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); - static int untrack_refs(void) { int cnt = 0, cpu; @@ -1624,13 +1616,18 @@ int nf_conntrack_init_start(void) for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_conntrack_locks[i]); - /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB - * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ if (!nf_conntrack_htable_size) { + /* Idea from tcp.c: use 1/16384 of memory. + * On i386: 32MB machine has 512 buckets. + * >= 1GB machines have 16384 buckets. + * >= 4GB machines have 65536 buckets. + */ nf_conntrack_htable_size = (((totalram_pages << PAGE_SHIFT) / 16384) / sizeof(struct hlist_head)); - if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) + if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) + nf_conntrack_htable_size = 65536; + else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) nf_conntrack_htable_size = 16384; if (nf_conntrack_htable_size < 32) nf_conntrack_htable_size = 32; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1bd9ed9e62f6..d1c23940a86a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -749,13 +749,47 @@ static int ctnetlink_done(struct netlink_callback *cb) return 0; } -struct ctnetlink_dump_filter { +struct ctnetlink_filter { struct { u_int32_t val; u_int32_t mask; } mark; }; +static struct ctnetlink_filter * +ctnetlink_alloc_filter(const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_MARK + struct ctnetlink_filter *filter; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (filter == NULL) + return ERR_PTR(-ENOMEM); + + filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); + filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + return filter; +#else + return ERR_PTR(-EOPNOTSUPP); +#endif +} + +static int ctnetlink_filter_match(struct nf_conn *ct, void *data) +{ + struct ctnetlink_filter *filter = data; + + if (filter == NULL) + return 1; + +#ifdef CONFIG_NF_CONNTRACK_MARK + if ((ct->mark & filter->mark.mask) == filter->mark.val) + return 1; +#endif + + return 0; +} + static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { @@ -768,10 +802,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) int res; spinlock_t *lockp; -#ifdef CONFIG_NF_CONNTRACK_MARK - const struct ctnetlink_dump_filter *filter = cb->data; -#endif - last = (struct nf_conn *)cb->args[1]; local_bh_disable(); @@ -798,12 +828,9 @@ restart: continue; cb->args[1] = 0; } -#ifdef CONFIG_NF_CONNTRACK_MARK - if (filter && !((ct->mark & filter->mark.mask) == - filter->mark.val)) { + if (!ctnetlink_filter_match(ct, cb->data)) continue; - } -#endif + rcu_read_lock(); res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, @@ -1001,6 +1028,25 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { .len = NF_CT_LABELS_MAX_SIZE }, }; +static int ctnetlink_flush_conntrack(struct net *net, + const struct nlattr * const cda[], + u32 portid, int report) +{ + struct ctnetlink_filter *filter = NULL; + + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + return PTR_ERR(filter); + } + + nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter, + portid, report); + kfree(filter); + + return 0; +} + static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1024,11 +1070,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { - /* Flush the whole table */ - nf_conntrack_flush_report(net, - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - return 0; + return ctnetlink_flush_conntrack(net, cda, + NETLINK_CB(skb).portid, + nlmsg_report(nlh)); } if (err < 0) @@ -1076,21 +1120,16 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, .dump = ctnetlink_dump_table, .done = ctnetlink_done, }; -#ifdef CONFIG_NF_CONNTRACK_MARK + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { - struct ctnetlink_dump_filter *filter; + struct ctnetlink_filter *filter; - filter = kzalloc(sizeof(struct ctnetlink_dump_filter), - GFP_ATOMIC); - if (filter == NULL) - return -ENOMEM; + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + return PTR_ERR(filter); - filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - filter->mark.mask = - ntohl(nla_get_be32(cda[CTA_MARK_MASK])); c.data = filter; } -#endif return netlink_dump_start(ctnl, skb, nlh, &c); } diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index f6e2ae91a80b..ce3e840c8704 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -98,9 +98,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, new_end_seq = htonl(ntohl(sack->end_seq) - seq->offset_before); - pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); + pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n", + ntohl(sack->start_seq), ntohl(new_start_seq), + ntohl(sack->end_seq), ntohl(new_end_seq)); inet_proto_csum_replace4(&tcph->check, skb, sack->start_seq, new_start_seq, 0); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 43c926cae9c0..0d8448f19dfe 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -425,8 +425,7 @@ static int netfilter_log_sysctl_init(struct net *net) nf_log_sysctl_table[i].procname = nf_log_sysctl_fnames[i]; nf_log_sysctl_table[i].data = NULL; - nf_log_sysctl_table[i].maxlen = - NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN; nf_log_sysctl_table[i].mode = 0644; nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 129a8daa4abf..70f697827b9b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -427,7 +427,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -713,16 +714,12 @@ static int nft_flush_table(struct nft_ctx *ctx) struct nft_chain *chain, *nc; struct nft_set *set, *ns; - list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { + list_for_each_entry(chain, &ctx->table->chains, list) { ctx->chain = chain; err = nft_delrule_by_chain(ctx); if (err < 0) goto out; - - err = nft_delchain(ctx); - if (err < 0) - goto out; } list_for_each_entry_safe(set, ns, &ctx->table->sets, list) { @@ -735,6 +732,14 @@ static int nft_flush_table(struct nft_ctx *ctx) goto out; } + list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { + ctx->chain = chain; + + err = nft_delchain(ctx); + if (err < 0) + goto out; + } + err = nft_deltable(ctx); out: return err; @@ -967,7 +972,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -1703,7 +1709,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -2357,7 +2364,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; nla_nest_end(skb, desc); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3031,7 +3039,8 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3320,7 +3329,8 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 13c2e17bbe27..8b117c90ecd7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -272,7 +272,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { - struct sk_buff *nskb, *oskb = skb; + struct sk_buff *oskb = skb; struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; @@ -283,12 +283,11 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL); replay: - nskb = netlink_skb_clone(oskb, GFP_KERNEL); - if (!nskb) + skb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); - nskb->sk = oskb->sk; - skb = nskb; + skb->sk = oskb->sk; nfnl_lock(subsys_id); ss = rcu_dereference_protected(table[subsys_id].subsys, @@ -305,7 +304,7 @@ replay: { nfnl_unlock(subsys_id); netlink_ack(skb, nlh, -EOPNOTSUPP); - return kfree_skb(nskb); + return kfree_skb(skb); } } @@ -321,7 +320,8 @@ replay: nlh = nlmsg_hdr(skb); err = 0; - if (nlh->nlmsg_len < NLMSG_HDRLEN) { + if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) || + skb->len < nlh->nlmsg_len) { err = -EINVAL; goto ack; } @@ -385,7 +385,7 @@ replay: nfnl_err_reset(&err_list); ss->abort(oskb); nfnl_unlock(subsys_id); - kfree_skb(nskb); + kfree_skb(skb); goto replay; } } @@ -426,7 +426,7 @@ done: nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); - kfree_skb(nskb); + kfree_skb(skb); } static void nfnetlink_rcv(struct sk_buff *skb) @@ -463,13 +463,13 @@ static void nfnetlink_rcv(struct sk_buff *skb) } #ifdef CONFIG_MODULES -static int nfnetlink_bind(int group) +static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; int type; if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) - return -EINVAL; + return 0; type = nfnl_group2type[group]; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 9e287cb56a04..a5599fc51a6f 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -86,7 +86,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, static int nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) { - const struct nf_conn_help *help = nfct_help(ct); + struct nf_conn_help *help = nfct_help(ct); if (attr == NULL) return -EINVAL; @@ -94,7 +94,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) if (help->helper->data_len == 0) return -EINVAL; - memcpy(&help->data, nla_data(attr), help->helper->data_len); + memcpy(help->data, nla_data(attr), help->helper->data_len); return 0; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 1e316ce4cb5d..75887d7d2c6a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -33,7 +33,7 @@ static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, struct nft_data *data) { - const struct rhashtable *priv = nft_set_priv(set); + struct rhashtable *priv = nft_set_priv(set); const struct nft_hash_elem *he; he = rhashtable_lookup(priv, key); @@ -83,46 +83,53 @@ static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct rhashtable *priv = nft_set_priv(set); - struct rhash_head *he, __rcu **pprev; - pprev = elem->cookie; - he = rht_dereference((*pprev), priv); + rhashtable_remove(priv, elem->cookie); + synchronize_rcu(); + kfree(elem->cookie); +} - rhashtable_remove_pprev(priv, he, pprev); +struct nft_compare_arg { + const struct nft_set *set; + struct nft_set_elem *elem; +}; - synchronize_rcu(); - kfree(he); +static bool nft_hash_compare(void *ptr, void *arg) +{ + struct nft_hash_elem *he = ptr; + struct nft_compare_arg *x = arg; + + if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) { + x->elem->cookie = he; + x->elem->flags = 0; + if (x->set->flags & NFT_SET_MAP) + nft_data_copy(&x->elem->data, he->data); + + return true; + } + + return false; } static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv); - struct rhash_head __rcu * const *pprev; - struct nft_hash_elem *he; - u32 h; - - h = rhashtable_hashfn(priv, &elem->key, set->klen); - pprev = &tbl->buckets[h]; - rht_for_each_entry_rcu(he, tbl->buckets[h], node) { - if (nft_data_cmp(&he->key, &elem->key, set->klen)) { - pprev = &he->node.next; - continue; - } + struct rhashtable *priv = nft_set_priv(set); + struct nft_compare_arg arg = { + .set = set, + .elem = elem, + }; - elem->cookie = (void *)pprev; - elem->flags = 0; - if (set->flags & NFT_SET_MAP) - nft_data_copy(&elem->data, he->data); + if (rhashtable_lookup_compare(priv, &elem->key, + &nft_hash_compare, &arg)) return 0; - } + return -ENOENT; } static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - const struct rhashtable *priv = nft_set_priv(set); + struct rhashtable *priv = nft_set_priv(set); const struct bucket_table *tbl; const struct nft_hash_elem *he; struct nft_set_elem elem; @@ -130,7 +137,9 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, tbl = rht_dereference_rcu(priv->tbl, priv); for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(he, tbl->buckets[i], node) { + struct rhash_head *pos; + + rht_for_each_entry_rcu(he, pos, tbl, i, node) { if (iter->count < iter->skip) goto cont; @@ -153,13 +162,6 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) return sizeof(struct rhashtable); } -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nfnl_lock_is_held(void *parent) -{ - return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); -} -#endif - static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) @@ -173,9 +175,6 @@ static int nft_hash_init(const struct nft_set *set, .hashfn = jhash, .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nfnl_lock_is_held, -#endif }; return rhashtable_init(priv, ¶ms); @@ -183,18 +182,23 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_destroy(const struct nft_set *set) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = priv->tbl; - struct nft_hash_elem *he, *next; + struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl; + struct nft_hash_elem *he; + struct rhash_head *pos, *next; unsigned int i; + /* Stop an eventual async resizing */ + priv->being_destroyed = true; + mutex_lock(&priv->mutex); + + tbl = rht_dereference(priv->tbl, priv); for (i = 0; i < tbl->size; i++) { - for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node); - he != NULL; he = next) { - next = rht_entry(he->node.next, struct nft_hash_elem, node); + rht_for_each_entry_safe(he, pos, next, tbl, i, node) nft_hash_elem_destroy(set, he); - } } + mutex_unlock(&priv->mutex); + rhashtable_destroy(priv); } diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index afe2b0b45ec4..aff54fb1c8a0 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16) - data[priv->sreg_proto_min].data[0]; - range.max_proto.all = (__force __be16) - data[priv->sreg_proto_max].data[0]; + range.min_proto.all = + *(__be16 *)&data[priv->sreg_proto_min].data[0]; + range.max_proto.all = + *(__be16 *)&data[priv->sreg_proto_max].data[0]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index c529161cdbf8..0778855ea5e7 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -225,6 +225,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_lock(); list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { + int foptsize, optnum; + f = &kf->finger; if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre)) @@ -233,110 +235,109 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) optp = _optp; fmatch = FMATCH_WRONG; - if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) { - int foptsize, optnum; + if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl)) + continue; - /* - * Should not happen if userspace parser was written correctly. - */ - if (f->wss.wc >= OSF_WSS_MAX) - continue; + /* + * Should not happen if userspace parser was written correctly. + */ + if (f->wss.wc >= OSF_WSS_MAX) + continue; - /* Check options */ + /* Check options */ - foptsize = 0; - for (optnum = 0; optnum < f->opt_num; ++optnum) - foptsize += f->opt[optnum].length; + foptsize = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) + foptsize += f->opt[optnum].length; - if (foptsize > MAX_IPOPTLEN || - optsize > MAX_IPOPTLEN || - optsize != foptsize) - continue; + if (foptsize > MAX_IPOPTLEN || + optsize > MAX_IPOPTLEN || + optsize != foptsize) + continue; - check_WSS = f->wss.wc; + check_WSS = f->wss.wc; - for (optnum = 0; optnum < f->opt_num; ++optnum) { - if (f->opt[optnum].kind == (*optp)) { - __u32 len = f->opt[optnum].length; - const __u8 *optend = optp + len; - int loop_cont = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) { + if (f->opt[optnum].kind == (*optp)) { + __u32 len = f->opt[optnum].length; + const __u8 *optend = optp + len; + int loop_cont = 0; - fmatch = FMATCH_OK; + fmatch = FMATCH_OK; - switch (*optp) { - case OSFOPT_MSS: - mss = optp[3]; - mss <<= 8; - mss |= optp[2]; + switch (*optp) { + case OSFOPT_MSS: + mss = optp[3]; + mss <<= 8; + mss |= optp[2]; - mss = ntohs((__force __be16)mss); - break; - case OSFOPT_TS: - loop_cont = 1; - break; - } + mss = ntohs((__force __be16)mss); + break; + case OSFOPT_TS: + loop_cont = 1; + break; + } - optp = optend; - } else - fmatch = FMATCH_OPT_WRONG; + optp = optend; + } else + fmatch = FMATCH_OPT_WRONG; - if (fmatch != FMATCH_OK) - break; - } + if (fmatch != FMATCH_OK) + break; + } - if (fmatch != FMATCH_OPT_WRONG) { - fmatch = FMATCH_WRONG; + if (fmatch != FMATCH_OPT_WRONG) { + fmatch = FMATCH_WRONG; - switch (check_WSS) { - case OSF_WSS_PLAIN: - if (f->wss.val == 0 || window == f->wss.val) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MSS: - /* - * Some smart modems decrease mangle MSS to - * SMART_MSS_2, so we check standard, decreased - * and the one provided in the fingerprint MSS - * values. - */ + switch (check_WSS) { + case OSF_WSS_PLAIN: + if (f->wss.val == 0 || window == f->wss.val) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MSS: + /* + * Some smart modems decrease mangle MSS to + * SMART_MSS_2, so we check standard, decreased + * and the one provided in the fingerprint MSS + * values. + */ #define SMART_MSS_1 1460 #define SMART_MSS_2 1448 - if (window == f->wss.val * mss || - window == f->wss.val * SMART_MSS_1 || - window == f->wss.val * SMART_MSS_2) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MTU: - if (window == f->wss.val * (mss + 40) || - window == f->wss.val * (SMART_MSS_1 + 40) || - window == f->wss.val * (SMART_MSS_2 + 40)) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MODULO: - if ((window % f->wss.val) == 0) - fmatch = FMATCH_OK; - break; - } + if (window == f->wss.val * mss || + window == f->wss.val * SMART_MSS_1 || + window == f->wss.val * SMART_MSS_2) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MTU: + if (window == f->wss.val * (mss + 40) || + window == f->wss.val * (SMART_MSS_1 + 40) || + window == f->wss.val * (SMART_MSS_2 + 40)) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MODULO: + if ((window % f->wss.val) == 0) + fmatch = FMATCH_OK; + break; } + } - if (fmatch != FMATCH_OK) - continue; + if (fmatch != FMATCH_OK) + continue; - fcount++; + fcount++; - if (info->flags & XT_OSF_LOG) - nf_log_packet(net, p->family, p->hooknum, skb, - p->in, p->out, NULL, - "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", - f->genre, f->version, f->subtype, - &ip->saddr, ntohs(tcp->source), - &ip->daddr, ntohs(tcp->dest), - f->ttl - ip->ttl); + if (info->flags & XT_OSF_LOG) + nf_log_packet(net, p->family, p->hooknum, skb, + p->in, p->out, NULL, + "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", + f->genre, f->version, f->subtype, + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest), + f->ttl - ip->ttl); - if ((info->flags & XT_OSF_LOG) && - info->loglevel == XT_OSF_LOGLEVEL_FIRST) - break; - } + if ((info->flags & XT_OSF_LOG) && + info->loglevel == XT_OSF_LOGLEVEL_FIRST) + break; } rcu_read_unlock(); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c2f2a53a4879..7fd1104ba900 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -324,8 +324,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, return 0; add_std_failure: - if (doi_def) - cipso_v4_doi_free(doi_def); + cipso_v4_doi_free(doi_def); return ret_val; } @@ -641,7 +640,8 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) if (ret_val != 0) goto listall_cb_failure; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index e66e977ef2fa..70440748fe5c 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -93,23 +93,20 @@ static int netlbl_mgmt_add_common(struct genl_info *info, struct netlbl_audit *audit_info) { int ret_val = -EINVAL; - struct netlbl_dom_map *entry = NULL; struct netlbl_domaddr_map *addrmap = NULL; struct cipso_v4_doi *cipsov4 = NULL; u32 tmp_val; + struct netlbl_dom_map *entry = kzalloc(sizeof(*entry), GFP_KERNEL); - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } + if (!entry) + return -ENOMEM; entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); if (info->attrs[NLBL_MGMT_A_DOMAIN]) { size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); entry->domain = kmalloc(tmp_size, GFP_KERNEL); if (entry->domain == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_entry; } nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); @@ -125,16 +122,16 @@ static int netlbl_mgmt_add_common(struct genl_info *info, break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) - goto add_failure; + goto add_free_domain; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); cipsov4 = cipso_v4_doi_getdef(tmp_val); if (cipsov4 == NULL) - goto add_failure; + goto add_free_domain; entry->def.cipso = cipsov4; break; default: - goto add_failure; + goto add_free_domain; } if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) { @@ -145,7 +142,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); @@ -153,12 +150,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) != sizeof(struct in_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) != sizeof(struct in_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]); @@ -166,7 +163,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_addrmap; } map->list.addr = addr->s_addr & mask->s_addr; map->list.mask = mask->s_addr; @@ -178,7 +175,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); if (ret_val != 0) { kfree(map); - goto add_failure; + goto add_free_addrmap; } entry->def.type = NETLBL_NLTYPE_ADDRSELECT; @@ -192,7 +189,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); @@ -200,12 +197,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) != sizeof(struct in6_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) != sizeof(struct in6_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]); @@ -213,7 +210,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_addrmap; } map->list.addr = *addr; map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; @@ -227,7 +224,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); if (ret_val != 0) { kfree(map); - goto add_failure; + goto add_free_addrmap; } entry->def.type = NETLBL_NLTYPE_ADDRSELECT; @@ -237,16 +234,17 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) - goto add_failure; + goto add_free_addrmap; return 0; -add_failure: - if (cipsov4) - cipso_v4_doi_putdef(cipsov4); - if (entry) - kfree(entry->domain); +add_free_addrmap: kfree(addrmap); +add_doi_put_def: + cipso_v4_doi_putdef(cipsov4); +add_free_domain: + kfree(entry->domain); +add_free_entry: kfree(entry); return ret_val; } @@ -456,7 +454,8 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) goto listall_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); @@ -620,7 +619,8 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, if (ret_val != 0) goto protocols_cb_failure; - return genlmsg_end(skb, data); + genlmsg_end(skb, data); + return 0; protocols_cb_failure: genlmsg_cancel(skb, data); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 78a63c18779e..aec7994f78cf 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1163,7 +1163,8 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, goto list_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; list_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ef5f77b44ec7..a36777b7cfb6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -61,6 +61,7 @@ #include <linux/rhashtable.h> #include <asm/cacheflush.h> #include <linux/hash.h> +#include <linux/genetlink.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -97,12 +98,12 @@ static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: - * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock - * combined with an RCU read-side lock. Insertion and removal are protected - * with nl_sk_hash_lock while using RCU list modification primitives and may - * run in parallel to nl_table_lock protected lookups. Destruction of the - * Netlink socket may only occur *after* nl_table_lock has been acquired - * either during or after the socket has been removed from the list. + * Lookup and traversal are protected with an RCU read-side lock. Insertion + * and removal are protected with per bucket lock while using RCU list + * modification primitives and may run in parallel to RCU protected lookups. + * Destruction of the Netlink socket may only occur *after* nl_table_lock has + * been acquired * either during or after the socket has been removed from + * the list and after an RCU grace period. */ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); @@ -110,19 +111,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); -/* Protects netlink socket hash table mutations */ -DEFINE_MUTEX(nl_sk_hash_lock); -EXPORT_SYMBOL_GPL(nl_sk_hash_lock); - -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nl_sk_hash_is_held(void *parent) -{ - if (debug_locks) - return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); - return 1; -} -#endif - static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -525,14 +513,14 @@ out: return err; } -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) +static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) { #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 struct page *p_start, *p_end; /* First page is flushed through netlink_{get,set}_status */ p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1); + p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); while (p_start <= p_end) { flush_dcache_page(p_start); p_start++; @@ -550,9 +538,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) static void netlink_set_status(struct nl_mmap_hdr *hdr, enum nl_mmap_status status) { + smp_mb(); hdr->nm_status = status; flush_dcache_page(pgvec_to_page(hdr)); - smp_wmb(); } static struct nl_mmap_hdr * @@ -707,31 +695,23 @@ static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, u32 dst_portid, u32 dst_group, - struct sock_iocb *siocb) + struct scm_cookie *scm) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; struct nl_mmap_hdr *hdr; struct sk_buff *skb; unsigned int maxlen; - bool excl = true; int err = 0, len = 0; - /* Netlink messages are validated by the receiver before processing. - * In order to avoid userspace changing the contents of the message - * after validation, the socket and the ring may only be used by a - * single process, otherwise we fall back to copying. - */ - if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 || - atomic_read(&nlk->mapped) > 1) - excl = false; - mutex_lock(&nlk->pg_vec_lock); ring = &nlk->tx_ring; maxlen = ring->frame_size - NL_MMAP_HDRLEN; do { + unsigned int nm_len; + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); if (hdr == NULL) { if (!(msg->msg_flags & MSG_DONTWAIT) && @@ -739,41 +719,29 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, schedule(); continue; } - if (hdr->nm_len > maxlen) { + + nm_len = ACCESS_ONCE(hdr->nm_len); + if (nm_len > maxlen) { err = -EINVAL; goto out; } - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, nm_len); - if (likely(dst_portid == 0 && dst_group == 0 && excl)) { - skb = alloc_skb_head(GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - sock_hold(sk); - netlink_ring_setup_skb(skb, sk, ring, hdr); - NETLINK_CB(skb).flags |= NETLINK_SKB_TX; - __skb_put(skb, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - } else { - skb = alloc_skb(hdr->nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, hdr->nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); + skb = alloc_skb(nm_len, GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto out; } + __skb_put(skb, nm_len); + memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); + netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); netlink_increment_head(ring); NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm->creds; err = security_netlink_send(sk, skb); if (err) { @@ -813,7 +781,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) hdr->nm_pid = NETLINK_CB(skb).creds.pid; hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, hdr->nm_len); netlink_set_status(hdr, NL_MMAP_STATUS_VALID); NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; @@ -852,7 +820,7 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap #define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 +#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 #endif /* CONFIG_NETLINK_MMAP */ static void netlink_skb_destructor(struct sk_buff *skb) @@ -1022,26 +990,33 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, .net = net, .portid = portid, }; - u32 hash; - hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid)); - - return rhashtable_lookup_compare(&table->hash, hash, + return rhashtable_lookup_compare(&table->hash, &portid, &netlink_compare, &arg); } +static bool __netlink_insert(struct netlink_table *table, struct sock *sk) +{ + struct netlink_compare_arg arg = { + .net = sock_net(sk), + .portid = nlk_sk(sk)->portid, + }; + + return rhashtable_lookup_compare_insert(&table->hash, + &nlk_sk(sk)->node, + &netlink_compare, &arg); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { struct netlink_table *table = &nl_table[protocol]; struct sock *sk; - read_lock(&nl_table_lock); rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); - read_unlock(&nl_table_lock); return sk; } @@ -1072,29 +1047,33 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, struct net *net, u32 portid) +static int netlink_insert(struct sock *sk, u32 portid) { struct netlink_table *table = &nl_table[sk->sk_protocol]; - int err = -EADDRINUSE; + int err; - mutex_lock(&nl_sk_hash_lock); - if (__netlink_lookup(table, portid, net)) - goto err; + lock_sock(sk); err = -EBUSY; if (nlk_sk(sk)->portid) goto err; err = -ENOMEM; - if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX)) + if (BITS_PER_LONG > 32 && + unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; nlk_sk(sk)->portid = portid; sock_hold(sk); - rhashtable_insert(&table->hash, &nlk_sk(sk)->node); + err = 0; + if (!__netlink_insert(table, sk)) { + err = -EADDRINUSE; + sock_put(sk); + } + err: - mutex_unlock(&nl_sk_hash_lock); + release_sock(sk); return err; } @@ -1102,17 +1081,19 @@ static void netlink_remove(struct sock *sk) { struct netlink_table *table; - mutex_lock(&nl_sk_hash_lock); table = &nl_table[sk->sk_protocol]; if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } - mutex_unlock(&nl_sk_hash_lock); netlink_table_grab(); - if (nlk_sk(sk)->subscriptions) + if (nlk_sk(sk)->subscriptions) { __sk_del_bind_node(sk); + netlink_update_listeners(sk); + } + if (sk->sk_protocol == NETLINK_GENERIC) + atomic_inc(&genl_sk_destructing_cnt); netlink_table_ungrab(); } @@ -1159,8 +1140,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1212,6 +1193,13 @@ out_module: goto out; } +static void deferred_put_nlk_sk(struct rcu_head *head) +{ + struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + + sock_put(&nlk->sk); +} + static int netlink_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -1229,6 +1217,20 @@ static int netlink_release(struct socket *sock) * will be purged. */ + /* must not acquire netlink_table_lock in any way again before unbind + * and notifying genetlink is done as otherwise it might deadlock + */ + if (nlk->netlink_unbind) { + int i; + + for (i = 0; i < nlk->ngroups; i++) + if (test_bit(i, nlk->groups)) + nlk->netlink_unbind(sock_net(sk), i + 1); + } + if (sk->sk_protocol == NETLINK_GENERIC && + atomic_dec_return(&genl_sk_destructing_cnt) == 0) + wake_up(&genl_sk_destructing_waitq); + sock->sk = NULL; wake_up_interruptible_all(&nlk->wait); @@ -1246,8 +1248,8 @@ static int netlink_release(struct socket *sock) module_put(nlk->module); - netlink_table_grab(); if (netlink_is_kernel(sk)) { + netlink_table_grab(); BUG_ON(nl_table[sk->sk_protocol].registered == 0); if (--nl_table[sk->sk_protocol].registered == 0) { struct listeners *old; @@ -1261,10 +1263,8 @@ static int netlink_release(struct socket *sock) nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } - } else if (nlk->subscriptions) { - netlink_update_listeners(sk); + netlink_table_ungrab(); } - netlink_table_ungrab(); kfree(nlk->groups); nlk->groups = NULL; @@ -1272,7 +1272,7 @@ static int netlink_release(struct socket *sock) local_bh_disable(); sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); local_bh_enable(); - sock_put(sk); + call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@ -1287,7 +1287,6 @@ static int netlink_autobind(struct socket *sock) retry: cond_resched(); - netlink_table_grab(); rcu_read_lock(); if (__netlink_lookup(table, portid, net)) { /* Bind collision, search negative portid values. */ @@ -1295,13 +1294,11 @@ retry: if (rover > -4097) rover = -4097; rcu_read_unlock(); - netlink_table_ungrab(); goto retry; } rcu_read_unlock(); - netlink_table_ungrab(); - err = netlink_insert(sk, net, portid); + err = netlink_insert(sk, portid); if (err == -EADDRINUSE) goto retry; @@ -1430,9 +1427,10 @@ static int netlink_realloc_groups(struct sock *sk) return err; } -static void netlink_unbind(int group, long unsigned int groups, - struct netlink_sock *nlk) +static void netlink_undo_bind(int group, long unsigned int groups, + struct sock *sk) { + struct netlink_sock *nlk = nlk_sk(sk); int undo; if (!nlk->netlink_unbind) @@ -1440,7 +1438,7 @@ static void netlink_unbind(int group, long unsigned int groups, for (undo = 0; undo < group; undo++) if (test_bit(undo, &groups)) - nlk->netlink_unbind(undo); + nlk->netlink_unbind(sock_net(sk), undo); } static int netlink_bind(struct socket *sock, struct sockaddr *addr, @@ -1478,20 +1476,20 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, for (group = 0; group < nlk->ngroups; group++) { if (!test_bit(group, &groups)) continue; - err = nlk->netlink_bind(group); + err = nlk->netlink_bind(net, group); if (!err) continue; - netlink_unbind(group, groups, nlk); + netlink_undo_bind(group, groups, sk); return err; } } if (!nlk->portid) { err = nladdr->nl_pid ? - netlink_insert(sk, net, nladdr->nl_pid) : + netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_unbind(nlk->ngroups, groups, nlk); + netlink_undo_bind(nlk->ngroups, groups, sk); return err; } } @@ -2142,7 +2140,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (!val || val - 1 >= nlk->ngroups) return -EINVAL; if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { - err = nlk->netlink_bind(val); + err = nlk->netlink_bind(sock_net(sk), val); if (err) return err; } @@ -2151,7 +2149,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) - nlk->netlink_unbind(val); + nlk->netlink_unbind(sock_net(sk), val); err = 0; break; @@ -2261,7 +2259,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); @@ -2275,10 +2272,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) - siocb->scm = &scm; - - err = scm_send(sock, msg, siocb->scm, true); + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; @@ -2307,7 +2301,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (netlink_tx_is_mmaped(sk) && msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - siocb); + &scm); goto out; } @@ -2321,7 +2315,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm.creds; NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; @@ -2343,7 +2337,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -2351,7 +2345,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2414,11 +2407,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (nlk->flags & NETLINK_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); - if (NULL == siocb->scm) { - memset(&scm, 0, sizeof(scm)); - siocb->scm = &scm; - } - siocb->scm->creds = *NETLINK_CREDS(skb); + memset(&scm, 0, sizeof(scm)); + scm.creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) copied = data_skb->len; @@ -2433,7 +2423,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } } - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: netlink_rcv_wake(sk); return err ? : copied; @@ -2494,7 +2484,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (cfg && cfg->input) nlk_sk(sk)->netlink_rcv = cfg->input; - if (netlink_insert(sk, net, 0)) + if (netlink_insert(sk, 0)) goto out_sock_release; nlk = nlk_sk(sk); @@ -2913,7 +2903,9 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (j = 0; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { + struct rhash_head *node; + + rht_for_each_entry_rcu(nlk, node, tbl, j, node) { s = (struct sock *)nlk; if (sock_net(s) != seq_file_net(seq)) @@ -2931,9 +2923,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) } static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(nl_table_lock) __acquires(RCU) + __acquires(RCU) { - read_lock(&nl_table_lock); rcu_read_lock(); return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2941,6 +2932,8 @@ static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct rhashtable *ht; + const struct bucket_table *tbl; + struct rhash_head *node; struct netlink_sock *nlk; struct nl_seq_iter *iter; struct net *net; @@ -2957,17 +2950,17 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) i = iter->link; ht = &nl_table[i].hash; - rht_for_each_entry(nlk, nlk->node.next, ht, node) + tbl = rht_dereference_rcu(ht->tbl, ht); + rht_for_each_entry_rcu_continue(nlk, node, nlk->node.next, tbl, iter->hash_idx, node) if (net_eq(sock_net((struct sock *)nlk), net)) return nlk; j = iter->hash_idx + 1; do { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (; j < tbl->size; j++) { - rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { + rht_for_each_entry_rcu(nlk, node, tbl, j, node) { if (net_eq(sock_net((struct sock *)nlk), net)) { iter->link = i; iter->hash_idx = j; @@ -2983,10 +2976,9 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) __releases(nl_table_lock) + __releases(RCU) { rcu_read_unlock(); - read_unlock(&nl_table_lock); } @@ -3133,9 +3125,6 @@ static int __init netlink_proto_init(void) .max_shift = 16, /* 64K */ .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nl_sk_hash_is_held, -#endif }; if (err != 0) diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index b20a1731759b..89008405d6b4 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -2,6 +2,7 @@ #define _AF_NETLINK_H #include <linux/rhashtable.h> +#include <linux/atomic.h> #include <net/sock.h> #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -39,8 +40,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - int (*netlink_bind)(int group); - void (*netlink_unbind)(int group); + int (*netlink_bind)(struct net *net, int group); + void (*netlink_unbind)(struct net *net, int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -50,6 +51,7 @@ struct netlink_sock { #endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; + struct rcu_head rcu; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) @@ -65,14 +67,13 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; extern struct netlink_table *nl_table; extern rwlock_t nl_table_lock; -extern struct mutex nl_sk_hash_lock; #endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c index de8c74a3c061..3ee63a3cff30 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -91,7 +91,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sk_diag_put_rings_cfg(sk, skb)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); @@ -103,7 +104,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, { struct netlink_table *tbl = &nl_table[protocol]; struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); + const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; @@ -113,7 +114,9 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, req = nlmsg_data(cb->nlh); for (i = 0; i < htbl->size; i++) { - rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) { + struct rhash_head *pos; + + rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { sk = (struct sock *)nlsk; if (!net_eq(sock_net(sk), net)) @@ -170,7 +173,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) req = nlmsg_data(cb->nlh); - mutex_lock(&nl_sk_hash_lock); + rcu_read_lock(); read_lock(&nl_table_lock); if (req->sdiag_protocol == NDIAG_PROTO_ALL) { @@ -184,7 +187,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return -ENOENT; } @@ -192,7 +195,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return skb->len; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 76393f2f4b22..2ed5f964772e 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -23,6 +23,9 @@ static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ static DECLARE_RWSEM(cb_lock); +atomic_t genl_sk_destructing_cnt = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(genl_sk_destructing_waitq); + void genl_lock(void) { mutex_lock(&genl_mutex); @@ -435,15 +438,18 @@ int genl_unregister_family(struct genl_family *family) genl_lock_all(); - genl_unregister_mc_groups(family); - list_for_each_entry(rc, genl_family_chain(family->id), family_list) { if (family->id != rc->id || strcmp(rc->name, family->name)) continue; + genl_unregister_mc_groups(family); + list_del(&rc->family_list); family->n_ops = 0; - genl_unlock_all(); + up_write(&cb_lock); + wait_event(genl_sk_destructing_waitq, + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); @@ -756,7 +762,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, nla_nest_end(skb, nla_grps); } - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -796,7 +803,8 @@ static int ctrl_fill_mcgrp_info(struct genl_family *family, nla_nest_end(skb, nest); nla_nest_end(skb, nla_grps); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -983,11 +991,63 @@ static struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; +static int genl_bind(struct net *net, int group) +{ + int i, err = -ENOENT; + + down_read(&cb_lock); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(i), family_list) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (!f->netnsok && net != &init_net) + err = -ENOENT; + else if (f->mcast_bind) + err = f->mcast_bind(net, fam_grp); + else + err = 0; + break; + } + } + } + up_read(&cb_lock); + + return err; +} + +static void genl_unbind(struct net *net, int group) +{ + int i; + + down_read(&cb_lock); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(i), family_list) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (f->mcast_unbind) + f->mcast_unbind(net, fam_grp); + break; + } + } + } + up_read(&cb_lock); +} + static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, + .bind = genl_bind, + .unbind = genl_unbind, }; /* we'll bump the group number right afterwards */ diff --git a/net/nfc/core.c b/net/nfc/core.c index 819b87702b70..7f1b6351755c 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -555,7 +555,6 @@ EXPORT_SYMBOL(nfc_find_se); int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) { - struct nfc_se *se; int rc; @@ -605,7 +604,6 @@ error: int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) { - struct nfc_se *se; int rc; diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 91df487aa0a9..844673cb7c18 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -116,23 +116,6 @@ int nfc_hci_send_event(struct nfc_hci_dev *hdev, u8 gate, u8 event, } EXPORT_SYMBOL(nfc_hci_send_event); -int nfc_hci_send_response(struct nfc_hci_dev *hdev, u8 gate, u8 response, - const u8 *param, size_t param_len) -{ - u8 pipe; - - pr_debug("\n"); - - pipe = hdev->gate2pipe[gate]; - if (pipe == NFC_HCI_INVALID_PIPE) - return -EADDRNOTAVAIL; - - return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, - response, param, param_len, NULL, NULL, - 0); -} -EXPORT_SYMBOL(nfc_hci_send_response); - /* * Execute an hci command sent to gate. * skb will contain response data if success. skb can be NULL if you are not @@ -331,7 +314,7 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) if (r < 0) return r; - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); return 0; } @@ -345,7 +328,7 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, pr_debug("\n"); - if (hdev->gate2pipe[dest_gate] == NFC_HCI_DO_NOT_CREATE_PIPE) + if (pipe == NFC_HCI_DO_NOT_CREATE_PIPE) return 0; if (hdev->gate2pipe[dest_gate] != NFC_HCI_INVALID_PIPE) @@ -380,6 +363,8 @@ open_pipe: return r; } + hdev->pipes[pipe].gate = dest_gate; + hdev->pipes[pipe].dest_host = dest_host; hdev->gate2pipe[dest_gate] = pipe; return 0; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index ef50e7716c4a..6e061da2258a 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -46,6 +46,32 @@ int nfc_hci_result_to_errno(u8 result) } EXPORT_SYMBOL(nfc_hci_result_to_errno); +void nfc_hci_reset_pipes(struct nfc_hci_dev *hdev) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } + memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); +} +EXPORT_SYMBOL(nfc_hci_reset_pipes); + +void nfc_hci_reset_pipes_per_host(struct nfc_hci_dev *hdev, u8 host) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + if (hdev->pipes[i].dest_host != host) + continue; + + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } +} +EXPORT_SYMBOL(nfc_hci_reset_pipes_per_host); + static void nfc_hci_msg_tx_work(struct work_struct *work) { struct nfc_hci_dev *hdev = container_of(work, struct nfc_hci_dev, @@ -167,48 +193,69 @@ exit: void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { - int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, pipe); - u8 local_gate, new_pipe; - u8 gate_opened = 0x00; + u8 gate = hdev->pipes[pipe].gate; + u8 status = NFC_HCI_ANY_OK; + struct hci_create_pipe_resp *create_info; + struct hci_delete_pipe_noti *delete_info; + struct hci_all_pipe_cleared_noti *cleared_info; pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: if (skb->len != 5) { - r = -EPROTO; - break; + status = NFC_HCI_ANY_E_NOK; + goto exit; } + create_info = (struct hci_create_pipe_resp *)skb->data; - local_gate = skb->data[3]; - new_pipe = skb->data[4]; - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); - - /* save the new created pipe and bind with local gate, + /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we * are the destination and it is our local gate */ - hdev->gate2pipe[local_gate] = new_pipe; + hdev->gate2pipe[create_info->dest_gate] = create_info->pipe; + hdev->pipes[create_info->pipe].gate = create_info->dest_gate; + hdev->pipes[create_info->pipe].dest_host = + create_info->src_host; break; case NFC_HCI_ANY_OPEN_PIPE: - /* if the pipe is already created, we allow remote host to - * open it - */ - if (gate != 0xff) - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, - &gate_opened, 1); + if (gate == NFC_HCI_INVALID_GATE) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + break; + case NFC_HCI_ADM_NOTIFY_PIPE_DELETED: + if (skb->len != 1) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + delete_info = (struct hci_delete_pipe_noti *)skb->data; + + hdev->pipes[delete_info->pipe].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[delete_info->pipe].dest_host = NFC_HCI_INVALID_HOST; break; case NFC_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); + if (skb->len != 1) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + cleared_info = (struct hci_all_pipe_cleared_noti *)skb->data; + + nfc_hci_reset_pipes_per_host(hdev, cleared_info->host); break; default: pr_info("Discarded unknown cmd %x to gate %x\n", cmd, gate); - r = -EINVAL; break; } + if (hdev->ops->cmd_received) + hdev->ops->cmd_received(hdev, pipe, cmd, skb); + +exit: + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + status, NULL, 0, NULL, NULL, 0); + kfree_skb(skb); } @@ -330,15 +377,15 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, pipe); + u8 gate = hdev->pipes[pipe].gate; - if (gate == 0xff) { + if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; } if (hdev->ops->event_received) { - r = hdev->ops->event_received(hdev, gate, event, skb); + r = hdev->ops->event_received(hdev, pipe, event, skb); if (r <= 0) goto exit_noskb; } @@ -573,7 +620,7 @@ static int hci_dev_down(struct nfc_dev *nfc_dev) if (hdev->ops->close) hdev->ops->close(hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); return 0; } @@ -932,7 +979,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, nfc_set_drvdata(hdev->ndev, hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); hdev->quirks = quirks; diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index c3d2e2c1394c..ab4c8e80b1ad 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -65,6 +65,14 @@ struct hci_create_pipe_resp { u8 pipe; } __packed; +struct hci_delete_pipe_noti { + u8 pipe; +} __packed; + +struct hci_all_pipe_cleared_noti { + u8 host; +} __packed; + #define NFC_HCI_FRAGMENT 0x7f #define HCP_HEADER(type, instr) ((((type) & 0x03) << 6) | ((instr) & 0x3f)) @@ -77,8 +85,6 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, data_exchange_cb_t cb, void *cb_context, unsigned long completion_delay); -u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe); - void nfc_hci_hcp_message_rx(struct nfc_hci_dev *hdev, u8 pipe, u8 type, u8 instruction, struct sk_buff *skb); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index e9de1514656e..1fe725d66085 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -124,17 +124,6 @@ out_skb_err: return err; } -u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe) -{ - int gate; - - for (gate = 0; gate < NFC_HCI_MAX_GATES; gate++) - if (hdev->gate2pipe[gate] == pipe) - return gate; - - return 0xff; -} - /* * Receive hcp message for pipe, with type and cmd. * skb contains optional message data only. diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 44989fc8cddf..be387e6219a0 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -102,7 +102,8 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -518,7 +519,8 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -908,7 +910,8 @@ static int nfc_genl_send_params(struct sk_buff *msg, nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: @@ -1247,8 +1250,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev, nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; - if (genlmsg_end(msg, hdr) < 0) - goto nla_put_failure; + genlmsg_end(msg, hdr); } return 0; diff --git a/net/nonet.c b/net/nonet.c deleted file mode 100644 index b1a73fda9c12..000000000000 --- a/net/nonet.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * net/nonet.c - * - * Dummy functions to allow us to configure network support entirely - * out of the kernel. - * - * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) Matthew Wilcox 2003 - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/kernel.h> - -static int sock_no_open(struct inode *irrelevant, struct file *dontcare) -{ - return -ENXIO; -} - -const struct file_operations bad_sock_fops = { - .owner = THIS_MODULE, - .open = sock_no_open, - .llseek = noop_llseek, -}; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 764fdc39c63b..b4cffe686126 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -147,7 +147,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, hdr = eth_hdr(skb); hdr->h_proto = mpls->mpls_ethertype; - skb_set_inner_protocol(skb, skb->protocol); + if (!skb->inner_protocol) + skb_set_inner_protocol(skb, skb->protocol); skb->protocol = mpls->mpls_ethertype; invalidate_flow_key(key); @@ -211,7 +212,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) int err; err = skb_vlan_pop(skb); - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = 0; @@ -221,7 +222,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = vlan->vlan_tci; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 332b5a031739..ae5e77cdc0ca 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -65,6 +65,8 @@ static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; static struct genl_family dp_datapath_genl_family; +static const struct nla_policy flow_policy[]; + static const struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP, }; @@ -83,8 +85,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, unsigned int group) { return info->nlhdr->nlmsg_flags & NLM_F_ECHO || - genl_has_listeners(family, genl_info_net(info)->genl_sock, - group); + genl_has_listeners(family, genl_info_net(info), group); } static void ovs_notify(struct genl_family *family, @@ -420,7 +421,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (!dp_ifindex) return -ENODEV; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; @@ -462,10 +463,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 0, upcall_info->cmd); upcall->dp_ifindex = dp_ifindex; - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - err = ovs_nla_put_flow(key, key, user_skb); + err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); BUG_ON(err); - nla_nest_end(user_skb, nla); if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, @@ -525,7 +524,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct vport *input_vport; int len; int err; - bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool log = !a[OVS_PACKET_ATTR_PROBE]; err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || @@ -611,6 +610,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, + [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, }; static const struct genl_ops dp_packet_genl_ops[] = { @@ -664,46 +664,48 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, } } -static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) +static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags) { - return NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */ - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */ - + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ - + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ - + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ - + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ + return ovs_identifier_is_ufid(sfid) && + !(ufid_flags & OVS_UFID_F_OMIT_KEY); } -/* Called with ovs_mutex or RCU read lock. */ -static int ovs_flow_cmd_fill_match(const struct sw_flow *flow, - struct sk_buff *skb) +static bool should_fill_mask(uint32_t ufid_flags) { - struct nlattr *nla; - int err; + return !(ufid_flags & OVS_UFID_F_OMIT_MASK); +} - /* Fill flow key. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); - if (!nla) - return -EMSGSIZE; +static bool should_fill_actions(uint32_t ufid_flags) +{ + return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS); +} - err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); - if (err) - return err; +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, + uint32_t ufid_flags) +{ + size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); - nla_nest_end(skb, nla); + /* OVS_FLOW_ATTR_UFID */ + if (sfid && ovs_identifier_is_ufid(sfid)) + len += nla_total_size(sfid->ufid_len); - /* Fill flow mask. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); - if (!nla) - return -EMSGSIZE; + /* OVS_FLOW_ATTR_KEY */ + if (!sfid || should_fill_key(sfid, ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); - err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); - if (err) - return err; + /* OVS_FLOW_ATTR_MASK */ + if (should_fill_mask(ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); - nla_nest_end(skb, nla); - return 0; + /* OVS_FLOW_ATTR_ACTIONS */ + if (should_fill_actions(ufid_flags)) + len += nla_total_size(acts->actions_len); + + return len + + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + + nla_total_size(8); /* OVS_FLOW_ATTR_USED */ } /* Called with ovs_mutex or RCU read lock. */ @@ -774,7 +776,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, /* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, - u32 seq, u32 flags, u8 cmd) + u32 seq, u32 flags, u8 cmd, u32 ufid_flags) { const int skb_orig_len = skb->len; struct ovs_header *ovs_header; @@ -787,19 +789,34 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, ovs_header->dp_ifindex = dp_ifindex; - err = ovs_flow_cmd_fill_match(flow, skb); + err = ovs_nla_put_identifier(flow, skb); if (err) goto error; + if (should_fill_key(&flow->id, ufid_flags)) { + err = ovs_nla_put_masked_key(flow, skb); + if (err) + goto error; + } + + if (should_fill_mask(ufid_flags)) { + err = ovs_nla_put_mask(flow, skb); + if (err) + goto error; + } + err = ovs_flow_cmd_fill_stats(flow, skb); if (err) goto error; - err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); - if (err) - goto error; + if (should_fill_actions(ufid_flags)) { + err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); + if (err) + goto error; + } - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; error: genlmsg_cancel(skb, ovs_header); @@ -808,15 +825,19 @@ error: /* May not be called with RCU read lock. */ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, struct genl_info *info, - bool always) + bool always, + uint32_t ufid_flags) { struct sk_buff *skb; + size_t len; if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) return NULL; - skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); + len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); + skb = genlmsg_new_unicast(len, info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -827,19 +848,19 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, int dp_ifindex, struct genl_info *info, u8 cmd, - bool always) + bool always, u32 ufid_flags) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, - always); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), + &flow->id, info, always, ufid_flags); if (IS_ERR_OR_NULL(skb)) return skb; retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, - cmd); + cmd, ufid_flags); BUG_ON(retval < 0); return skb; } @@ -848,12 +869,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow *flow, *new_flow; + struct sw_flow *flow = NULL, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; + struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; @@ -878,13 +901,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &new_flow->unmasked_key, &mask); + ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + ovs_flow_mask_key(&new_flow->key, &key, &mask); + + /* Extract flow identifier. */ + error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], + &key, log); + if (error) + goto err_kfree_flow; /* Validate actions. */ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, @@ -894,7 +923,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; } - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -906,8 +936,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -ENODEV; goto err_unlock_ovs; } + /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (ovs_identifier_is_ufid(&new_flow->id)) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); + if (!flow) + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); @@ -923,7 +957,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -941,10 +976,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -EEXIST; goto err_unlock_ovs; } - /* The unmasked key has to be the same for flow updates. */ - if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - /* Look for any overlapping flow. */ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + /* The flow identifier has to be the same for flow updates. + * Look for any overlapping flow. + */ + if (unlikely(!ovs_flow_cmp(flow, &match))) { + if (ovs_identifier_is_key(&flow->id)) + flow = ovs_flow_tbl_lookup_exact(&dp->table, + &match); + else /* UFID matches but key is different */ + flow = NULL; if (!flow) { error = -ENOENT; goto err_unlock_ovs; @@ -959,7 +999,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -1015,8 +1056,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; + struct sw_flow_id sfid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; /* Extract key. */ error = -EINVAL; @@ -1025,6 +1069,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto error; } + ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); @@ -1041,7 +1086,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) } /* Can allocate before locking if have acts. */ - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -1055,7 +1101,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; } /* Check that the flow exists. */ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { error = -ENOENT; goto err_unlock_ovs; @@ -1071,13 +1120,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } } else { /* Could not alloc without acts before locking. */ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); + info, OVS_FLOW_CMD_NEW, false, + ufid_flags); + if (unlikely(IS_ERR(reply))) { error = PTR_ERR(reply); goto err_unlock_ovs; @@ -1114,17 +1166,22 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct datapath *dp; struct sw_flow_match match; - int err; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); + int err = 0; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (!a[OVS_FLOW_ATTR_KEY]) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + log); + } else if (!ufid_present) { OVS_NLERR(log, "Flow get message rejected, Key attribute missing."); - return -EINVAL; + err = -EINVAL; } - - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (err) return err; @@ -1135,14 +1192,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { err = -ENOENT; goto unlock; } reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, - OVS_FLOW_CMD_NEW, true); + OVS_FLOW_CMD_NEW, true, ufid_flags); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1161,13 +1221,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sk_buff *reply; - struct sw_flow *flow; + struct sw_flow *flow = NULL; struct datapath *dp; struct sw_flow_match match; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int err; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (likely(a[OVS_FLOW_ATTR_KEY])) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); @@ -1182,12 +1246,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { err = -ENOENT; goto unlock; @@ -1197,14 +1264,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_unlock(); reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, - info, false); + &flow->id, info, false, ufid_flags); if (likely(reply)) { if (likely(!IS_ERR(reply))) { rcu_read_lock(); /*To keep RCU checker happy. */ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_DEL); + OVS_FLOW_CMD_DEL, + ufid_flags); rcu_read_unlock(); BUG_ON(err < 0); @@ -1223,9 +1291,18 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *a[__OVS_FLOW_ATTR_MAX]; struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct table_instance *ti; struct datapath *dp; + u32 ufid_flags; + int err; + + err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy); + if (err) + return err; + ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); rcu_read_lock(); dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1248,7 +1325,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_FLOW_CMD_NEW) < 0) + OVS_FLOW_CMD_NEW, ufid_flags) < 0) break; cb->args[0] = bucket; @@ -1264,6 +1341,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 }, + [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 }, }; static const struct genl_ops dp_flow_genl_ops[] = { @@ -1349,7 +1428,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) goto nla_put_failure; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: genlmsg_cancel(skb, ovs_header); @@ -1723,7 +1803,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (err == -EMSGSIZE) goto error; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: err = -EMSGSIZE; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 70bef2ab7f2b..e2c348b8baca 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -70,6 +70,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); + int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[node]); @@ -105,7 +106,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, if (likely(new_stats)) { new_stats->used = jiffies; new_stats->packet_count = 1; - new_stats->byte_count = skb->len; + new_stats->byte_count = len; new_stats->tcp_flags = tcp_flags; spin_lock_init(&new_stats->lock); @@ -120,7 +121,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, stats->used = jiffies; stats->packet_count++; - stats->byte_count += skb->len; + stats->byte_count += len; stats->tcp_flags |= tcp_flags; unlock: spin_unlock(&stats->lock); @@ -471,7 +472,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) */ key->eth.tci = 0; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); else if (eth->h_proto == htons(ETH_P_8021Q)) if (unlikely(parse_vlan(skb, key))) @@ -690,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1 > sizeof(key->tun_opts)); - memcpy(GENEVE_OPTS(key, tun_info->options_len), + memcpy(TUN_METADATA_OPTS(key, tun_info->options_len), tun_info->options, tun_info->options_len); key->tun_opts_len = tun_info->options_len; } else { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a8b30f334388..a076e445ccc2 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel { struct ovs_tunnel_info { struct ovs_key_ipv4_tunnel tunnel; - const struct geneve_opt *options; + const void *options; u8 options_len; }; @@ -61,10 +61,10 @@ struct ovs_tunnel_info { * maximum size. This allows us to get the benefits of variable length * matching for small options. */ -#define GENEVE_OPTS(flow_key, opt_len) \ - ((struct geneve_opt *)((flow_key)->tun_opts + \ - FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ - opt_len)) +#define TUN_METADATA_OFFSET(opt_len) \ + (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len) +#define TUN_METADATA_OPTS(flow_key, opt_len) \ + ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len))) static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be32 saddr, __be32 daddr, @@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { tun_info->tunnel.tun_id = tun_id; @@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, @@ -197,6 +197,16 @@ struct sw_flow_match { struct sw_flow_mask *mask; }; +#define MAX_UFID_LENGTH 16 /* 128 bits */ + +struct sw_flow_id { + u32 ufid_len; + union { + u32 ufid[MAX_UFID_LENGTH / 4]; + struct sw_flow_key *unmasked_key; + }; +}; + struct sw_flow_actions { struct rcu_head rcu; u32 actions_len; @@ -213,13 +223,15 @@ struct flow_stats { struct sw_flow { struct rcu_head rcu; - struct hlist_node hash_node[2]; - u32 hash; + struct { + struct hlist_node node[2]; + u32 hash; + } flow_table, ufid_table; int stats_last_writer; /* NUMA-node id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; - struct sw_flow_key unmasked_key; + struct sw_flow_id id; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one @@ -243,6 +255,16 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) +{ + return sfid->ufid_len; +} + +static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid) +{ + return !ovs_identifier_is_ufid(sfid); +} + void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, const struct sk_buff *); void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 9645a21d9eaa..8b9a612b39d1 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,14 @@ #include <net/mpls.h> #include "flow_netlink.h" +#include "vport-vxlan.h" + +struct ovs_len_tbl { + int len; + const struct ovs_len_tbl *next; +}; + +#define OVS_ATTR_NESTED -1 static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) @@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. + */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } @@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } +static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, +}; + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ -static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), - [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), - [OVS_KEY_ATTR_TUNNEL] = -1, - [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), +static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, + [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, + [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, + [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, + [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, + [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, + [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, + [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, + [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, + [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, + [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, + [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, + .next = ovs_tunnel_key_lens, }, + [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) { + expected_len = ovs_key_lens[type].len; + if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Key %d has unexpected len %d expected %d", type, nla_len(nla), expected_len); return -EINVAL; @@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); } - opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, - nla_len(a)); + opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), nla_len(a), is_mask); return 0; } +static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, +}; + +static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + unsigned long opt_key_offset; + struct ovs_vxlan_opts opts; + int err; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); + if (err < 0) + return err; + + memset(&opts, 0, sizeof(opts)); + + if (tb[OVS_VXLAN_EXT_GBP]) + opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + return 0; +} + static int ipv4_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + int opts_type = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); int err; - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_OAM] = 0, - [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, - }; - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { OVS_NLERR(log, "Tunnel attr %d out of range max %d", type, OVS_TUNNEL_KEY_ATTR_MAX); return -EINVAL; } - if (ovs_tunnel_key_lens[type] != nla_len(a) && - ovs_tunnel_key_lens[type] != -1) { + if (ovs_tunnel_key_lens[type].len != nla_len(a) && + ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", - type, nla_len(a), ovs_tunnel_key_lens[type]); + type, nla_len(a), ovs_tunnel_key_lens[type].len); return -EINVAL; } @@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); if (err) return err; - tun_flags |= TUNNEL_OPTIONS_PRESENT; + tun_flags |= TUNNEL_GENEVE_OPT; + opts_type = type; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; + + tun_flags |= TUNNEL_VXLAN_OPT; + opts_type = type; break; default: OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", @@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } } + return opts_type; +} + +static int vxlan_opt_to_nlattr(struct sk_buff *skb, + const void *tun_opts, int swkey_tun_opts_len) +{ + const struct ovs_vxlan_opts *opts = tun_opts; + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + if (!nla) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } static int __ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) @@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts && - nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, - swkey_tun_opts_len, tun_opts)) - return -EMSGSIZE; + if (tun_opts) { + if (output->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_VXLAN_OPT && + vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) + return -EMSGSIZE; + } return 0; } static int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { struct nlattr *nla; int err; @@ -675,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log)) + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return 0; } -static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) +static void nlattr_set(struct nlattr *attr, u8 val, + const struct ovs_len_tbl *tbl) { struct nlattr *nla; int rem; /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { - /* We assume that ovs_key_lens[type] == -1 means that type is a - * nested attribute - */ - if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) - nlattr_set(nla, val, false); + if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) + nlattr_set(nla, val, tbl[nla_type(nla)].next); else memset(nla_data(nla), val, nla_len(nla)); } @@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) static void mask_set_nlattr(struct nlattr *attr, u8 val) { - nlattr_set(attr, val, true); + nlattr_set(attr, val, ovs_key_lens); } /** @@ -1095,6 +1180,59 @@ free_newmask: return err; } +static size_t get_ufid_len(const struct nlattr *attr, bool log) +{ + size_t len; + + if (!attr) + return 0; + + len = nla_len(attr); + if (len < 1 || len > MAX_UFID_LENGTH) { + OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", + nla_len(attr), MAX_UFID_LENGTH); + return 0; + } + + return len; +} + +/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID, + * or false otherwise. + */ +bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, + bool log) +{ + sfid->ufid_len = get_ufid_len(attr, log); + if (sfid->ufid_len) + memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); + + return sfid->ufid_len; +} + +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log) +{ + struct sw_flow_key *new_key; + + if (ovs_nla_get_ufid(sfid, ufid, log)) + return 0; + + /* If UFID was not provided, use unmasked key. */ + new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); + if (!new_key) + return -ENOMEM; + memcpy(new_key, key, sizeof(*key)); + sfid->unmasked_key = new_key; + + return 0; +} + +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) +{ + return attr ? nla_get_u32(attr) : 0; +} + /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * @key: Receives extracted in_port, priority, tun_key and skb_mark. @@ -1131,12 +1269,12 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, return metadata_from_nlattrs(&match, &attrs, a, false, log); } -int ovs_nla_put_flow(const struct sw_flow_key *swkey, - const struct sw_flow_key *output, struct sk_buff *skb) +static int __ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, bool is_mask, + struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; struct nlattr *nla, *encap; - bool is_mask = (swkey != output); if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) goto nla_put_failure; @@ -1148,10 +1286,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; if ((swkey->tun_key.ipv4_dst || is_mask)) { - const struct geneve_opt *opts = NULL; + const void *opts = NULL; if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) - opts = GENEVE_OPTS(output, swkey->tun_opts_len); + opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, swkey->tun_opts_len)) @@ -1346,6 +1484,49 @@ nla_put_failure: return -EMSGSIZE; } +int ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, int attr, bool is_mask, + struct sk_buff *skb) +{ + int err; + struct nlattr *nla; + + nla = nla_nest_start(skb, attr); + if (!nla) + return -EMSGSIZE; + err = __ovs_nla_put_key(swkey, output, is_mask, skb); + if (err) + return err; + nla_nest_end(skb, nla); + + return 0; +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, + flow->id.ufid); + + return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->mask->key, &flow->key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->key, &flow->mask->key, + OVS_FLOW_ATTR_MASK, true, skb); +} + #define MAX_ACTIONS_BUFSIZE (32 * 1024) static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) @@ -1540,6 +1721,34 @@ void ovs_match_init(struct sw_flow_match *match, } } +static int validate_geneve_opts(struct sw_flow_key *key) +{ + struct geneve_opt *option; + int opts_len = key->tun_opts_len; + bool crit_opt = false; + + option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); + while (opts_len > 0) { + int len; + + if (opts_len < sizeof(*option)) + return -EINVAL; + + len = sizeof(*option) + option->length * 4; + if (len > opts_len) + return -EINVAL; + + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); + + option = (struct geneve_opt *)((u8 *)option + len); + opts_len -= len; + }; + + key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + + return 0; +} + static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { @@ -1547,36 +1756,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_key key; struct ovs_tunnel_info *tun_info; struct nlattr *a; - int err, start; + int err, start, opts_type; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); - if (err) - return err; + opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + if (opts_type < 0) + return opts_type; if (key.tun_opts_len) { - struct geneve_opt *option = GENEVE_OPTS(&key, - key.tun_opts_len); - int opts_len = key.tun_opts_len; - bool crit_opt = false; - - while (opts_len > 0) { - int len; - - if (opts_len < sizeof(*option)) - return -EINVAL; - - len = sizeof(*option) + option->length * 4; - if (len > opts_len) - return -EINVAL; - - crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); - - option = (struct geneve_opt *)((u8 *)option + len); - opts_len -= len; - }; - - key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + switch (opts_type) { + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + err = validate_geneve_opts(&key); + if (err < 0) + return err; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + break; + } }; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); @@ -1597,9 +1793,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, * everything else will go away after flow setup. We can append * it to tun_info and then point there. */ - memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), - key.tun_opts_len); - tun_info->options = (struct geneve_opt *)(tun_info + 1); + memcpy((tun_info + 1), + TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); + tun_info->options = (tun_info + 1); } else { tun_info->options = NULL; } @@ -1622,8 +1818,8 @@ static int validate_set(const struct nlattr *a, return -EINVAL; if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type] != nla_len(ovs_key) && - ovs_key_lens[key_type] != -1)) + (ovs_key_lens[key_type].len != nla_len(ovs_key) && + ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) return -EINVAL; switch (key_type) { @@ -1753,7 +1949,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, __be16 eth_type, __be16 vlan_tci, bool log) { const struct nlattr *a; - bool out_tnl_port = false; int rem, err; if (depth >= SAMPLE_ACTION_DEPTH) @@ -1796,8 +1991,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_OUTPUT: if (nla_get_u32(a) >= DP_MAX_PORTS) return -EINVAL; - out_tnl_port = false; - break; case OVS_ACTION_ATTR_HASH: { @@ -1832,12 +2025,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_PUSH_MPLS: { const struct ovs_action_push_mpls *mpls = nla_data(a); - /* Networking stack do not allow simultaneous Tunnel - * and MPLS GSO. - */ - if (out_tnl_port) - return -EINVAL; - if (!eth_p_mpls(mpls->mpls_ethertype)) return -EINVAL; /* Prohibit push MPLS other than to a white list @@ -1873,11 +2060,9 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &out_tnl_port, eth_type, log); + &skip_copy, eth_type, log); if (err) return err; - - skip_copy = out_tnl_port; break; case OVS_ACTION_ATTR_SAMPLE: diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 577f12be3459..5c3d75bff310 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -43,16 +43,25 @@ size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask); -int ovs_nla_put_flow(const struct sw_flow_key *, - const struct sw_flow_key *, struct sk_buff *); +int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, + int attr, bool is_mask, struct sk_buff *); int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, bool log); +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); + int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); +bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log); +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr); + int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 5899bf161c61..5e57628e6584 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -139,6 +139,8 @@ static void flow_free(struct sw_flow *flow) { int node; + if (ovs_identifier_is_key(&flow->id)) + kfree(flow->id.unmasked_key); kfree((struct sw_flow_actions __force *)flow->sf_acts); for_each_node(node) if (flow->stats[node]) @@ -200,18 +202,28 @@ static struct table_instance *table_instance_alloc(int new_size) int ovs_flow_tbl_init(struct flow_table *table) { - struct table_instance *ti; + struct table_instance *ti, *ufid_ti; ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) return -ENOMEM; + ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!ufid_ti) + goto free_ti; + rcu_assign_pointer(table->ti, ti); + rcu_assign_pointer(table->ufid_ti, ufid_ti); INIT_LIST_HEAD(&table->mask_list); table->last_rehash = jiffies; table->count = 0; + table->ufid_count = 0; return 0; + +free_ti: + __table_instance_destroy(ti); + return -ENOMEM; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) @@ -221,13 +233,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, bool deferred) +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti, + bool deferred) { int i; if (!ti) return; + BUG_ON(!ufid_ti); if (ti->keep_flows) goto skip_flows; @@ -236,18 +251,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred) struct hlist_head *head = flex_array_get(ti->buckets, i); struct hlist_node *n; int ver = ti->node_ver; + int ufid_ver = ufid_ti->node_ver; - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del_rcu(&flow->hash_node[ver]); + hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { + hlist_del_rcu(&flow->flow_table.node[ver]); + if (ovs_identifier_is_ufid(&flow->id)) + hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); ovs_flow_free(flow, deferred); } } skip_flows: - if (deferred) + if (deferred) { call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - else + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); + } else { __table_instance_destroy(ti); + __table_instance_destroy(ufid_ti); + } } /* No need for locking this function is called from RCU callback or @@ -256,8 +277,9 @@ skip_flows: void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); + struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); - table_instance_destroy(ti, false); + table_instance_destroy(ti, ufid_ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -272,7 +294,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, while (*bucket < ti->n_buckets) { i = 0; head = flex_array_get(ti->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) { if (i < *last) { i++; continue; @@ -294,16 +316,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) (hash & (ti->n_buckets - 1))); } -static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +static void table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->flow_table.hash); + hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head); +} + +static void ufid_table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) { struct hlist_head *head; - head = find_bucket(ti, flow->hash); - hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); + head = find_bucket(ti, flow->ufid_table.hash); + hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head); } static void flow_table_copy_flows(struct table_instance *old, - struct table_instance *new) + struct table_instance *new, bool ufid) { int old_ver; int i; @@ -318,15 +350,21 @@ static void flow_table_copy_flows(struct table_instance *old, head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, head, hash_node[old_ver]) - table_instance_insert(new, flow); + if (ufid) + hlist_for_each_entry(flow, head, + ufid_table.node[old_ver]) + ufid_table_instance_insert(new, flow); + else + hlist_for_each_entry(flow, head, + flow_table.node[old_ver]) + table_instance_insert(new, flow); } old->keep_flows = true; } static struct table_instance *table_instance_rehash(struct table_instance *ti, - int n_buckets) + int n_buckets, bool ufid) { struct table_instance *new_ti; @@ -334,32 +372,45 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti, if (!new_ti) return NULL; - flow_table_copy_flows(ti, new_ti); + flow_table_copy_flows(ti, new_ti, ufid); return new_ti; } int ovs_flow_tbl_flush(struct flow_table *flow_table) { - struct table_instance *old_ti; - struct table_instance *new_ti; + struct table_instance *old_ti, *new_ti; + struct table_instance *old_ufid_ti, *new_ufid_ti; - old_ti = ovsl_dereference(flow_table->ti); new_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!new_ti) return -ENOMEM; + new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ufid_ti) + goto err_free_ti; + + old_ti = ovsl_dereference(flow_table->ti); + old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); rcu_assign_pointer(flow_table->ti, new_ti); + rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti); flow_table->last_rehash = jiffies; flow_table->count = 0; + flow_table->ufid_count = 0; - table_instance_destroy(old_ti, true); + table_instance_destroy(old_ti, old_ufid_ti, true); return 0; + +err_free_ti: + __table_instance_destroy(new_ti); + return -ENOMEM; } -static u32 flow_hash(const struct sw_flow_key *key, int key_start, - int key_end) +static u32 flow_hash(const struct sw_flow_key *key, + const struct sw_flow_key_range *range) { + int key_start = range->start; + int key_end = range->end; const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); int hash_u32s = (key_end - key_start) >> 2; @@ -395,19 +446,20 @@ static bool cmp_key(const struct sw_flow_key *key1, static bool flow_cmp_masked_key(const struct sw_flow *flow, const struct sw_flow_key *key, - int key_start, int key_end) + const struct sw_flow_key_range *range) { - return cmp_key(&flow->key, key, key_start, key_end); + return cmp_key(&flow->key, key, range->start, range->end); } -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match) +static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + const struct sw_flow_match *match) { struct sw_flow_key *key = match->key; int key_start = flow_key_start(key); int key_end = match->range.end; - return cmp_key(&flow->unmasked_key, key, key_start, key_end); + BUG_ON(ovs_identifier_is_ufid(&flow->id)); + return cmp_key(flow->id.unmasked_key, key, key_start, key_end); } static struct sw_flow *masked_flow_lookup(struct table_instance *ti, @@ -416,18 +468,15 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, { struct sw_flow *flow; struct hlist_head *head; - int key_start = mask->range.start; - int key_end = mask->range.end; u32 hash; struct sw_flow_key masked_key; ovs_flow_mask_key(&masked_key, unmasked, mask); - hash = flow_hash(&masked_key, key_start, key_end); + hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { - if (flow->mask == mask && flow->hash == hash && - flow_cmp_masked_key(flow, &masked_key, - key_start, key_end)) + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { + if (flow->mask == mask && flow->flow_table.hash == hash && + flow_cmp_masked_key(flow, &masked_key, &mask->range)) return flow; } return NULL; @@ -469,7 +518,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, /* Always called under ovs-mutex. */ list_for_each_entry(mask, &tbl->mask_list, list) { flow = masked_flow_lookup(ti, match->key, mask); - if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */ + if (flow && ovs_identifier_is_key(&flow->id) && + ovs_flow_cmp_unmasked_key(flow, match)) + return flow; + } + return NULL; +} + +static u32 ufid_hash(const struct sw_flow_id *sfid) +{ + return jhash(sfid->ufid, sfid->ufid_len, 0); +} + +static bool ovs_flow_cmp_ufid(const struct sw_flow *flow, + const struct sw_flow_id *sfid) +{ + if (flow->id.ufid_len != sfid->ufid_len) + return false; + + return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len); +} + +bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return flow_cmp_masked_key(flow, match->key, &match->range); + + return ovs_flow_cmp_unmasked_key(flow, match); +} + +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, + const struct sw_flow_id *ufid) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti); + struct sw_flow *flow; + struct hlist_head *head; + u32 hash; + + hash = ufid_hash(ufid); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { + if (flow->ufid_table.hash == hash && + ovs_flow_cmp_ufid(flow, ufid)) return flow; } return NULL; @@ -486,9 +576,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) return num; } -static struct table_instance *table_instance_expand(struct table_instance *ti) +static struct table_instance *table_instance_expand(struct table_instance *ti, + bool ufid) { - return table_instance_rehash(ti, ti->n_buckets * 2); + return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } /* Remove 'mask' from the mask list, if it is not needed any more. */ @@ -513,10 +604,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); + struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[ti->node_ver]); + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); table->count--; + if (ovs_identifier_is_ufid(&flow->id)) { + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); + table->ufid_count--; + } /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be * accessible as long as the RCU read lock is held. @@ -585,34 +681,64 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, } /* Must be called with OVS mutex held. */ -int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, - const struct sw_flow_mask *mask) +static void flow_key_insert(struct flow_table *table, struct sw_flow *flow) { struct table_instance *new_ti = NULL; struct table_instance *ti; - int err; - - err = flow_mask_insert(table, flow, mask); - if (err) - return err; - flow->hash = flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); + flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range); ti = ovsl_dereference(table->ti); table_instance_insert(ti, flow); table->count++; /* Expand table, if necessary, to make room. */ if (table->count > ti->n_buckets) - new_ti = table_instance_expand(ti); + new_ti = table_instance_expand(ti, false); else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) - new_ti = table_instance_rehash(ti, ti->n_buckets); + new_ti = table_instance_rehash(ti, ti->n_buckets, false); if (new_ti) { rcu_assign_pointer(table->ti, new_ti); - table_instance_destroy(ti, true); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); table->last_rehash = jiffies; } +} + +/* Must be called with OVS mutex held. */ +static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct table_instance *ti; + + flow->ufid_table.hash = ufid_hash(&flow->id); + ti = ovsl_dereference(table->ufid_ti); + ufid_table_instance_insert(ti, flow); + table->ufid_count++; + + /* Expand table, if necessary, to make room. */ + if (table->ufid_count > ti->n_buckets) { + struct table_instance *new_ti; + + new_ti = table_instance_expand(ti, true); + if (new_ti) { + rcu_assign_pointer(table->ufid_ti, new_ti); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + } + } +} + +/* Must be called with OVS mutex held. */ +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + const struct sw_flow_mask *mask) +{ + int err; + + err = flow_mask_insert(table, flow, mask); + if (err) + return err; + flow_key_insert(table, flow); + if (ovs_identifier_is_ufid(&flow->id)) + flow_ufid_insert(table, flow); + return 0; } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 309fa6415689..616eda10d955 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -47,9 +47,11 @@ struct table_instance { struct flow_table { struct table_instance __rcu *ti; + struct table_instance __rcu *ufid_ti; struct list_head mask_list; unsigned long last_rehash; unsigned int count; + unsigned int ufid_count; }; extern struct kmem_cache *flow_stats_cache; @@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, const struct sw_flow_match *match); -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match); +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *, + const struct sw_flow_id *); + +bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 347fa2325b22..bf02fd5808c9 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -9,8 +9,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/version.h> - #include <linux/in.h> #include <linux/ip.h> #include <linux/net.h> @@ -90,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) opts_len = geneveh->opt_len * 4; - flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | (geneveh->oam ? TUNNEL_OAM : 0) | (geneveh->critical ? TUNNEL_CRIT_OPT : 0); @@ -172,7 +170,7 @@ error: static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) { - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct ovs_tunnel_info *tun_info; struct net *net = ovs_dp_get_net(vport->dp); struct geneve_port *geneve_port = geneve_vport(vport); @@ -180,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 sport; struct rtable *rt; struct flowi4 fl; - u8 vni[3]; + u8 vni[3], opts_len, *opts; __be16 df; int err; @@ -191,16 +189,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &tun_info->tunnel; - - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; @@ -211,15 +200,25 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tunnel_id_to_vni(tun_key->tun_id, vni); skb->ignore_df = 1; + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { + opts = (u8 *)tun_info->options; + opts_len = tun_info->options_len; + } else { + opts = NULL; + opts_len = 0; + } + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, - tun_key->tun_flags, vni, - tun_info->options_len, (u8 *)tun_info->options, - false); + tun_key->tun_flags, vni, opts_len, opts, + !!(tun_key->tun_flags & TUNNEL_CSUM), false); if (err < 0) ip_rt_put(rt); + return err; + error: + kfree_skb(skb); return err; } diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 6b69df545b1d..f17ac9642f4e 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -73,7 +73,7 @@ static struct sk_buff *__build_header(struct sk_buff *skb, skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); if (IS_ERR(skb)) - return NULL; + return skb; tpi.flags = filter_tnl_flags(tun_key->tun_flags); tpi.proto = htons(ETH_P_TEB); @@ -134,7 +134,7 @@ static int gre_err(struct sk_buff *skb, u32 info, static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct flowi4 fl; struct rtable *rt; int min_headroom; @@ -144,27 +144,21 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; - goto error; + goto err_free_skb; } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_GRE; - - rt = ip_route_output_key(net, &fl); - if (IS_ERR(rt)) - return PTR_ERR(rt); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto err_free_skb; + } tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags); min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + tunnel_hlen + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { int head_delta = SKB_DATA_ALIGN(min_headroom - skb_headroom(skb) + @@ -183,8 +177,9 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) /* Push Tunnel header. */ skb = __build_header(skb, tunnel_hlen); - if (unlikely(!skb)) { - err = 0; + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + skb = NULL; goto err_free_rt; } @@ -198,7 +193,8 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false); err_free_rt: ip_rt_put(rt); -error: +err_free_skb: + kfree_skb(skb); return err; } diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 38f95a52241b..ff07d4062d60 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -40,6 +40,7 @@ #include "datapath.h" #include "vport.h" +#include "vport-vxlan.h" /** * struct vxlan_port - Keeps track of open UDP ports @@ -49,6 +50,7 @@ struct vxlan_port { struct vxlan_sock *vs; char name[IFNAMSIZ]; + u32 exts; /* VXLAN_F_* in <net/vxlan.h> */ }; static struct vport_ops ovs_vxlan_vport_ops; @@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) } /* Called with rcu_read_lock and BH disabled. */ -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; + struct vxlan_port *vxlan_port; struct vport *vport = vs->data; struct iphdr *iph; + struct ovs_vxlan_opts opts = { + .gbp = md->gbp, + }; __be64 key; + __be16 flags; + + flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0); + vxlan_port = vxlan_vport(vport); + if (vxlan_port->exts & VXLAN_F_GBP) + flags |= TUNNEL_VXLAN_OPT; /* Save outer tunnel values */ iph = ip_hdr(skb); - key = cpu_to_be64(ntohl(vx_vni) >> 8); + key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, TUNNEL_KEY, NULL, 0); + key, flags, &opts, sizeof(opts)); ovs_vport_receive(vport, skb, &tun_info); } @@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; + + if (vxlan_port->exts) { + struct nlattr *exts; + + exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + if (!exts) + return -EMSGSIZE; + + if (vxlan_port->exts & VXLAN_F_GBP && + nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) + return -EMSGSIZE; + + nla_nest_end(skb, exts); + } + return 0; } @@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport) ovs_vport_deferred_free(vport); } +static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, +}; + +static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr) +{ + struct nlattr *exts[OVS_VXLAN_EXT_MAX+1]; + struct vxlan_port *vxlan_port; + int err; + + if (nla_len(attr) < sizeof(struct nlattr)) + return -EINVAL; + + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + if (err < 0) + return err; + + vxlan_port = vxlan_vport(vport); + + if (exts[OVS_VXLAN_EXT_GBP]) + vxlan_port->exts |= VXLAN_F_GBP; + + return 0; +} + static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); + a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); + if (a) { + err = vxlan_configure_exts(vport, a); + if (err) { + ovs_vport_free(vport); + goto error; + } + } + + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, + vxlan_port->exts); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -140,17 +203,34 @@ error: return ERR_PTR(err); } +static int vxlan_ext_gbp(struct sk_buff *skb) +{ + const struct ovs_tunnel_info *tun_info; + const struct ovs_vxlan_opts *opts; + + tun_info = OVS_CB(skb)->egress_tun_info; + opts = tun_info->options; + + if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT && + tun_info->options_len >= sizeof(*opts)) + return opts->gbp; + else + return 0; +} + static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; + struct vxlan_metadata md = {0}; struct rtable *rt; struct flowi4 fl; __be16 src_port; __be16 df; int err; + u32 vxflags; if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; @@ -158,15 +238,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; @@ -178,16 +250,20 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->ignore_df = 1; src_port = udp_flow_src_port(net, skb, 0, 0, true); + md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); + md.gbp = vxlan_ext_gbp(skb); + vxflags = vxlan_port->exts | + (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0); - err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, - fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(tun_key->tun_id) << 8), - false); + &md, false, vxflags); if (err < 0) ip_rt_put(rt); + return err; error: + kfree_skb(skb); return err; } diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h new file mode 100644 index 000000000000..4b08233e73d5 --- /dev/null +++ b/net/openvswitch/vport-vxlan.h @@ -0,0 +1,11 @@ +#ifndef VPORT_VXLAN_H +#define VPORT_VXLAN_H 1 + +#include <linux/kernel.h> +#include <linux/types.h> + +struct ovs_vxlan_opts { + __u32 gbp; +}; + +#endif diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 9584526c0778..ec2954ffc690 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -480,7 +480,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += skb->len + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; @@ -519,10 +520,9 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) u64_stats_update_end(&stats->syncp); } else if (sent < 0) { ovs_vport_record_error(vport, VPORT_E_TX_ERROR); - kfree_skb(skb); - } else + } else { ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); - + } return sent; } @@ -595,14 +595,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, * The process may need to be changed if the corresponding process * in vports ops changed. */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb_mark; - fl.flowi4_proto = ipproto; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 99c8e71d9e6c..f8ae295fb001 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -236,4 +236,22 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, int ovs_vport_ops_register(struct vport_ops *ops); void ovs_vport_ops_unregister(struct vport_ops *ops); +static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, + const struct ovs_key_ipv4_tunnel *key, + u32 mark, + struct flowi4 *fl, + u8 protocol) +{ + struct rtable *rt; + + memset(fl, 0, sizeof(*fl)); + fl->daddr = key->ipv4_dst; + fl->saddr = key->ipv4_src; + fl->flowi4_tos = RT_TOS(key->ipv4_tos); + fl->flowi4_mark = mark; + fl->flowi4_proto = protocol; + + rt = ip_route_output_key(net, fl); + return rt; +} #endif /* vport.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e52a44785681..9c28cec1a083 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -785,6 +785,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1, struct tpacket3_hdr *last_pkt; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; + struct sock *sk = &po->sk; if (po->stats.stats3.tp_drops) status |= TP_STATUS_LOSING; @@ -809,6 +810,8 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1, /* Flush the block */ prb_flush_block(pkc1, pbd1, status); + sk->sk_data_ready(sk); + pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); } @@ -983,8 +986,8 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { - if (vlan_tx_tag_present(pkc->skb)) { - ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); + if (skb_vlan_tag_present(pkc->skb)) { + ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -1997,8 +2000,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_net = netoff; h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - if (vlan_tx_tag_present(skb)) { - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -2052,12 +2055,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, smp_wmb(); #endif - if (po->tp_version <= TPACKET_V2) + if (po->tp_version <= TPACKET_V2) { __packet_set_status(po, h.raw, status); - else + sk->sk_data_ready(sk); + } else { prb_clear_blk_fill_status(&po->rx_ring); - - sk->sk_data_ready(sk); + } drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { @@ -2099,7 +2102,7 @@ static bool ll_header_truncated(const struct net_device *dev, int len) { /* net device doesn't like empty head */ if (unlikely(len <= dev->hard_header_len)) { - net_warn_ratelimited("%s: packet size is too short (%d < %d)\n", + net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n", current->comm, len, dev->hard_header_len); return true; } @@ -2514,7 +2517,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) err = -EINVAL; if (sock->type == SOCK_DGRAM) { offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); - if (unlikely(offset) < 0) + if (unlikely(offset < 0)) goto out_free; } else { if (ll_header_truncated(dev, len)) @@ -3007,8 +3010,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - if (vlan_tx_tag_present(skb)) { - aux.tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { diff --git a/net/packet/diag.c b/net/packet/diag.c index 92f2c7107eec..0ed68f0238bf 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -177,7 +177,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, PACKET_DIAG_FILTER)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index b64151ade6b3..bc5ee5fbe6ae 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -121,7 +121,8 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, ifm->ifa_index = dev->ifindex; if (nla_put_u8(skb, IFA_LOCAL, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -190,7 +191,8 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, if (nla_put_u8(skb, RTA_DST, dst) || nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -270,27 +272,23 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; + u8 addr; rcu_read_lock(); - for (addr = 0; addr < 64; addr++) { - struct net_device *dev; + for (addr = cb->args[0]; addr < 64; addr++) { + struct net_device *dev = phonet_route_get_rcu(net, addr << 2); - dev = phonet_route_get_rcu(net, addr << 2); if (!dev) continue; - if (addr_idx++ < addr_start_idx) - continue; if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE)) + cb->nlh->nlmsg_seq, RTM_NEWROUTE) < 0) goto out; } out: rcu_read_unlock(); - cb->args[0] = addr_idx; - cb->args[1] = 0; + cb->args[0] = addr; return skb->len; } diff --git a/net/rds/message.c b/net/rds/message.c index ff2202218187..5a21e6f5986f 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -325,7 +325,8 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to) copied = 0; while (iov_iter_count(to) && copied < len) { - to_copy = min(iov_iter_count(to), sg->length - vec_off); + to_copy = min_t(unsigned long, iov_iter_count(to), + sg->length - vec_off); to_copy = min_t(unsigned long, to_copy, len - copied); rds_stats_add(s_copy_to_user, to_copy); diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index a6915b358354..d978f2f46ff3 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -184,7 +184,6 @@ static struct platform_driver rfkill_gpio_driver = { .remove = rfkill_gpio_remove, .driver = { .name = "rfkill_gpio", - .owner = THIS_MODULE, .acpi_match_table = ACPI_PTR(rfkill_acpi_match), }, }; diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c index cf5b145902e5..50cd26a48e87 100644 --- a/net/rfkill/rfkill-regulator.c +++ b/net/rfkill/rfkill-regulator.c @@ -142,7 +142,6 @@ static struct platform_driver rfkill_regulator_driver = { .remove = rfkill_regulator_remove, .driver = { .name = "rfkill-regulator", - .owner = THIS_MODULE, }, }; diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 74c0fcd36838..5394b6be46ec 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -42,6 +42,11 @@ void rxrpc_UDP_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } + if (!skb->len) { + _leave("UDP empty message"); + kfree_skb(skb); + return; + } rxrpc_new_skb(skb); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index c54c9d9d1ffb..899d0319f2b2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -698,6 +698,30 @@ config NET_ACT_VLAN To compile this code as a module, choose M here: the module will be called act_vlan. +config NET_ACT_BPF + tristate "BPF based action" + depends on NET_CLS_ACT + ---help--- + Say Y here to execute BPF code on packets. The BPF code will decide + if the packet should be dropped or not. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_bpf. + +config NET_ACT_CONNMARK + tristate "Netfilter Connection Mark Retriever" + depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + depends on NF_CONNTRACK && NF_CONNTRACK_MARK + ---help--- + Say Y here to allow retrieving of conn mark + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_connmark. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 679f24ae7f93..7ca7f4c1b8c2 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -17,6 +17,8 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o +obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o +obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c new file mode 100644 index 000000000000..82c5d7fc1988 --- /dev/null +++ b/net/sched/act_bpf.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/filter.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + +#include <linux/tc_act/tc_bpf.h> +#include <net/tc_act/tc_bpf.h> + +#define BPF_TAB_MASK 15 + +static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_bpf *b = a->priv; + int action; + int filter_res; + + spin_lock(&b->tcf_lock); + b->tcf_tm.lastuse = jiffies; + bstats_update(&b->tcf_bstats, skb); + action = b->tcf_action; + + filter_res = BPF_PROG_RUN(b->filter, skb); + if (filter_res == 0) { + /* Return code 0 from the BPF program + * is being interpreted as a drop here. + */ + action = TC_ACT_SHOT; + b->tcf_qstats.drops++; + } + + spin_unlock(&b->tcf_lock); + return action; +} + +static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *tp = skb_tail_pointer(skb); + struct tcf_bpf *b = a->priv; + struct tc_act_bpf opt = { + .index = b->tcf_index, + .refcnt = b->tcf_refcnt - ref, + .bindcnt = b->tcf_bindcnt - bind, + .action = b->tcf_action, + }; + struct tcf_t t; + struct nlattr *nla; + + if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) + goto nla_put_failure; + + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * + sizeof(struct sock_filter)); + if (!nla) + goto nla_put_failure; + + memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); + + t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(b->tcf_tm.expires); + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, tp); + return -1; +} + +static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { + [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, + [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, + [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, + .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, +}; + +static int tcf_bpf_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + struct tcf_bpf *b; + u16 bpf_size, bpf_num_ops; + struct sock_filter *bpf_ops; + struct sock_fprog_kern tmp; + struct bpf_prog *fp; + int ret; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + if (ret < 0) + return ret; + + if (!tb[TCA_ACT_BPF_PARMS] || + !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS]) + return -EINVAL; + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); + + bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) + return -EINVAL; + + bpf_size = bpf_num_ops * sizeof(*bpf_ops); + if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) + return -EINVAL; + + bpf_ops = kzalloc(bpf_size, GFP_KERNEL); + if (!bpf_ops) + return -ENOMEM; + + memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); + + tmp.len = bpf_num_ops; + tmp.filter = bpf_ops; + + ret = bpf_prog_create(&fp, &tmp); + if (ret) + goto free_bpf_ops; + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); + if (ret) + goto destroy_fp; + + ret = ACT_P_CREATED; + } else { + if (bind) + goto destroy_fp; + tcf_hash_release(a, bind); + if (!ovr) { + ret = -EEXIST; + goto destroy_fp; + } + } + + b = to_bpf(a); + spin_lock_bh(&b->tcf_lock); + b->tcf_action = parm->action; + b->bpf_num_ops = bpf_num_ops; + b->bpf_ops = bpf_ops; + b->filter = fp; + spin_unlock_bh(&b->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(a); + return ret; + +destroy_fp: + bpf_prog_destroy(fp); +free_bpf_ops: + kfree(bpf_ops); + return ret; +} + +static void tcf_bpf_cleanup(struct tc_action *a, int bind) +{ + struct tcf_bpf *b = a->priv; + + bpf_prog_destroy(b->filter); +} + +static struct tc_action_ops act_bpf_ops = { + .kind = "bpf", + .type = TCA_ACT_BPF, + .owner = THIS_MODULE, + .act = tcf_bpf, + .dump = tcf_bpf_dump, + .cleanup = tcf_bpf_cleanup, + .init = tcf_bpf_init, +}; + +static int __init bpf_init_module(void) +{ + return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); +} + +static void __exit bpf_cleanup_module(void) +{ + tcf_unregister_action(&act_bpf_ops); +} + +module_init(bpf_init_module); +module_exit(bpf_cleanup_module); + +MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); +MODULE_DESCRIPTION("TC BPF based action"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c new file mode 100644 index 000000000000..8e472518f9f6 --- /dev/null +++ b/net/sched/act_connmark.c @@ -0,0 +1,192 @@ +/* + * net/sched/act_connmark.c netfilter connmark retriever action + * skb mark is over-written + * + * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/pkt_cls.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <net/act_api.h> +#include <uapi/linux/tc_act/tc_connmark.h> +#include <net/tc_act/tc_connmark.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> + +#define CONNMARK_TAB_MASK 3 + +static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + const struct nf_conntrack_tuple_hash *thash; + struct nf_conntrack_tuple tuple; + enum ip_conntrack_info ctinfo; + struct tcf_connmark_info *ca = a->priv; + struct nf_conn *c; + int proto; + + spin_lock(&ca->tcf_lock); + ca->tcf_tm.lastuse = jiffies; + bstats_update(&ca->tcf_bstats, skb); + + if (skb->protocol == htons(ETH_P_IP)) { + if (skb->len < sizeof(struct iphdr)) + goto out; + + proto = NFPROTO_IPV4; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (skb->len < sizeof(struct ipv6hdr)) + goto out; + + proto = NFPROTO_IPV6; + } else { + goto out; + } + + c = nf_ct_get(skb, &ctinfo); + if (c) { + skb->mark = c->mark; + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + nf_ct_put(c); + goto out; + } + + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + proto, &tuple)) + goto out; + + thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple); + if (!thash) + goto out; + + c = nf_ct_tuplehash_to_ctrack(thash); + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + skb->mark = c->mark; + nf_ct_put(c); + +out: + skb->nfct = NULL; + spin_unlock(&ca->tcf_lock); + return ca->tcf_action; +} + +static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { + [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) }, +}; + +static int tcf_connmark_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_connmark_info *ci; + struct tc_connmark *parm; + int ret = 0; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy); + if (ret < 0) + return ret; + + parm = nla_data(tb[TCA_CONNMARK_PARMS]); + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind); + if (ret) + return ret; + + ci = to_connmark(a); + ci->tcf_action = parm->action; + ci->zone = parm->zone; + + tcf_hash_insert(a); + ret = ACT_P_CREATED; + } else { + ci = to_connmark(a); + if (bind) + return 0; + tcf_hash_release(a, bind); + if (!ovr) + return -EEXIST; + /* replacing action and zone */ + ci->tcf_action = parm->action; + ci->zone = parm->zone; + } + + return ret; +} + +static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_connmark_info *ci = a->priv; + + struct tc_connmark opt = { + .index = ci->tcf_index, + .refcnt = ci->tcf_refcnt - ref, + .bindcnt = ci->tcf_bindcnt - bind, + .action = ci->tcf_action, + .zone = ci->zone, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); + if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_connmark_ops = { + .kind = "connmark", + .type = TCA_ACT_CONNMARK, + .owner = THIS_MODULE, + .act = tcf_connmark, + .dump = tcf_connmark_dump, + .init = tcf_connmark_init, +}; + +static int __init connmark_init_module(void) +{ + return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK); +} + +static void __exit connmark_cleanup_module(void) +{ + tcf_unregister_action(&act_connmark_ops); +} + +module_init(connmark_init_module); +module_exit(connmark_cleanup_module); +MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>"); +MODULE_DESCRIPTION("Connection tracking mark restoring"); +MODULE_LICENSE("GPL"); + diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index edbf40dac709..4cd5cf1aedf8 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb, if (unlikely(action == TC_ACT_SHOT)) goto drop; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 5aed341406c2..fc399db86f11 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -65,9 +65,12 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) if (head == NULL) return 0UL; - list_for_each_entry(f, &head->flist, link) - if (f->handle == handle) + list_for_each_entry(f, &head->flist, link) { + if (f->handle == handle) { l = (unsigned long) f; + break; + } + } return l; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 84c8219c3e1c..5f3ee9e4b5bf 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -37,7 +37,7 @@ struct cls_bpf_prog { struct tcf_result res; struct list_head link; u32 handle; - u16 bpf_len; + u16 bpf_num_ops; struct tcf_proto *tp; struct rcu_head rcu; }; @@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct tcf_exts exts; struct sock_fprog_kern tmp; struct bpf_prog *fp; - u16 bpf_size, bpf_len; + u16 bpf_size, bpf_num_ops; u32 classid; int ret; @@ -173,13 +173,18 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, return ret; classid = nla_get_u32(tb[TCA_BPF_CLASSID]); - bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]); - if (bpf_len > BPF_MAXINSNS || bpf_len == 0) { + bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) { + ret = -EINVAL; + goto errout; + } + + bpf_size = bpf_num_ops * sizeof(*bpf_ops); + if (bpf_size != nla_len(tb[TCA_BPF_OPS])) { ret = -EINVAL; goto errout; } - bpf_size = bpf_len * sizeof(*bpf_ops); bpf_ops = kzalloc(bpf_size, GFP_KERNEL); if (bpf_ops == NULL) { ret = -ENOMEM; @@ -188,14 +193,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); - tmp.len = bpf_len; + tmp.len = bpf_num_ops; tmp.filter = bpf_ops; ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; - prog->bpf_len = bpf_len; + prog->bpf_num_ops = bpf_num_ops; prog->bpf_ops = bpf_ops; prog->filter = fp; prog->res.classid = classid; @@ -215,15 +220,21 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, struct cls_bpf_head *head) { unsigned int i = 0x80000000; + u32 handle; do { if (++head->hgen == 0x7FFFFFFF) head->hgen = 1; } while (--i > 0 && cls_bpf_get(tp, head->hgen)); - if (i == 0) + + if (unlikely(i == 0)) { pr_err("Insufficient number of handles\n"); + handle = 0; + } else { + handle = head->hgen; + } - return i; + return handle; } static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, @@ -303,10 +314,10 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len)) + if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) goto nla_put_failure; - nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len * + nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * sizeof(struct sock_filter)); if (nla == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 15d68f24a521..461410394d08 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->dst) return ntohl(flow->dst); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) @@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys if (flow->ports) return ntohs(flow->port16[1]); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -156,7 +156,7 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 5b4a4efe468c..a3d79c8bf3b8 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct net_device *dev, *indev = NULL; int ret, network_offset; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): acpar.family = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index c8f8c399b99a..b5294ce20cd4 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,7 +176,7 @@ META_COLLECTOR(int_vlan_tag) { unsigned short tag; - tag = vlan_tx_tag_get(skb); + tag = skb_vlan_tag_get(skb); if (!tag && __vlan_get_tag(skb, &tag)) *err = -1; else @@ -197,7 +197,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = skb->protocol; + dst->value = tc_skb_protocol(skb); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 76f402e05bd6..243b7d169d61 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1807,7 +1807,7 @@ done: int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - __be16 protocol = skb->protocol; + __be16 protocol = tc_skb_protocol(skb); int err; for (; tp; tp = rcu_dereference_bh(tp->next)) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 227114f27f94..66700a6116aa 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -289,7 +289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, index); - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); @@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mask[index] != 0xff || p->value[index]) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb->protocol)); + __func__, ntohs(tc_skb_protocol(skb))); break; } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 9b05924cc386..2a50f5c62070 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1,7 +1,7 @@ /* * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) * - * Copyright (C) 2013 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -471,7 +471,7 @@ begin: goto out; rate = q->flow_max_rate; - if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); if (rate != ~0U) { diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6ada42396a24..e02687185a59 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -122,13 +122,6 @@ teql_peek(struct Qdisc *sch) return NULL; } -static inline void -teql_neigh_release(struct neighbour *n) -{ - if (n) - neigh_release(n); -} - static void teql_reset(struct Qdisc *sch) { @@ -249,8 +242,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, - NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + haddr, NULL, skb->len); if (err < 0) err = -EINVAL; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index f791edd64d6c..197c3f59ecbf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -391,8 +391,7 @@ void sctp_association_free(struct sctp_association *asoc) sctp_asconf_queue_teardown(asoc); /* Free pending address space being deleted */ - if (asoc->asconf_addr_del_pending != NULL) - kfree(asoc->asconf_addr_del_pending); + kfree(asoc->asconf_addr_del_pending); /* AUTH - Free the endpoint shared keys */ sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); @@ -1182,7 +1181,6 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.peer_hmacs = new->peer.peer_hmacs; new->peer.peer_hmacs = NULL; - sctp_auth_key_put(asoc->asoc_shared_key); sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2625eccb77d5..aafe94bf292e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1603,7 +1603,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sctp_assoc_t associd = 0; sctp_cmsgs_t cmsgs = { NULL }; sctp_scope_t scope; - bool fill_sinfo_ttl = false; + bool fill_sinfo_ttl = false, wait_connect = false; struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; __u16 sinfo_flags = 0; @@ -1943,6 +1943,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, if (err < 0) goto out_free; + wait_connect = true; pr_debug("%s: we associated primitively\n", __func__); } @@ -1980,6 +1981,11 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sctp_datamsg_put(datamsg); err = msg_len; + if (unlikely(wait_connect)) { + timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT); + sctp_wait_for_connect(asoc, &timeo); + } + /* If we are already past ASSOCIATE, the lower * layers are responsible for association cleanup. */ diff --git a/net/socket.c b/net/socket.c index 8809afccf7fa..3326d67482ac 100644 --- a/net/socket.c +++ b/net/socket.c @@ -113,7 +113,6 @@ unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, @@ -151,7 +150,6 @@ static const struct file_operations socket_file_ops = { .compat_ioctl = compat_sock_ioctl, #endif .mmap = sock_mmap, - .open = sock_no_open, /* special open code to disallow open via /proc */ .release = sock_close, .fasync = sock_fasync, .sendpage = sock_sendpage, @@ -374,7 +372,6 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) path.mnt = mntget(sock_mnt); d_instantiate(path.dentry, SOCK_INODE(sock)); - SOCK_INODE(sock)->i_fop = &socket_file_ops; file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); @@ -559,23 +556,6 @@ static struct socket *sock_alloc(void) return sock; } -/* - * In theory you can't get an open on this inode, but /proc provides - * a back door. Remember to keep it shut otherwise you'll let the - * creepy crawlies in. - */ - -static int sock_no_open(struct inode *irrelevant, struct file *dontcare) -{ - return -ENXIO; -} - -const struct file_operations bad_sock_fops = { - .owner = THIS_MODULE, - .open = sock_no_open, - .llseek = noop_llseek, -}; - /** * sock_release - close a socket * @sock: socket to close @@ -633,13 +613,6 @@ EXPORT_SYMBOL(__sock_tx_timestamp); static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - return sock->ops->sendmsg(iocb, sock, msg, size); } @@ -655,11 +628,9 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, bool nosec) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : __sock_sendmsg(&iocb, sock, msg, size); if (-EIOCBQUEUED == ret) @@ -776,14 +747,6 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - si->flags = flags; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); } @@ -799,11 +762,9 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -815,11 +776,9 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -886,42 +845,26 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - struct sock_iocb *siocb) -{ - if (!is_sync_kiocb(iocb)) - BUG(); - - siocb->kiocb = iocb; - iocb->private = siocb; - return siocb; -} - static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, struct file *file, const struct iovec *iov, unsigned long nr_segs) { struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, size); + iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, iocb->ki_nbytes); msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); + return __sock_recvmsg(iocb, sock, msg, iocb->ki_nbytes, msg->msg_flags); } static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct sock_iocb siocb, *x; + struct msghdr msg; if (pos != 0) return -ESPIPE; @@ -929,11 +872,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + return do_sock_read(&msg, iocb, iocb->ki_filp, iov, nr_segs); } static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, @@ -941,37 +880,28 @@ static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, unsigned long nr_segs) { struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, size); + iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes); msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; if (sock->type == SOCK_SEQPACKET) msg->msg_flags |= MSG_EOR; - return __sock_sendmsg(iocb, sock, msg, size); + return __sock_sendmsg(iocb, sock, msg, iocb->ki_nbytes); } static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct sock_iocb siocb, *x; + struct msghdr msg; if (pos != 0) return -ESPIPE; - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + return do_sock_write(&msg, iocb, iocb->ki_filp, iov, nr_segs); } /* diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index de856ddf5fed..224a82f24d3c 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -886,7 +886,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs u32 priv_len, maj_stat; int pad, saved_len, remaining_len, offset; - rqstp->rq_splice_ok = false; + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 066362141133..33fb105d4352 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -20,6 +20,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/ctype.h> +#include <linux/string_helpers.h> #include <asm/uaccess.h> #include <linux/poll.h> #include <linux/seq_file.h> @@ -1067,30 +1068,15 @@ void qword_add(char **bpp, int *lp, char *str) { char *bp = *bpp; int len = *lp; - char c; + int ret; if (len < 0) return; - while ((c=*str++) && len) - switch(c) { - case ' ': - case '\t': - case '\n': - case '\\': - if (len >= 4) { - *bp++ = '\\'; - *bp++ = '0' + ((c & 0300)>>6); - *bp++ = '0' + ((c & 0070)>>3); - *bp++ = '0' + ((c & 0007)>>0); - } - len -= 4; - break; - default: - *bp++ = c; - len--; - } - if (c || len <1) len = -1; + ret = string_escape_str(str, &bp, len, ESCAPE_OCTAL, "\\ \n\t"); + if (ret < 0 || ret == len) + len = -1; else { + len -= ret; *bp++ = ' '; len--; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2783fd80c229..91eaef1844c8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -191,7 +191,7 @@ svc_pool_map_init_percpu(struct svc_pool_map *m) return err; for_each_online_cpu(cpu) { - BUG_ON(pidx > maxpools); + BUG_ON(pidx >= maxpools); m->to_pool[cpu] = pidx; m->pool_to[pidx] = cpu; pidx++; @@ -476,15 +476,11 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, i, serv->sv_name); pool->sp_id = i; - INIT_LIST_HEAD(&pool->sp_threads); INIT_LIST_HEAD(&pool->sp_sockets); INIT_LIST_HEAD(&pool->sp_all_threads); spin_lock_init(&pool->sp_lock); } - if (svc_uses_rpcbind(serv) && (!serv->sv_shutdown)) - serv->sv_shutdown = svc_rpcb_cleanup; - return serv; } @@ -505,13 +501,15 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, unsigned int npools = svc_pool_map_get(); serv = __svc_create(prog, bufsize, npools, shutdown); + if (!serv) + goto out_err; - if (serv != NULL) { - serv->sv_function = func; - serv->sv_module = mod; - } - + serv->sv_function = func; + serv->sv_module = mod; return serv; +out_err: + svc_pool_map_put(); + return NULL; } EXPORT_SYMBOL_GPL(svc_create_pooled); @@ -615,12 +613,14 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) goto out_enomem; serv->sv_nrthreads++; + __set_bit(RQ_BUSY, &rqstp->rq_flags); + spin_lock_init(&rqstp->rq_lock); + rqstp->rq_server = serv; + rqstp->rq_pool = pool; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; - list_add(&rqstp->rq_all, &pool->sp_all_threads); + list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); spin_unlock_bh(&pool->sp_lock); - rqstp->rq_server = serv; - rqstp->rq_pool = pool; rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_argp) @@ -685,7 +685,8 @@ found_pool: * so we don't try to kill it again. */ rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); - list_del_init(&rqstp->rq_all); + set_bit(RQ_VICTIM, &rqstp->rq_flags); + list_del_rcu(&rqstp->rq_all); task = rqstp->rq_task; } spin_unlock_bh(&pool->sp_lock); @@ -783,10 +784,11 @@ svc_exit_thread(struct svc_rqst *rqstp) spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads--; - list_del(&rqstp->rq_all); + if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags)) + list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); - kfree(rqstp); + kfree_rcu(rqstp, rq_rcu_head); /* Release the server */ if (serv) @@ -1086,10 +1088,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off only in gss privacy case: */ - rqstp->rq_splice_ok = true; + set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Will be turned off only when NFSv4 Sessions are used */ - rqstp->rq_usedeferral = true; - rqstp->rq_dropme = false; + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_DROPME, &rqstp->rq_flags); /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); @@ -1189,7 +1191,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); /* Encode reply */ - if (rqstp->rq_dropme) { + if (test_bit(RQ_DROPME, &rqstp->rq_flags)) { if (procp->pc_release) procp->pc_release(rqstp, NULL, rqstp->rq_resp); goto dropit; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bbb3b044b877..c69358b3cf7f 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -220,9 +220,11 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, */ static void svc_xprt_received(struct svc_xprt *xprt) { - WARN_ON_ONCE(!test_bit(XPT_BUSY, &xprt->xpt_flags)); - if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) + if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) { + WARN_ONCE(1, "xprt=0x%p already busy!", xprt); return; + } + /* As soon as we clear busy, the xprt could be closed and * 'put', so we need a reference to call svc_xprt_do_enqueue with: */ @@ -310,25 +312,6 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) } EXPORT_SYMBOL_GPL(svc_print_addr); -/* - * Queue up an idle server thread. Must have pool->sp_lock held. - * Note: this is really a stack rather than a queue, so that we only - * use as many different threads as we need, and the rest don't pollute - * the cache. - */ -static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) -{ - list_add(&rqstp->rq_list, &pool->sp_threads); -} - -/* - * Dequeue an nfsd thread. Must have pool->sp_lock held. - */ -static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) -{ - list_del(&rqstp->rq_list); -} - static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) { if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE))) @@ -341,11 +324,12 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) static void svc_xprt_do_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; - struct svc_rqst *rqstp; + struct svc_rqst *rqstp = NULL; int cpu; + bool queued = false; if (!svc_xprt_has_something_to_do(xprt)) - return; + goto out; /* Mark transport as busy. It will remain in this state until * the provider calls svc_xprt_received. We update XPT_BUSY @@ -355,43 +339,69 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { /* Don't enqueue transport while already enqueued */ dprintk("svc: transport %p busy, not enqueued\n", xprt); - return; + goto out; } cpu = get_cpu(); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); - spin_lock_bh(&pool->sp_lock); - pool->sp_stats.packets++; - - if (!list_empty(&pool->sp_threads)) { - rqstp = list_entry(pool->sp_threads.next, - struct svc_rqst, - rq_list); - dprintk("svc: transport %p served by daemon %p\n", - xprt, rqstp); - svc_thread_dequeue(pool, rqstp); - if (rqstp->rq_xprt) - printk(KERN_ERR - "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", - rqstp, rqstp->rq_xprt); - /* Note the order of the following 3 lines: - * We want to assign xprt to rqstp->rq_xprt only _after_ - * we've woken up the process, so that we don't race with - * the lockless check in svc_get_next_xprt(). + atomic_long_inc(&pool->sp_stats.packets); + +redo_search: + /* find a thread for this xprt */ + rcu_read_lock(); + list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + /* Do a lockless check first */ + if (test_bit(RQ_BUSY, &rqstp->rq_flags)) + continue; + + /* + * Once the xprt has been queued, it can only be dequeued by + * the task that intends to service it. All we can do at that + * point is to try to wake this thread back up so that it can + * do so. */ - svc_xprt_get(xprt); + if (!queued) { + spin_lock_bh(&rqstp->rq_lock); + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) { + /* already busy, move on... */ + spin_unlock_bh(&rqstp->rq_lock); + continue; + } + + /* this one will do */ + rqstp->rq_xprt = xprt; + svc_xprt_get(xprt); + spin_unlock_bh(&rqstp->rq_lock); + } + rcu_read_unlock(); + + atomic_long_inc(&pool->sp_stats.threads_woken); wake_up_process(rqstp->rq_task); - rqstp->rq_xprt = xprt; - pool->sp_stats.threads_woken++; - } else { + put_cpu(); + goto out; + } + rcu_read_unlock(); + + /* + * We didn't find an idle thread to use, so we need to queue the xprt. + * Do so and then search again. If we find one, we can't hook this one + * up to it directly but we can wake the thread up in the hopes that it + * will pick it up once it searches for a xprt to service. + */ + if (!queued) { + queued = true; dprintk("svc: transport %p put into queue\n", xprt); + spin_lock_bh(&pool->sp_lock); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); pool->sp_stats.sockets_queued++; + spin_unlock_bh(&pool->sp_lock); + goto redo_search; } - - spin_unlock_bh(&pool->sp_lock); + rqstp = NULL; put_cpu(); +out: + trace_svc_xprt_do_enqueue(xprt, rqstp); } /* @@ -408,22 +418,28 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) EXPORT_SYMBOL_GPL(svc_xprt_enqueue); /* - * Dequeue the first transport. Must be called with the pool->sp_lock held. + * Dequeue the first transport, if there is one. */ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) { - struct svc_xprt *xprt; + struct svc_xprt *xprt = NULL; if (list_empty(&pool->sp_sockets)) - return NULL; - - xprt = list_entry(pool->sp_sockets.next, - struct svc_xprt, xpt_ready); - list_del_init(&xprt->xpt_ready); + goto out; - dprintk("svc: transport %p dequeued, inuse=%d\n", - xprt, atomic_read(&xprt->xpt_ref.refcount)); + spin_lock_bh(&pool->sp_lock); + if (likely(!list_empty(&pool->sp_sockets))) { + xprt = list_first_entry(&pool->sp_sockets, + struct svc_xprt, xpt_ready); + list_del_init(&xprt->xpt_ready); + svc_xprt_get(xprt); + dprintk("svc: transport %p dequeued, inuse=%d\n", + xprt, atomic_read(&xprt->xpt_ref.refcount)); + } + spin_unlock_bh(&pool->sp_lock); +out: + trace_svc_xprt_dequeue(xprt); return xprt; } @@ -484,34 +500,36 @@ static void svc_xprt_release(struct svc_rqst *rqstp) } /* - * External function to wake up a server waiting for data - * This really only makes sense for services like lockd - * which have exactly one thread anyway. + * Some svc_serv's will have occasional work to do, even when a xprt is not + * waiting to be serviced. This function is there to "kick" a task in one of + * those services so that it can wake up and do that work. Note that we only + * bother with pool 0 as we don't need to wake up more than one thread for + * this purpose. */ void svc_wake_up(struct svc_serv *serv) { struct svc_rqst *rqstp; - unsigned int i; struct svc_pool *pool; - for (i = 0; i < serv->sv_nrpools; i++) { - pool = &serv->sv_pools[i]; + pool = &serv->sv_pools[0]; - spin_lock_bh(&pool->sp_lock); - if (!list_empty(&pool->sp_threads)) { - rqstp = list_entry(pool->sp_threads.next, - struct svc_rqst, - rq_list); - dprintk("svc: daemon %p woken up.\n", rqstp); - /* - svc_thread_dequeue(pool, rqstp); - rqstp->rq_xprt = NULL; - */ - wake_up_process(rqstp->rq_task); - } else - pool->sp_task_pending = 1; - spin_unlock_bh(&pool->sp_lock); + rcu_read_lock(); + list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + /* skip any that aren't queued */ + if (test_bit(RQ_BUSY, &rqstp->rq_flags)) + continue; + rcu_read_unlock(); + dprintk("svc: daemon %p woken up.\n", rqstp); + wake_up_process(rqstp->rq_task); + trace_svc_wake_up(rqstp->rq_task->pid); + return; } + rcu_read_unlock(); + + /* No free entries available */ + set_bit(SP_TASK_PENDING, &pool->sp_flags); + smp_wmb(); + trace_svc_wake_up(0); } EXPORT_SYMBOL_GPL(svc_wake_up); @@ -622,75 +640,86 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) return 0; } +static bool +rqst_should_sleep(struct svc_rqst *rqstp) +{ + struct svc_pool *pool = rqstp->rq_pool; + + /* did someone call svc_wake_up? */ + if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags)) + return false; + + /* was a socket queued? */ + if (!list_empty(&pool->sp_sockets)) + return false; + + /* are we shutting down? */ + if (signalled() || kthread_should_stop()) + return false; + + /* are we freezing? */ + if (freezing(current)) + return false; + + return true; +} + static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; long time_left = 0; + /* rq_xprt should be clear on entry */ + WARN_ON_ONCE(rqstp->rq_xprt); + /* Normally we will wait up to 5 seconds for any required * cache information to be provided. */ rqstp->rq_chandle.thread_wait = 5*HZ; - spin_lock_bh(&pool->sp_lock); xprt = svc_xprt_dequeue(pool); if (xprt) { rqstp->rq_xprt = xprt; - svc_xprt_get(xprt); /* As there is a shortage of threads and this request * had to be queued, don't allow the thread to wait so * long for cache updates. */ rqstp->rq_chandle.thread_wait = 1*HZ; - pool->sp_task_pending = 0; - } else { - if (pool->sp_task_pending) { - pool->sp_task_pending = 0; - xprt = ERR_PTR(-EAGAIN); - goto out; - } - /* - * We have to be able to interrupt this wait - * to bring down the daemons ... - */ - set_current_state(TASK_INTERRUPTIBLE); + clear_bit(SP_TASK_PENDING, &pool->sp_flags); + return xprt; + } - /* No data pending. Go to sleep */ - svc_thread_enqueue(pool, rqstp); - spin_unlock_bh(&pool->sp_lock); + /* + * We have to be able to interrupt this wait + * to bring down the daemons ... + */ + set_current_state(TASK_INTERRUPTIBLE); + clear_bit(RQ_BUSY, &rqstp->rq_flags); + smp_mb(); - if (!(signalled() || kthread_should_stop())) { - time_left = schedule_timeout(timeout); - __set_current_state(TASK_RUNNING); + if (likely(rqst_should_sleep(rqstp))) + time_left = schedule_timeout(timeout); + else + __set_current_state(TASK_RUNNING); - try_to_freeze(); + try_to_freeze(); - xprt = rqstp->rq_xprt; - if (xprt != NULL) - return xprt; - } else - __set_current_state(TASK_RUNNING); + spin_lock_bh(&rqstp->rq_lock); + set_bit(RQ_BUSY, &rqstp->rq_flags); + spin_unlock_bh(&rqstp->rq_lock); - spin_lock_bh(&pool->sp_lock); - if (!time_left) - pool->sp_stats.threads_timedout++; + xprt = rqstp->rq_xprt; + if (xprt != NULL) + return xprt; - xprt = rqstp->rq_xprt; - if (!xprt) { - svc_thread_dequeue(pool, rqstp); - spin_unlock_bh(&pool->sp_lock); - dprintk("svc: server %p, no data yet\n", rqstp); - if (signalled() || kthread_should_stop()) - return ERR_PTR(-EINTR); - else - return ERR_PTR(-EAGAIN); - } - } -out: - spin_unlock_bh(&pool->sp_lock); - return xprt; + if (!time_left) + atomic_long_inc(&pool->sp_stats.threads_timedout); + + if (signalled() || kthread_should_stop()) + return ERR_PTR(-EINTR); + return ERR_PTR(-EAGAIN); } static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) @@ -719,7 +748,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) dprintk("svc_recv: found XPT_CLOSE\n"); svc_delete_xprt(xprt); /* Leave XPT_BUSY set on the dead xprt: */ - return 0; + goto out; } if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; @@ -750,6 +779,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) } /* clear XPT_BUSY: */ svc_xprt_received(xprt); +out: + trace_svc_handle_xprt(xprt, len); return len; } @@ -797,7 +828,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) clear_bit(XPT_OLD, &xprt->xpt_flags); - rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp); + if (xprt->xpt_ops->xpo_secure_port(rqstp)) + set_bit(RQ_SECURE, &rqstp->rq_flags); + else + clear_bit(RQ_SECURE, &rqstp->rq_flags); rqstp->rq_chandle.defer = svc_defer; rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]); @@ -895,7 +929,6 @@ static void svc_age_temp_xprts(unsigned long closure) continue; list_del_init(le); set_bit(XPT_CLOSE, &xprt->xpt_flags); - set_bit(XPT_DETACHED, &xprt->xpt_flags); dprintk("queuing xprt %p for closing\n", xprt); /* a thread will dequeue and close it soon */ @@ -935,8 +968,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt) xprt->xpt_ops->xpo_detach(xprt); spin_lock_bh(&serv->sv_lock); - if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) - list_del_init(&xprt->xpt_list); + list_del_init(&xprt->xpt_list); WARN_ON_ONCE(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; @@ -1080,7 +1112,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); struct svc_deferred_req *dr; - if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral) + if (rqstp->rq_arg.page_len || !test_bit(RQ_USEDEFERRAL, &rqstp->rq_flags)) return NULL; /* if more than a page, give up FIXME */ if (rqstp->rq_deferred) { dr = rqstp->rq_deferred; @@ -1109,7 +1141,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) } svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; - rqstp->rq_dropme = true; + set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; return &dr->handle; @@ -1311,10 +1343,10 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) seq_printf(m, "%u %lu %lu %lu %lu\n", pool->sp_id, - pool->sp_stats.packets, + (unsigned long)atomic_long_read(&pool->sp_stats.packets), pool->sp_stats.sockets_queued, - pool->sp_stats.threads_woken, - pool->sp_stats.threads_timedout); + (unsigned long)atomic_long_read(&pool->sp_stats.threads_woken), + (unsigned long)atomic_long_read(&pool->sp_stats.threads_timedout)); return 0; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f9c052d508f0..cc331b6cf573 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1145,7 +1145,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; - rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags); + if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags)) + set_bit(RQ_LOCAL, &rqstp->rq_flags); + else + clear_bit(RQ_LOCAL, &rqstp->rq_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 290af97bf6f9..4439ac4c1b53 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -606,7 +606,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) struct kvec *head = buf->head; struct kvec *tail = buf->tail; int fraglen; - int new, old; + int new; if (len > buf->len) { WARN_ON_ONCE(1); @@ -617,9 +617,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) fraglen = min_t(int, buf->len - len, tail->iov_len); tail->iov_len -= fraglen; buf->len -= fraglen; - if (tail->iov_len && buf->len == len) { + if (tail->iov_len) { xdr->p = tail->iov_base + tail->iov_len; - /* xdr->end, xdr->iov should be set already */ + WARN_ON_ONCE(!xdr->end); + WARN_ON_ONCE(!xdr->iov); return; } WARN_ON_ONCE(fraglen); @@ -628,14 +629,14 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) buf->len -= fraglen; new = buf->page_base + buf->page_len; - old = new + fraglen; - xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT); - if (buf->page_len && buf->len == len) { + xdr->page_ptr = buf->pages + (new >> PAGE_SHIFT); + + if (buf->page_len) { xdr->p = page_address(*xdr->page_ptr); xdr->end = (void *)xdr->p + PAGE_SIZE; xdr->p = (void *)xdr->p + (new % PAGE_SIZE); - /* xdr->iov should already be NULL */ + WARN_ON_ONCE(xdr->iov); return; } if (fraglen) { diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index d162b21b14bd..8c1e558db118 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -11,6 +11,8 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/init.h> +#include <linux/mutex.h> +#include <linux/notifier.h> #include <linux/netdevice.h> #include <net/switchdev.h> @@ -50,3 +52,176 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) return ops->ndo_switch_port_stp_update(dev, state); } EXPORT_SYMBOL(netdev_switch_port_stp_update); + +static DEFINE_MUTEX(netdev_switch_mutex); +static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); + +/** + * register_netdev_switch_notifier - Register nofifier + * @nb: notifier_block + * + * Register switch device notifier. This should be used by code + * which needs to monitor events happening in particular device. + * Return values are same as for atomic_notifier_chain_register(). + */ +int register_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(register_netdev_switch_notifier); + +/** + * unregister_netdev_switch_notifier - Unregister nofifier + * @nb: notifier_block + * + * Unregister switch device notifier. + * Return values are same as for atomic_notifier_chain_unregister(). + */ +int unregister_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(unregister_netdev_switch_notifier); + +/** + * call_netdev_switch_notifiers - Call nofifiers + * @val: value passed unmodified to notifier function + * @dev: port device + * @info: notifier information data + * + * Call all network notifier blocks. This should be called by driver + * when it needs to propagate hardware event. + * Return values are same as for atomic_notifier_call_chain(). + */ +int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, + struct netdev_switch_notifier_info *info) +{ + int err; + + info->dev = dev; + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(call_netdev_switch_notifiers); + +/** + * netdev_switch_port_bridge_setlink - Notify switch device port of bridge + * port attributes + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify switch device port of bridge port attributes + */ +int netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_setlink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_setlink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_setlink); + +/** + * netdev_switch_port_bridge_dellink - Notify switch device port of bridge + * port attribute delete + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify switch device port of bridge port attribute delete + */ +int netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_dellink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_dellink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_dellink); + +/** + * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify master device slaves of bridge port attributes + */ +int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); + if (err && err != -EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink); + +/** + * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge dellink flags + * + * Notify master device slaves of bridge port attribute deletes + */ +int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); + if (err && err != -EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index c890848f9d56..91c8a8e031db 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -20,18 +20,6 @@ menuconfig TIPC If in doubt, say N. -config TIPC_PORTS - int "Maximum number of ports in a node" - depends on TIPC - range 127 65535 - default "8191" - help - Specifies how many ports can be supported by a node. - Can range from 127 to 65535 ports; default is 8191. - - Setting this to a smaller value saves some memory, - setting it to higher allows for more ports. - config TIPC_MEDIA_IB bool "InfiniBand media type support" depends on TIPC && INFINIBAND_IPOIB diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 357b74b26f9e..48fd3b5a73fb 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -34,8 +34,51 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "core.h" +#include <linux/kernel.h> #include "addr.h" +#include "core.h" + +/** + * in_own_cluster - test for cluster inclusion; <0.0.0> always matches + */ +int in_own_cluster(struct net *net, u32 addr) +{ + return in_own_cluster_exact(net, addr) || !addr; +} + +int in_own_cluster_exact(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return !((addr ^ tn->own_addr) >> 12); +} + +/** + * in_own_node - test for node inclusion; <0.0.0> always matches + */ +int in_own_node(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return (addr == tn->own_addr) || !addr; +} + +/** + * addr_domain - convert 2-bit scope value to equivalent message lookup domain + * + * Needed when address of a named message must be looked up a second time + * after a network hop. + */ +u32 addr_domain(struct net *net, u32 sc) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (likely(sc == TIPC_NODE_SCOPE)) + return tn->own_addr; + if (sc == TIPC_CLUSTER_SCOPE) + return tipc_cluster_mask(tn->own_addr); + return tipc_zone_mask(tn->own_addr); +} /** * tipc_addr_domain_valid - validates a network domain address diff --git a/net/tipc/addr.h b/net/tipc/addr.h index a74acf9ee804..c700c2d28e09 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,7 +37,10 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H -#include "core.h" +#include <linux/types.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #define TIPC_ZONE_MASK 0xff000000u #define TIPC_CLUSTER_MASK 0xfffff000u @@ -52,42 +55,10 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_CLUSTER_MASK; } -static inline int in_own_cluster_exact(u32 addr) -{ - return !((addr ^ tipc_own_addr) >> 12); -} - -/** - * in_own_node - test for node inclusion; <0.0.0> always matches - */ -static inline int in_own_node(u32 addr) -{ - return (addr == tipc_own_addr) || !addr; -} - -/** - * in_own_cluster - test for cluster inclusion; <0.0.0> always matches - */ -static inline int in_own_cluster(u32 addr) -{ - return in_own_cluster_exact(addr) || !addr; -} - -/** - * addr_domain - convert 2-bit scope value to equivalent message lookup domain - * - * Needed when address of a named message must be looked up a second time - * after a network hop. - */ -static inline u32 addr_domain(u32 sc) -{ - if (likely(sc == TIPC_NODE_SCOPE)) - return tipc_own_addr; - if (sc == TIPC_CLUSTER_SCOPE) - return tipc_cluster_mask(tipc_own_addr); - return tipc_zone_mask(tipc_own_addr); -} - +int in_own_cluster(struct net *net, u32 addr); +int in_own_cluster_exact(struct net *net, u32 addr); +int in_own_node(struct net *net, u32 addr); +u32 addr_domain(struct net *net, u32 sc); int tipc_addr_domain_valid(u32); int tipc_addr_node_valid(u32 addr); int tipc_in_scope(u32 domain, u32 addr); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 96ceefeb9daf..53f8bf059fec 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -35,77 +35,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "core.h" -#include "link.h" #include "socket.h" #include "msg.h" #include "bcast.h" #include "name_distr.h" +#include "core.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ -#define BCBEARER MAX_BEARERS - -/** - * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link - * @primary: pointer to primary bearer - * @secondary: pointer to secondary bearer - * - * Bearers must have same priority and same set of reachable destinations - * to be paired. - */ - -struct tipc_bcbearer_pair { - struct tipc_bearer *primary; - struct tipc_bearer *secondary; -}; - -/** - * struct tipc_bcbearer - bearer used by broadcast link - * @bearer: (non-standard) broadcast bearer structure - * @media: (non-standard) broadcast media structure - * @bpairs: array of bearer pairs - * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() - * @remains: temporary node map used by tipc_bcbearer_send() - * @remains_new: temporary node map used tipc_bcbearer_send() - * - * Note: The fields labelled "temporary" are incorporated into the bearer - * to avoid consuming potentially limited stack space through the use of - * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bclink_lock". - */ -struct tipc_bcbearer { - struct tipc_bearer bearer; - struct tipc_media media; - struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; - struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct tipc_node_map remains; - struct tipc_node_map remains_new; -}; - -/** - * struct tipc_bclink - link used for broadcast messages - * @lock: spinlock governing access to structure - * @link: (non-standard) broadcast link structure - * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states - * @bcast_nodes: map of broadcast-capable nodes - * @retransmit_to: node that most recently requested a retransmit - * - * Handles sequence numbering, fragmentation, bundling, etc. - */ -struct tipc_bclink { - spinlock_t lock; - struct tipc_link link; - struct tipc_node node; - unsigned int flags; - struct tipc_node_map bcast_nodes; - struct tipc_node *retransmit_to; -}; - -static struct tipc_bcbearer *bcbearer; -static struct tipc_bclink *bclink; -static struct tipc_link *bcl; const char tipc_bclink_name[] = "broadcast-link"; @@ -115,25 +52,28 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); -static void tipc_bclink_lock(void) +static void tipc_bclink_lock(struct net *net) { - spin_lock_bh(&bclink->lock); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + spin_lock_bh(&tn->bclink->lock); } -static void tipc_bclink_unlock(void) +static void tipc_bclink_unlock(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node = NULL; - if (likely(!bclink->flags)) { - spin_unlock_bh(&bclink->lock); + if (likely(!tn->bclink->flags)) { + spin_unlock_bh(&tn->bclink->lock); return; } - if (bclink->flags & TIPC_BCLINK_RESET) { - bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(); + if (tn->bclink->flags & TIPC_BCLINK_RESET) { + tn->bclink->flags &= ~TIPC_BCLINK_RESET; + node = tipc_bclink_retransmit_to(net); } - spin_unlock_bh(&bclink->lock); + spin_unlock_bh(&tn->bclink->lock); if (node) tipc_link_reset_all(node); @@ -144,9 +84,11 @@ uint tipc_bclink_get_mtu(void) return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(unsigned int flags) +void tipc_bclink_set_flags(struct net *net, unsigned int flags) { - bclink->flags |= flags; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tn->bclink->flags |= flags; } static u32 bcbuf_acks(struct sk_buff *buf) @@ -164,31 +106,40 @@ static void bcbuf_decr_acks(struct sk_buff *buf) bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); } -void tipc_bclink_add_node(u32 addr) +void tipc_bclink_add_node(struct net *net, u32 addr) { - tipc_bclink_lock(); - tipc_nmap_add(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_add(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -void tipc_bclink_remove_node(u32 addr) +void tipc_bclink_remove_node(struct net *net, u32 addr) { - tipc_bclink_lock(); - tipc_nmap_remove(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -static void bclink_set_last_sent(void) +static void bclink_set_last_sent(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (bcl->next_out) bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1); else bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); } -u32 tipc_bclink_get_last_sent(void) +u32 tipc_bclink_get_last_sent(struct net *net) { - return bcl->fsm_msg_cnt; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bcl->fsm_msg_cnt; } static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) @@ -203,9 +154,11 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) * * Called with bclink_lock locked */ -struct tipc_node *tipc_bclink_retransmit_to(void) +struct tipc_node *tipc_bclink_retransmit_to(struct net *net) { - return bclink->retransmit_to; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bclink->retransmit_to; } /** @@ -215,15 +168,17 @@ struct tipc_node *tipc_bclink_retransmit_to(void) * * Called with bclink_lock locked */ -static void bclink_retransmit_pkt(u32 after, u32 to) +static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) { struct sk_buff *skb; + struct tipc_link *bcl = tn->bcl; skb_queue_walk(&bcl->outqueue, skb) { - if (more(buf_seqno(skb), after)) + if (more(buf_seqno(skb), after)) { + tipc_link_retransmit(bcl, skb, mod(to - after)); break; + } } - tipc_link_retransmit(bcl, skb, mod(to - after)); } /** @@ -231,13 +186,13 @@ static void bclink_retransmit_pkt(u32 after, u32 to) * * Called with no locks taken */ -void tipc_bclink_wakeup_users(void) +void tipc_bclink_wakeup_users(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb; - while ((skb = skb_dequeue(&bclink->link.waiting_sks))) - tipc_sk_rcv(skb); - + while ((skb = skb_dequeue(&tn->bclink->link.waiting_sks))) + tipc_sk_rcv(net, skb); } /** @@ -252,10 +207,12 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct sk_buff *skb, *tmp; struct sk_buff *next; unsigned int released = 0; + struct net *net = n_ptr->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); - tipc_bclink_lock(); + tipc_bclink_lock(net); /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&bcl->outqueue); + skb = skb_peek(&tn->bcl->outqueue); if (!skb) goto exit; @@ -266,43 +223,43 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) * acknowledge sent messages only (if other nodes still exist) * or both sent and unsent messages (otherwise) */ - if (bclink->bcast_nodes.count) - acked = bcl->fsm_msg_cnt; + if (tn->bclink->bcast_nodes.count) + acked = tn->bcl->fsm_msg_cnt; else - acked = bcl->next_out_no; + acked = tn->bcl->next_out_no; } else { /* * Bail out if specified sequence number does not correspond * to a message that has been sent and not yet acknowledged */ if (less(acked, buf_seqno(skb)) || - less(bcl->fsm_msg_cnt, acked) || + less(tn->bcl->fsm_msg_cnt, acked) || less_eq(acked, n_ptr->bclink.acked)) goto exit; } /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&bcl->outqueue, skb) { + skb_queue_walk(&tn->bcl->outqueue, skb) { if (more(buf_seqno(skb), n_ptr->bclink.acked)) break; } /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&bcl->outqueue, skb, tmp) { + skb_queue_walk_from_safe(&tn->bcl->outqueue, skb, tmp) { if (more(buf_seqno(skb), acked)) break; - next = tipc_skb_queue_next(&bcl->outqueue, skb); - if (skb != bcl->next_out) { + next = tipc_skb_queue_next(&tn->bcl->outqueue, skb); + if (skb != tn->bcl->next_out) { bcbuf_decr_acks(skb); } else { bcbuf_set_acks(skb, 0); - bcl->next_out = next; - bclink_set_last_sent(); + tn->bcl->next_out = next; + bclink_set_last_sent(net); } if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &bcl->outqueue); + __skb_unlink(skb, &tn->bcl->outqueue); kfree_skb(skb); released = 1; } @@ -310,15 +267,15 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) n_ptr->bclink.acked = acked; /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(bcl->next_out)) { - tipc_link_push_packets(bcl); - bclink_set_last_sent(); + if (unlikely(tn->bcl->next_out)) { + tipc_link_push_packets(tn->bcl); + bclink_set_last_sent(net); } - if (unlikely(released && !skb_queue_empty(&bcl->waiting_sks))) + if (unlikely(released && !skb_queue_empty(&tn->bcl->waiting_sks))) n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; exit: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } /** @@ -326,9 +283,11 @@ exit: * * RCU and node lock set */ -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) +void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, + u32 last_sent) { struct sk_buff *buf; + struct tipc_net *tn = net_generic(net, tipc_net_id); /* Ignore "stale" link state info */ if (less_eq(last_sent, n_ptr->bclink.last_in)) @@ -358,18 +317,18 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, + tipc_msg_init(net, msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); + msg_set_mc_netid(msg, tn->net_id); msg_set_bcast_ack(msg, n_ptr->bclink.last_in); msg_set_bcgap_after(msg, n_ptr->bclink.last_in); msg_set_bcgap_to(msg, to); - tipc_bclink_lock(); - tipc_bearer_send(MAX_BEARERS, buf, NULL); - bcl->stats.sent_nacks++; - tipc_bclink_unlock(); + tipc_bclink_lock(net); + tipc_bearer_send(net, MAX_BEARERS, buf, NULL); + tn->bcl->stats.sent_nacks++; + tipc_bclink_unlock(net); kfree_skb(buf); n_ptr->bclink.oos_state++; @@ -382,9 +341,9 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) * Delay any upcoming NACK by this node if another node has already * requested the first message this node is going to ask for. */ -static void bclink_peek_nack(struct tipc_msg *msg) +static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) { - struct tipc_node *n_ptr = tipc_node_find(msg_destnode(msg)); + struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg)); if (unlikely(!n_ptr)) return; @@ -401,12 +360,16 @@ static void bclink_peek_nack(struct tipc_msg *msg) /* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster * and to identified node local sockets + * @net: the applicable net namespace * @list: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_bclink_xmit(struct sk_buff_head *list) +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + struct tipc_bclink *bclink = tn->bclink; int rc = 0; int bc = 0; struct sk_buff *skb; @@ -420,19 +383,19 @@ int tipc_bclink_xmit(struct sk_buff_head *list) /* Broadcast to all other nodes */ if (likely(bclink)) { - tipc_bclink_lock(); + tipc_bclink_lock(net); if (likely(bclink->bcast_nodes.count)) { - rc = __tipc_link_xmit(bcl, list); + rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { u32 len = skb_queue_len(&bcl->outqueue); - bclink_set_last_sent(); + bclink_set_last_sent(net); bcl->stats.queue_sz_counts++; bcl->stats.accu_queue_sz += len; } bc = 1; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } if (unlikely(!bc)) @@ -440,7 +403,7 @@ int tipc_bclink_xmit(struct sk_buff_head *list) /* Deliver message clone */ if (likely(!rc)) - tipc_sk_mcast_rcv(skb); + tipc_sk_mcast_rcv(net, skb); else kfree_skb(skb); @@ -454,19 +417,21 @@ int tipc_bclink_xmit(struct sk_buff_head *list) */ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) { + struct tipc_net *tn = net_generic(node->net, tipc_net_id); + bclink_update_last_sent(node, seqno); node->bclink.last_in = seqno; node->bclink.oos_state = 0; - bcl->stats.recv_info++; + tn->bcl->stats.recv_info++; /* * Unicast an ACK periodically, ensuring that * all nodes in the cluster don't ACK at the same time */ - if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { + if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], STATE_MSG, 0, 0, 0, 0, 0); - bcl->stats.sent_acks++; + tn->bcl->stats.sent_acks++; } } @@ -475,8 +440,10 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) * * RCU is locked, no other locks set */ -void tipc_bclink_rcv(struct sk_buff *buf) +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; struct tipc_msg *msg = buf_msg(buf); struct tipc_node *node; u32 next_in; @@ -484,10 +451,10 @@ void tipc_bclink_rcv(struct sk_buff *buf) int deferred = 0; /* Screen out unwanted broadcast messages */ - if (msg_mc_netid(msg) != tipc_net_id) + if (msg_mc_netid(msg) != tn->net_id) goto exit; - node = tipc_node_find(msg_prevnode(msg)); + node = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!node)) goto exit; @@ -499,18 +466,18 @@ void tipc_bclink_rcv(struct sk_buff *buf) if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { if (msg_type(msg) != STATE_MSG) goto unlock; - if (msg_destnode(msg) == tipc_own_addr) { + if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); - tipc_bclink_lock(); + tipc_bclink_lock(net); bcl->stats.recv_nacks++; - bclink->retransmit_to = node; - bclink_retransmit_pkt(msg_bcgap_after(msg), + tn->bclink->retransmit_to = node; + bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } else { tipc_node_unlock(node); - bclink_peek_nack(msg); + bclink_peek_nack(net, msg); } goto exit; } @@ -523,47 +490,47 @@ void tipc_bclink_rcv(struct sk_buff *buf) receive: /* Deliver message to destination */ if (likely(msg_isdata(msg))) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); if (likely(msg_mcast(msg))) - tipc_sk_mcast_rcv(buf); + tipc_sk_mcast_rcv(net, buf); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); - tipc_link_bundle_rcv(buf); + tipc_link_bundle_rcv(net, buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) goto unlock; - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; if (buf) { bcl->stats.recv_fragmented++; msg = buf_msg(buf); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); goto receive; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); - tipc_named_rcv(buf); + tipc_named_rcv(net, buf); } else { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); kfree_skb(buf); } @@ -601,14 +568,14 @@ receive: buf = NULL; } - tipc_bclink_lock(); + tipc_bclink_lock(net); if (deferred) bcl->stats.deferred_recv++; else bcl->stats.duplicates++; - tipc_bclink_unlock(); + tipc_bclink_unlock(net); unlock: tipc_node_unlock(node); @@ -619,7 +586,7 @@ exit: u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) { return (n_ptr->bclink.recv_permitted && - (tipc_bclink_get_last_sent() != n_ptr->bclink.acked)); + (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked)); } @@ -632,11 +599,15 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) * Returns 0 (packet sent successfully) under all circumstances, * since the broadcast link's pseudo-bearer never blocks */ -static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, +static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, + struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; struct tipc_msg *msg = buf_msg(buf); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; + struct tipc_bclink *bclink = tn->bclink; /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; @@ -646,8 +617,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (likely(!msg_non_seq(buf_msg(buf)))) { bcbuf_set_acks(buf, bclink->bcast_nodes.count); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); - bcl->stats.sent_info++; + msg_set_mc_netid(msg, tn->net_id); + tn->bcl->stats.sent_info++; if (WARN_ON(!bclink->bcast_nodes.count)) { dump_stack(); @@ -676,13 +647,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (bp_index == 0) { /* Use original buffer for first bearer */ - tipc_bearer_send(b->identity, buf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); if (!tbuf) break; - tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, tbuf, + &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } if (bcbearer->remains_new.count == 0) @@ -697,15 +669,18 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, /** * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer */ -void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; struct tipc_bearer *b; int b_index; int pri; - tipc_bclink_lock(); + tipc_bclink_lock(net); if (action) tipc_nmap_add(nm_ptr, node); @@ -717,7 +692,7 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) rcu_read_lock(); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - b = rcu_dereference_rtnl(bearer_list[b_index]); + b = rcu_dereference_rtnl(tn->bearer_list[b_index]); if (!b || !b->nodes.count) continue; @@ -752,7 +727,7 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) bp_curr++; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, @@ -806,17 +781,19 @@ msg_full: return -EMSGSIZE; } -int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; - tipc_bclink_lock(); + tipc_bclink_lock(net); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_LINK_GET); @@ -851,7 +828,7 @@ int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) if (err) goto attr_msg_full; - tipc_bclink_unlock(); + tipc_bclink_unlock(net); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -862,21 +839,23 @@ prop_msg_full: attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } -int tipc_bclink_stats(char *buf, const u32 buf_size) +int tipc_bclink_stats(struct net *net, char *buf, const u32 buf_size) { int ret; struct tipc_stats *s; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; - tipc_bclink_lock(); + tipc_bclink_lock(net); s = &bcl->stats; @@ -905,36 +884,47 @@ int tipc_bclink_stats(char *buf, const u32 buf_size) s->queue_sz_counts ? (s->accu_queue_sz / s->queue_sz_counts) : 0); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return ret; } -int tipc_bclink_reset_stats(void) +int tipc_bclink_reset_stats(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (!bcl) return -ENOPROTOOPT; - tipc_bclink_lock(); + tipc_bclink_lock(net); memset(&bcl->stats, 0, sizeof(bcl->stats)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } -int tipc_bclink_set_queue_limits(u32 limit) +int tipc_bclink_set_queue_limits(struct net *net, u32 limit) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (!bcl) return -ENOPROTOOPT; if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) return -EINVAL; - tipc_bclink_lock(); + tipc_bclink_lock(net); tipc_link_set_queue_limits(bcl, limit); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } -int tipc_bclink_init(void) +int tipc_bclink_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); if (!bcbearer) return -ENOMEM; @@ -958,25 +948,31 @@ int tipc_bclink_init(void) spin_lock_init(&bclink->node.lock); __skb_queue_head_init(&bclink->node.waiting_sks); bcl->owner = &bclink->node; + bcl->owner->net = net; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; - rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); + rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); + tn->bcbearer = bcbearer; + tn->bclink = bclink; + tn->bcl = bcl; return 0; } -void tipc_bclink_stop(void) +void tipc_bclink_stop(struct net *net) { - tipc_bclink_lock(); - tipc_link_purge_queues(bcl); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_link_purge_queues(tn->bcl); + tipc_bclink_unlock(net); - RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); + RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); synchronize_net(); - kfree(bcbearer); - kfree(bclink); + kfree(tn->bcbearer); + kfree(tn->bclink); } /** diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 644d79129fba..a4583a109486 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -37,23 +37,13 @@ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H -#include "netlink.h" +#include <linux/tipc_config.h> +#include "link.h" +#include "node.h" -#define MAX_NODES 4096 -#define WSIZE 32 -#define TIPC_BCLINK_RESET 1 - -/** - * struct tipc_node_map - set of node identifiers - * @count: # of nodes in set - * @map: bitmap of node identifiers that are in the set - */ -struct tipc_node_map { - u32 count; - u32 map[MAX_NODES / WSIZE]; -}; - -#define PLSIZE 32 +#define TIPC_BCLINK_RESET 1 +#define PLSIZE 32 +#define BCBEARER MAX_BEARERS /** * struct tipc_port_list - set of node local destination ports @@ -67,9 +57,64 @@ struct tipc_port_list { u32 ports[PLSIZE]; }; +/** + * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link + * @primary: pointer to primary bearer + * @secondary: pointer to secondary bearer + * + * Bearers must have same priority and same set of reachable destinations + * to be paired. + */ -struct tipc_node; +struct tipc_bcbearer_pair { + struct tipc_bearer *primary; + struct tipc_bearer *secondary; +}; +/** + * struct tipc_bcbearer - bearer used by broadcast link + * @bearer: (non-standard) broadcast bearer structure + * @media: (non-standard) broadcast media structure + * @bpairs: array of bearer pairs + * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() + * @remains: temporary node map used by tipc_bcbearer_send() + * @remains_new: temporary node map used tipc_bcbearer_send() + * + * Note: The fields labelled "temporary" are incorporated into the bearer + * to avoid consuming potentially limited stack space through the use of + * large local variables within multicast routines. Concurrent access is + * prevented through use of the spinlock "bclink_lock". + */ +struct tipc_bcbearer { + struct tipc_bearer bearer; + struct tipc_media media; + struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; + struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; + struct tipc_node_map remains; + struct tipc_node_map remains_new; +}; + +/** + * struct tipc_bclink - link used for broadcast messages + * @lock: spinlock governing access to structure + * @link: (non-standard) broadcast link structure + * @node: (non-standard) node structure representing b'cast link's peer node + * @flags: represent bclink states + * @bcast_nodes: map of broadcast-capable nodes + * @retransmit_to: node that most recently requested a retransmit + * + * Handles sequence numbering, fragmentation, bundling, etc. + */ +struct tipc_bclink { + spinlock_t lock; + struct tipc_link link; + struct tipc_node node; + unsigned int flags; + struct tipc_node_map bcast_nodes; + struct tipc_node *retransmit_to; +}; + +struct tipc_node; extern const char tipc_bclink_name[]; /** @@ -84,24 +129,26 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); void tipc_port_list_free(struct tipc_port_list *pl_ptr); -int tipc_bclink_init(void); -void tipc_bclink_stop(void); -void tipc_bclink_set_flags(unsigned int flags); -void tipc_bclink_add_node(u32 addr); -void tipc_bclink_remove_node(u32 addr); -struct tipc_node *tipc_bclink_retransmit_to(void); +int tipc_bclink_init(struct net *net); +void tipc_bclink_stop(struct net *net); +void tipc_bclink_set_flags(struct net *tn, unsigned int flags); +void tipc_bclink_add_node(struct net *net, u32 addr); +void tipc_bclink_remove_node(struct net *net, u32 addr); +struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -void tipc_bclink_rcv(struct sk_buff *buf); -u32 tipc_bclink_get_last_sent(void); +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); +u32 tipc_bclink_get_last_sent(struct net *net); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); -int tipc_bclink_stats(char *stats_buf, const u32 buf_size); -int tipc_bclink_reset_stats(void); -int tipc_bclink_set_queue_limits(u32 limit); -void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); +void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, + u32 last_sent); +int tipc_bclink_stats(struct net *net, char *stats_buf, const u32 buf_size); +int tipc_bclink_reset_stats(struct net *net); +int tipc_bclink_set_queue_limits(struct net *net, u32 limit); +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action); uint tipc_bclink_get_mtu(void); -int tipc_bclink_xmit(struct sk_buff_head *list); -void tipc_bclink_wakeup_users(void); -int tipc_nl_add_bc_link(struct tipc_nl_msg *msg); +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); +void tipc_bclink_wakeup_users(struct net *net); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 463db5b15b8b..33dc3486d16c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -34,11 +34,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <net/sock.h> #include "core.h" #include "config.h" #include "bearer.h" #include "link.h" #include "discover.h" +#include "bcast.h" #define MAX_ADDR_STR 60 @@ -67,9 +69,8 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } }; -struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; - -static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down); /** * tipc_media_find - locates specified media object by name @@ -190,13 +191,14 @@ static int bearer_name_validate(const char *name, /** * tipc_bearer_find - locates bearer object with matching bearer name */ -struct tipc_bearer *tipc_bearer_find(const char *name) +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (b_ptr && (!strcmp(b_ptr->name, name))) return b_ptr; } @@ -206,8 +208,9 @@ struct tipc_bearer *tipc_bearer_find(const char *name) /** * tipc_bearer_get_names - record names of bearers in buffer */ -struct sk_buff *tipc_bearer_get_names(void) +struct sk_buff *tipc_bearer_get_names(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf; struct tipc_bearer *b; int i, j; @@ -218,7 +221,7 @@ struct sk_buff *tipc_bearer_get_names(void) for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { - b = rtnl_dereference(bearer_list[j]); + b = rtnl_dereference(tn->bearer_list[j]); if (!b) continue; if (b->media == media_info_array[i]) { @@ -231,27 +234,29 @@ struct sk_buff *tipc_bearer_get_names(void) return buf; } -void tipc_bearer_add_dest(u32 bearer_id, u32 dest) +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b_ptr) { - tipc_bcbearer_sort(&b_ptr->nodes, dest, true); + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true); tipc_disc_add_dest(b_ptr->link_req); } rcu_read_unlock(); } -void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b_ptr) { - tipc_bcbearer_sort(&b_ptr->nodes, dest, false); + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, false); tipc_disc_remove_dest(b_ptr->link_req); } rcu_read_unlock(); @@ -260,8 +265,10 @@ void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) /** * tipc_enable_bearer - enable bearer with the given name */ -int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) +int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, + u32 priority) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; struct tipc_media *m_ptr; struct tipc_bearer_names b_names; @@ -271,7 +278,7 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) u32 i; int res = -EINVAL; - if (!tipc_own_addr) { + if (!tn->own_addr) { pr_warn("Bearer <%s> rejected, not supported in standalone mode\n", name); return -ENOPROTOOPT; @@ -281,11 +288,11 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) return -EINVAL; } if (tipc_addr_domain_valid(disc_domain) && - (disc_domain != tipc_own_addr)) { - if (tipc_in_scope(disc_domain, tipc_own_addr)) { - disc_domain = tipc_own_addr & TIPC_CLUSTER_MASK; + (disc_domain != tn->own_addr)) { + if (tipc_in_scope(disc_domain, tn->own_addr)) { + disc_domain = tn->own_addr & TIPC_CLUSTER_MASK; res = 0; /* accept any node in own cluster */ - } else if (in_own_cluster_exact(disc_domain)) + } else if (in_own_cluster_exact(net, disc_domain)) res = 0; /* accept specified node in own cluster */ } if (res) { @@ -313,7 +320,7 @@ restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (!b_ptr) { bearer_id = i; continue; @@ -347,7 +354,7 @@ restart: strcpy(b_ptr->name, name); b_ptr->media = m_ptr; - res = m_ptr->enable_media(b_ptr); + res = m_ptr->enable_media(net, b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); @@ -361,15 +368,15 @@ restart: b_ptr->net_plane = bearer_id + 'A'; b_ptr->priority = priority; - res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr); + res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); if (res) { - bearer_disable(b_ptr, false); + bearer_disable(net, b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); return -EINVAL; } - rcu_assign_pointer(bearer_list[bearer_id], b_ptr); + rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -380,11 +387,11 @@ restart: /** * tipc_reset_bearer - Reset all links established over this bearer */ -static int tipc_reset_bearer(struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) { pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_link_reset_list(b_ptr->identity); - tipc_disc_reset(b_ptr); + tipc_link_reset_list(net, b_ptr->identity); + tipc_disc_reset(net, b_ptr); return 0; } @@ -393,49 +400,51 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds RTNL lock. */ -static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 i; pr_info("Disabling bearer <%s>\n", b_ptr->name); b_ptr->media->disable_media(b_ptr); - tipc_link_delete_list(b_ptr->identity, shutting_down); + tipc_link_delete_list(net, b_ptr->identity, shutting_down); if (b_ptr->link_req) tipc_disc_delete(b_ptr->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == rtnl_dereference(bearer_list[i])) { - RCU_INIT_POINTER(bearer_list[i], NULL); + if (b_ptr == rtnl_dereference(tn->bearer_list[i])) { + RCU_INIT_POINTER(tn->bearer_list[i], NULL); break; } } kfree_rcu(b_ptr, rcu); } -int tipc_disable_bearer(const char *name) +int tipc_disable_bearer(struct net *net, const char *name) { struct tipc_bearer *b_ptr; int res; - b_ptr = tipc_bearer_find(name); + b_ptr = tipc_bearer_find(net, name); if (b_ptr == NULL) { pr_warn("Attempt to disable unknown bearer <%s>\n", name); res = -EINVAL; } else { - bearer_disable(b_ptr, false); + bearer_disable(net, b_ptr, false); res = 0; } return res; } -int tipc_enable_l2_media(struct tipc_bearer *b) +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) { struct net_device *dev; char *driver_name = strchr((const char *)b->name, ':') + 1; /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); + dev = dev_get_by_name(net, driver_name); if (!dev) return -ENODEV; @@ -474,8 +483,8 @@ void tipc_disable_l2_media(struct tipc_bearer *b) * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address */ -int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, - struct tipc_media_addr *dest) +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest) { struct sk_buff *clone; struct net_device *dev; @@ -511,15 +520,16 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, * The media send routine must not alter the buffer being passed in * as it may be needed for later retransmission! */ -void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (likely(b_ptr)) - b_ptr->media->send_msg(buf, b_ptr, dest); + b_ptr->media->send_msg(net, buf, b_ptr, dest); rcu_read_unlock(); } @@ -539,17 +549,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, { struct tipc_bearer *b_ptr; - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return NET_RX_DROP; - } - rcu_read_lock(); b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_rcv(buf, b_ptr); + tipc_rcv(dev_net(dev), buf, b_ptr); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -572,11 +577,9 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, void *ptr) { - struct tipc_bearer *b_ptr; struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; + struct net *net = dev_net(dev); + struct tipc_bearer *b_ptr; b_ptr = rtnl_dereference(dev->tipc_ptr); if (!b_ptr) @@ -590,16 +593,16 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_DOWN: case NETDEV_CHANGEMTU: - tipc_reset_bearer(b_ptr); + tipc_reset_bearer(net, b_ptr); break; case NETDEV_CHANGEADDR: b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, (char *)dev->dev_addr); - tipc_reset_bearer(b_ptr); + tipc_reset_bearer(net, b_ptr); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(b_ptr, false); + bearer_disable(dev_net(dev), b_ptr, false); break; } return NOTIFY_OK; @@ -632,16 +635,17 @@ void tipc_bearer_cleanup(void) dev_remove_pack(&tipc_packet_type); } -void tipc_bearer_stop(void) +void tipc_bearer_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (b_ptr) { - bearer_disable(b_ptr, true); - bearer_list[i] = NULL; + bearer_disable(net, b_ptr, true); + tn->bearer_list[i] = NULL; } } } @@ -698,6 +702,8 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) int i = cb->args[0]; struct tipc_bearer *bearer; struct tipc_nl_msg msg; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); if (i == MAX_BEARERS) return 0; @@ -708,7 +714,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); for (i = 0; i < MAX_BEARERS; i++) { - bearer = rtnl_dereference(bearer_list[i]); + bearer = rtnl_dereference(tn->bearer_list[i]); if (!bearer) continue; @@ -730,6 +736,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -753,7 +760,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) msg.seq = info->snd_seq; rtnl_lock(); - bearer = tipc_bearer_find(name); + bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; goto err_out; @@ -778,6 +785,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -794,13 +802,13 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); - bearer = tipc_bearer_find(name); + bearer = tipc_bearer_find(net, name); if (!bearer) { rtnl_unlock(); return -EINVAL; } - bearer_disable(bearer, false); + bearer_disable(net, bearer, false); rtnl_unlock(); return 0; @@ -808,6 +816,8 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); + struct tipc_net *tn = net_generic(net, tipc_net_id); int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; @@ -815,7 +825,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) u32 prio; prio = TIPC_MEDIA_LINK_PRI; - domain = tipc_own_addr & TIPC_CLUSTER_MASK; + domain = tn->own_addr & TIPC_CLUSTER_MASK; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -847,7 +857,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = tipc_enable_bearer(bearer, domain, prio); + err = tipc_enable_bearer(net, bearer, domain, prio); if (err) { rtnl_unlock(); return err; @@ -863,6 +873,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -878,7 +889,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); - b = tipc_bearer_find(name); + b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); return -EINVAL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 2c1230ac5dfe..c035e3e24764 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -37,12 +37,13 @@ #ifndef _TIPC_BEARER_H #define _TIPC_BEARER_H -#include "bcast.h" #include "netlink.h" #include <net/genetlink.h> #define MAX_BEARERS 2 #define MAX_MEDIA 2 +#define MAX_NODES 4096 +#define WSIZE 32 /* Identifiers associated with TIPC message header media address info * - address info field is 32 bytes long @@ -59,6 +60,16 @@ #define TIPC_MEDIA_TYPE_IB 2 /** + * struct tipc_node_map - set of node identifiers + * @count: # of nodes in set + * @map: bitmap of node identifiers that are in the set + */ +struct tipc_node_map { + u32 count; + u32 map[MAX_NODES / WSIZE]; +}; + +/** * struct tipc_media_addr - destination address used by TIPC bearers * @value: address info (format defined by media) * @media_id: TIPC media type identifier @@ -89,10 +100,10 @@ struct tipc_bearer; * @name: media name */ struct tipc_media { - int (*send_msg)(struct sk_buff *buf, + int (*send_msg)(struct net *net, struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); - int (*enable_media)(struct tipc_bearer *b_ptr); + int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr); void (*disable_media)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, @@ -157,17 +168,14 @@ struct tipc_bearer_names { char if_name[TIPC_MAX_IF_NAME]; }; -struct tipc_link; - -extern struct tipc_bearer __rcu *bearer_list[]; - /* * TIPC routines available to supported media types */ -void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *tb_ptr); -int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); -int tipc_disable_bearer(const char *name); +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); +int tipc_enable_bearer(struct net *net, const char *bearer_name, + u32 disc_domain, u32 priority); +int tipc_disable_bearer(struct net *net, const char *name); /* * Routines made available to TIPC by supported media types @@ -192,20 +200,20 @@ int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); -int tipc_enable_l2_media(struct tipc_bearer *b); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); -int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, - struct tipc_media_addr *dest); +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest); -struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(u32 bearer_id, u32 dest); -void tipc_bearer_remove_dest(u32 bearer_id, u32 dest); -struct tipc_bearer *tipc_bearer_find(const char *name); +struct sk_buff *tipc_bearer_get_names(struct net *net); +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); -void tipc_bearer_stop(void); -void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, +void tipc_bearer_stop(struct net *net); +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/config.c b/net/tipc/config.c index 876f4c6a2631..6873360cda53 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -134,7 +134,7 @@ static struct sk_buff *tipc_show_stats(void) return buf; } -static struct sk_buff *cfg_enable_bearer(void) +static struct sk_buff *cfg_enable_bearer(struct net *net) { struct tipc_bearer_config *args; @@ -142,7 +142,7 @@ static struct sk_buff *cfg_enable_bearer(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); - if (tipc_enable_bearer(args->name, + if (tipc_enable_bearer(net, args->name, ntohl(args->disc_domain), ntohl(args->priority))) return tipc_cfg_reply_error_string("unable to enable bearer"); @@ -150,78 +150,66 @@ static struct sk_buff *cfg_enable_bearer(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_disable_bearer(void) +static struct sk_buff *cfg_disable_bearer(struct net *net) { if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - if (tipc_disable_bearer((char *)TLV_DATA(req_tlv_area))) + if (tipc_disable_bearer(net, (char *)TLV_DATA(req_tlv_area))) return tipc_cfg_reply_error_string("unable to disable bearer"); return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_own_addr(void) +static struct sk_buff *cfg_set_own_addr(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 addr; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); addr = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (addr == tipc_own_addr) + if (addr == tn->own_addr) return tipc_cfg_reply_none(); if (!tipc_addr_node_valid(addr)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (node address)"); - if (tipc_own_addr) + if (tn->own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - if (!tipc_net_start(addr)) + if (!tipc_net_start(net, addr)) return tipc_cfg_reply_none(); return tipc_cfg_reply_error_string("cannot change to network mode"); } -static struct sk_buff *cfg_set_max_ports(void) +static struct sk_buff *cfg_set_netid(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 value; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_max_ports) - return tipc_cfg_reply_none(); - if (value < 127 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max ports must be 127-65535)"); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max ports while TIPC is active)"); -} - -static struct sk_buff *cfg_set_netid(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_net_id) + if (value == tn->net_id) return tipc_cfg_reply_none(); if (value < 1 || value > 9999) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network id must be 1-9999)"); - if (tipc_own_addr) + if (tn->own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change network id once TIPC has joined a network)"); - tipc_net_id = value; + tn->net_id = value; return tipc_cfg_reply_none(); } -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area, - int request_space, int reply_headroom) +struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, + const void *request_area, int request_space, + int reply_headroom) { struct sk_buff *rep_tlv_buf; + struct tipc_net *tn = net_generic(net, tipc_net_id); rtnl_lock(); @@ -231,7 +219,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area rep_headroom = reply_headroom; /* Check command authorization */ - if (likely(in_own_node(orig_node))) { + if (likely(in_own_node(net, orig_node))) { /* command is permitted */ } else { rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -245,28 +233,33 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area rep_tlv_buf = tipc_cfg_reply_none(); break; case TIPC_CMD_GET_NODES: - rep_tlv_buf = tipc_node_get_nodes(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_node_get_nodes(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_GET_LINKS: - rep_tlv_buf = tipc_node_get_links(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_node_get_links(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_SHOW_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_show_stats(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_link_cmd_show_stats(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_RESET_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_reset_stats(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_link_cmd_reset_stats(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_SHOW_NAME_TABLE: - rep_tlv_buf = tipc_nametbl_get(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_nametbl_get(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_GET_BEARER_NAMES: - rep_tlv_buf = tipc_bearer_get_names(); + rep_tlv_buf = tipc_bearer_get_names(net); break; case TIPC_CMD_GET_MEDIA_NAMES: rep_tlv_buf = tipc_media_get_names(); break; case TIPC_CMD_SHOW_PORTS: - rep_tlv_buf = tipc_sk_socks_show(); + rep_tlv_buf = tipc_sk_socks_show(net); break; case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); @@ -274,28 +267,23 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_LINK_TOL: case TIPC_CMD_SET_LINK_PRI: case TIPC_CMD_SET_LINK_WINDOW: - rep_tlv_buf = tipc_link_cmd_config(req_tlv_area, req_tlv_space, cmd); + rep_tlv_buf = tipc_link_cmd_config(net, req_tlv_area, + req_tlv_space, cmd); break; case TIPC_CMD_ENABLE_BEARER: - rep_tlv_buf = cfg_enable_bearer(); + rep_tlv_buf = cfg_enable_bearer(net); break; case TIPC_CMD_DISABLE_BEARER: - rep_tlv_buf = cfg_disable_bearer(); + rep_tlv_buf = cfg_disable_bearer(net); break; case TIPC_CMD_SET_NODE_ADDR: - rep_tlv_buf = cfg_set_own_addr(); - break; - case TIPC_CMD_SET_MAX_PORTS: - rep_tlv_buf = cfg_set_max_ports(); + rep_tlv_buf = cfg_set_own_addr(net); break; case TIPC_CMD_SET_NETID: - rep_tlv_buf = cfg_set_netid(); - break; - case TIPC_CMD_GET_MAX_PORTS: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports); + rep_tlv_buf = cfg_set_netid(net); break; case TIPC_CMD_GET_NETID: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); + rep_tlv_buf = tipc_cfg_reply_unsigned(tn->net_id); break; case TIPC_CMD_NOT_NET_ADMIN: rep_tlv_buf = @@ -317,6 +305,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_REMOTE_MNG: case TIPC_CMD_GET_REMOTE_MNG: case TIPC_CMD_DUMP_LOG: + case TIPC_CMD_SET_MAX_PORTS: + case TIPC_CMD_GET_MAX_PORTS: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (obsolete command)"); break; diff --git a/net/tipc/config.h b/net/tipc/config.h index 47b1bf181612..9e9b575fc429 100644 --- a/net/tipc/config.h +++ b/net/tipc/config.h @@ -37,10 +37,10 @@ #ifndef _TIPC_CONFIG_H #define _TIPC_CONFIG_H -/* ---------------------------------------------------------------------- */ - #include "link.h" +#define ULTRA_STRING_MAX_LEN 32768 + struct sk_buff *tipc_cfg_reply_alloc(int payload_size); int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, void *tlv_data, int tlv_data_size); @@ -61,7 +61,7 @@ static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string) return tipc_cfg_reply_string_type(TIPC_TLV_ULTRA_STRING, string); } -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, +struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, const void *req_tlv_area, int req_tlv_space, int headroom); #endif diff --git a/net/tipc/core.c b/net/tipc/core.c index a5737b8407dd..674bd2698528 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -34,6 +34,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include "core.h" #include "name_table.h" #include "subscr.h" @@ -42,69 +44,68 @@ #include <linux/module.h> -/* global variables used by multiple sub-systems within TIPC */ -int tipc_random __read_mostly; - /* configurable TIPC parameters */ -u32 tipc_own_addr __read_mostly; -int tipc_max_ports __read_mostly; int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ -/** - * tipc_buf_acquire - creates a TIPC message buffer - * @size: message size (including TIPC header) - * - * Returns a new buffer with data pointers set to the specified size. - * - * NOTE: Headroom is reserved to allow prepending of a data link header. - * There may also be unrequested tailroom present at the buffer's end. - */ -struct sk_buff *tipc_buf_acquire(u32 size) +static int __net_init tipc_init_net(struct net *net) { - struct sk_buff *skb; - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); - if (skb) { - skb_reserve(skb, BUF_HEADROOM); - skb_put(skb, size); - skb->next = NULL; - } - return skb; + struct tipc_net *tn = net_generic(net, tipc_net_id); + int err; + + tn->net_id = 4711; + tn->own_addr = 0; + get_random_bytes(&tn->random, sizeof(int)); + INIT_LIST_HEAD(&tn->node_list); + spin_lock_init(&tn->node_list_lock); + + err = tipc_sk_rht_init(net); + if (err) + goto out_sk_rht; + + err = tipc_nametbl_init(net); + if (err) + goto out_nametbl; + + err = tipc_subscr_start(net); + if (err) + goto out_subscr; + return 0; + +out_subscr: + tipc_nametbl_stop(net); +out_nametbl: + tipc_sk_rht_destroy(net); +out_sk_rht: + return err; } -/** - * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode - */ -static void tipc_core_stop(void) +static void __net_exit tipc_exit_net(struct net *net) { - tipc_net_stop(); - tipc_bearer_cleanup(); - tipc_netlink_stop(); - tipc_subscr_stop(); - tipc_nametbl_stop(); - tipc_sk_ref_table_stop(); - tipc_socket_stop(); - tipc_unregister_sysctl(); + tipc_subscr_stop(net); + tipc_net_stop(net); + tipc_nametbl_stop(net); + tipc_sk_rht_destroy(net); } -/** - * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode - */ -static int tipc_core_start(void) +static struct pernet_operations tipc_net_ops = { + .init = tipc_init_net, + .exit = tipc_exit_net, + .id = &tipc_net_id, + .size = sizeof(struct tipc_net), +}; + +static int __init tipc_init(void) { int err; - get_random_bytes(&tipc_random, sizeof(tipc_random)); - - err = tipc_sk_ref_table_init(tipc_max_ports, tipc_random); - if (err) - goto out_reftbl; + pr_info("Activated (version " TIPC_MOD_VER ")\n"); - err = tipc_nametbl_init(); - if (err) - goto out_nametbl; + sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; err = tipc_netlink_start(); if (err) @@ -118,58 +119,37 @@ static int tipc_core_start(void) if (err) goto out_sysctl; - err = tipc_subscr_start(); + err = register_pernet_subsys(&tipc_net_ops); if (err) - goto out_subscr; + goto out_pernet; err = tipc_bearer_setup(); if (err) goto out_bearer; + pr_info("Started in single node mode\n"); return 0; out_bearer: - tipc_subscr_stop(); -out_subscr: + unregister_pernet_subsys(&tipc_net_ops); +out_pernet: tipc_unregister_sysctl(); out_sysctl: tipc_socket_stop(); out_socket: tipc_netlink_stop(); out_netlink: - tipc_nametbl_stop(); -out_nametbl: - tipc_sk_ref_table_stop(); -out_reftbl: + pr_err("Unable to start in single node mode\n"); return err; } -static int __init tipc_init(void) -{ - int res; - - pr_info("Activated (version " TIPC_MOD_VER ")\n"); - - tipc_own_addr = 0; - tipc_max_ports = CONFIG_TIPC_PORTS; - tipc_net_id = 4711; - - sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_LOW_IMPORTANCE; - sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_CRITICAL_IMPORTANCE; - sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; - - res = tipc_core_start(); - if (res) - pr_err("Unable to start in single node mode\n"); - else - pr_info("Started in single node mode\n"); - return res; -} - static void __exit tipc_exit(void) { - tipc_core_stop(); + tipc_bearer_cleanup(); + tipc_netlink_stop(); + tipc_socket_stop(); + tipc_unregister_sysctl(); + unregister_pernet_subsys(&tipc_net_ops); + pr_info("Deactivated\n"); } diff --git a/net/tipc/core.h b/net/tipc/core.h index 84602137ce20..817b2e9d4227 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -37,8 +37,6 @@ #ifndef _TIPC_CORE_H #define _TIPC_CORE_H -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/tipc.h> #include <linux/tipc_config.h> #include <linux/tipc_netlink.h> @@ -59,47 +57,56 @@ #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> +#include <net/netns/generic.h> +#include <linux/rhashtable.h> -#define TIPC_MOD_VER "2.0.0" - -#define ULTRA_STRING_MAX_LEN 32768 -#define TIPC_MAX_SUBSCRIPTIONS 65535 -#define TIPC_MAX_PUBLICATIONS 65535 +#include "node.h" +#include "bearer.h" +#include "bcast.h" +#include "netlink.h" +#include "link.h" +#include "node.h" +#include "msg.h" -struct tipc_msg; /* msg.h */ +#define TIPC_MOD_VER "2.0.0" int tipc_snprintf(char *buf, int len, const char *fmt, ...); -/* - * TIPC-specific error codes - */ -#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ - -/* - * Global configuration variables - */ -extern u32 tipc_own_addr __read_mostly; -extern int tipc_max_ports __read_mostly; extern int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; -/* - * Other global variables - */ -extern int tipc_random __read_mostly; +struct tipc_net { + u32 own_addr; + int net_id; + int random; -/* - * Routines available to privileged subsystems - */ -int tipc_netlink_start(void); -void tipc_netlink_stop(void); -int tipc_socket_init(void); -void tipc_socket_stop(void); -int tipc_sock_create_local(int type, struct socket **res); -void tipc_sock_release_local(struct socket *sock); -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags); + /* Node table and node list */ + spinlock_t node_list_lock; + struct hlist_head node_htable[NODE_HTABLE_SIZE]; + struct list_head node_list; + u32 num_nodes; + u32 num_links; + + /* Bearer list */ + struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; + + /* Broadcast link */ + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; + + /* Socket hash table */ + struct rhashtable sk_rht; + + /* Name table */ + spinlock_t nametbl_lock; + struct name_table *nametbl; + + /* Topology subscription server */ + struct tipc_server *topsrv; + atomic_t subscription_count; +}; #ifdef CONFIG_SYSCTL int tipc_register_sysctl(void); @@ -109,101 +116,4 @@ void tipc_unregister_sysctl(void); #define tipc_unregister_sysctl() #endif -/* - * TIPC timer code - */ -typedef void (*Handler) (unsigned long); - -/** - * k_init_timer - initialize a timer - * @timer: pointer to timer structure - * @routine: pointer to routine to invoke when timer expires - * @argument: value to pass to routine when timer expires - * - * Timer must be initialized before use (and terminated when no longer needed). - */ -static inline void k_init_timer(struct timer_list *timer, Handler routine, - unsigned long argument) -{ - setup_timer(timer, routine, argument); -} - -/** - * k_start_timer - start a timer - * @timer: pointer to timer structure - * @msec: time to delay (in ms) - * - * Schedules a previously initialized timer for later execution. - * If timer is already running, the new timeout overrides the previous request. - * - * To ensure the timer doesn't expire before the specified delay elapses, - * the amount of delay is rounded up when converting to the jiffies - * then an additional jiffy is added to account for the fact that - * the starting time may be in the middle of the current jiffy. - */ -static inline void k_start_timer(struct timer_list *timer, unsigned long msec) -{ - mod_timer(timer, jiffies + msecs_to_jiffies(msec) + 1); -} - -/** - * k_cancel_timer - cancel a timer - * @timer: pointer to timer structure - * - * Cancels a previously initialized timer. - * Can be called safely even if the timer is already inactive. - * - * WARNING: Must not be called when holding locks required by the timer's - * timeout routine, otherwise deadlock can occur on SMP systems! - */ -static inline void k_cancel_timer(struct timer_list *timer) -{ - del_timer_sync(timer); -} - -/** - * k_term_timer - terminate a timer - * @timer: pointer to timer structure - * - * Prevents further use of a previously initialized timer. - * - * WARNING: Caller must ensure timer isn't currently running. - * - * (Do not "enhance" this routine to automatically cancel an active timer, - * otherwise deadlock can arise when a timeout routine calls k_term_timer.) - */ -static inline void k_term_timer(struct timer_list *timer) -{ -} - -/* - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). - * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM LL_MAX_HEADER - -struct tipc_skb_cb { - void *handle; - struct sk_buff *tail; - bool deferred; - bool wakeup_pending; - bool bundling; - u16 chain_sz; - u16 chain_imp; -}; - -#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) - -static inline struct tipc_msg *buf_msg(struct sk_buff *skb) -{ - return (struct tipc_msg *)skb->data; -} - -struct sk_buff *tipc_buf_acquire(u32 size); - #endif diff --git a/net/tipc/discover.c b/net/tipc/discover.c index aa722a42ef8b..5b40cb89ff0a 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -38,15 +38,20 @@ #include "link.h" #include "discover.h" -#define TIPC_LINK_REQ_INIT 125 /* min delay during bearer start up */ -#define TIPC_LINK_REQ_FAST 1000 /* max delay if bearer has no links */ -#define TIPC_LINK_REQ_SLOW 60000 /* max delay if bearer has links */ -#define TIPC_LINK_REQ_INACTIVE 0xffffffff /* indicates no timer in use */ +/* min delay during bearer start up */ +#define TIPC_LINK_REQ_INIT msecs_to_jiffies(125) +/* max delay if bearer has no links */ +#define TIPC_LINK_REQ_FAST msecs_to_jiffies(1000) +/* max delay if bearer has links */ +#define TIPC_LINK_REQ_SLOW msecs_to_jiffies(60000) +/* indicates no timer in use */ +#define TIPC_LINK_REQ_INACTIVE 0xffffffff /** * struct tipc_link_req - information about an ongoing link setup request * @bearer_id: identity of bearer issuing requests + * @net: network namespace instance * @dest: destination address for request messages * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. with an active link) @@ -58,31 +63,34 @@ struct tipc_link_req { u32 bearer_id; struct tipc_media_addr dest; + struct net *net; u32 domain; int num_nodes; spinlock_t lock; struct sk_buff *buf; struct timer_list timer; - unsigned int timer_intv; + unsigned long timer_intv; }; /** * tipc_disc_init_msg - initialize a link setup message + * @net: the applicable net namespace * @type: message type (request or response) * @b_ptr: ptr to bearer issuing message */ -static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, +static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, struct tipc_bearer *b_ptr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg; u32 dest_domain = b_ptr->domain; msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); + tipc_msg_init(net, msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tipc_random); + msg_set_node_sig(msg, tn->random); msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tipc_net_id); + msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); } @@ -107,11 +115,14 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, /** * tipc_disc_rcv - handle incoming discovery message (request or response) + * @net: the applicable net namespace * @buf: buffer containing message * @bearer: bearer that message arrived on */ -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *bearer) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_link *link; struct tipc_media_addr maddr; @@ -133,7 +144,7 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) kfree_skb(buf); /* Ensure message from node is valid and communication is permitted */ - if (net_id != tipc_net_id) + if (net_id != tn->net_id) return; if (maddr.broadcast) return; @@ -142,20 +153,20 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) if (!tipc_addr_node_valid(onode)) return; - if (in_own_node(onode)) { + if (in_own_node(net, onode)) { if (memcmp(&maddr, &bearer->addr, sizeof(maddr))) - disc_dupl_alert(bearer, tipc_own_addr, &maddr); + disc_dupl_alert(bearer, tn->own_addr, &maddr); return; } - if (!tipc_in_scope(ddom, tipc_own_addr)) + if (!tipc_in_scope(ddom, tn->own_addr)) return; if (!tipc_in_scope(bearer->domain, onode)) return; /* Locate, or if necessary, create, node: */ - node = tipc_node_find(onode); + node = tipc_node_find(net, onode); if (!node) - node = tipc_node_create(onode); + node = tipc_node_create(net, onode); if (!node) return; @@ -244,8 +255,8 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) if (respond && (mtyp == DSC_REQ_MSG)) { rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { - tipc_disc_init_msg(rbuf, DSC_RESP_MSG, bearer); - tipc_bearer_send(bearer->identity, rbuf, &maddr); + tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); + tipc_bearer_send(net, bearer->identity, rbuf, &maddr); kfree_skb(rbuf); } } @@ -265,7 +276,7 @@ static void disc_update(struct tipc_link_req *req) if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) || (req->timer_intv > TIPC_LINK_REQ_FAST)) { req->timer_intv = TIPC_LINK_REQ_INIT; - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&req->timer, jiffies + req->timer_intv); } } } @@ -295,12 +306,13 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) /** * disc_timeout - send a periodic link setup request - * @req: ptr to link request structure + * @data: ptr to link request structure * * Called whenever a link setup request timer associated with a bearer expires. */ -static void disc_timeout(struct tipc_link_req *req) +static void disc_timeout(unsigned long data) { + struct tipc_link_req *req = (struct tipc_link_req *)data; int max_delay; spin_lock_bh(&req->lock); @@ -318,7 +330,7 @@ static void disc_timeout(struct tipc_link_req *req) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest); req->timer_intv *= 2; @@ -329,20 +341,22 @@ static void disc_timeout(struct tipc_link_req *req) if (req->timer_intv > max_delay) req->timer_intv = max_delay; - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&req->timer, jiffies + req->timer_intv); exit: spin_unlock_bh(&req->lock); } /** * tipc_disc_create - create object to send periodic link setup requests + * @net: the applicable net namespace * @b_ptr: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain to which links can be established * * Returns 0 if successful, otherwise -errno. */ -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest) { struct tipc_link_req *req; @@ -356,17 +370,18 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) return -ENOMEM; } - tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); + req->net = net; req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); - k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); - k_start_timer(&req->timer, req->timer_intv); + setup_timer(&req->timer, disc_timeout, (unsigned long)req); + mod_timer(&req->timer, jiffies + req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); return 0; } @@ -376,28 +391,29 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) */ void tipc_disc_delete(struct tipc_link_req *req) { - k_cancel_timer(&req->timer); - k_term_timer(&req->timer); + del_timer_sync(&req->timer); kfree_skb(req->buf); kfree(req); } /** * tipc_disc_reset - reset object to send periodic link setup requests + * @net: the applicable net namespace * @b_ptr: ptr to bearer issuing requests * @dest_domain: network domain to which links can be established */ -void tipc_disc_reset(struct tipc_bearer *b_ptr) +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) { struct tipc_link_req *req = b_ptr->link_req; spin_lock_bh(&req->lock); - tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + req->net = net; req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; - k_start_timer(&req->timer, req->timer_intv); - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + mod_timer(&req->timer, jiffies + req->timer_intv); + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); spin_unlock_bh(&req->lock); } diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 515b57392f4d..c9b12770c5ed 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -39,11 +39,13 @@ struct tipc_link_req; -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest); void tipc_disc_delete(struct tipc_link_req *req); -void tipc_disc_reset(struct tipc_bearer *b_ptr); +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); void tipc_disc_remove_dest(struct tipc_link_req *req); -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr); +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b_ptr); #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index 23bcc1132365..2846ad802e43 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -101,19 +101,23 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { */ #define START_CHANGEOVER 100000u -static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, +static void link_handle_out_of_seq_msg(struct net *net, + struct tipc_link *l_ptr, struct sk_buff *buf); -static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); -static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, +static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, + struct sk_buff *buf); +static int tipc_link_tunnel_rcv(struct net *net, struct tipc_node *n_ptr, struct sk_buff **buf); -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); -static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf); -static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf); +static int tipc_link_input(struct net *net, struct tipc_link *l, + struct sk_buff *buf); +static int tipc_link_prepare_input(struct net *net, struct tipc_link *l, + struct sk_buff **buf); /* * Simple link routines @@ -125,11 +129,13 @@ static unsigned int align(unsigned int i) static void link_init_max_pkt(struct tipc_link *l_ptr) { + struct tipc_node *node = l_ptr->owner; + struct tipc_net *tn = net_generic(node->net, tipc_net_id); struct tipc_bearer *b_ptr; u32 max_pkt; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); if (!b_ptr) { rcu_read_unlock(); return; @@ -169,8 +175,9 @@ int tipc_link_is_active(struct tipc_link *l_ptr) * link_timeout - handle expiration of link timer * @l_ptr: pointer to link */ -static void link_timeout(struct tipc_link *l_ptr) +static void link_timeout(unsigned long data) { + struct tipc_link *l_ptr = (struct tipc_link *)data; struct sk_buff *skb; tipc_node_lock(l_ptr->owner); @@ -217,9 +224,9 @@ static void link_timeout(struct tipc_link *l_ptr) tipc_node_unlock(l_ptr->owner); } -static void link_set_timer(struct tipc_link *l_ptr, u32 time) +static void link_set_timer(struct tipc_link *link, unsigned long time) { - k_start_timer(&l_ptr->timer, time); + mod_timer(&link->timer, jiffies + time); } /** @@ -234,6 +241,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_msg *msg; char *if_name; @@ -263,8 +271,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->addr = peer; if_name = strchr(b_ptr->name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", - tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), + tipc_zone(tn->own_addr), tipc_cluster(tn->own_addr), + tipc_node(tn->own_addr), if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); /* note: peer i/f name is updated by reset/activate message */ @@ -278,9 +286,10 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; - tipc_msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(n_ptr->net, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, + l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); - msg_set_session(msg, (tipc_random & 0xffff)); + msg_set_session(msg, (tn->random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); @@ -299,21 +308,22 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, tipc_node_attach_link(n_ptr, l_ptr); - k_init_timer(&l_ptr->timer, (Handler)link_timeout, - (unsigned long)l_ptr); + setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); link_state_event(l_ptr, STARTING_EVT); return l_ptr; } -void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) { @@ -350,10 +360,12 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) static bool link_schedule_user(struct tipc_link *link, u32 oport, uint chain_sz, uint imp) { + struct net *net = link->owner->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf; - buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, tipc_own_addr, - tipc_own_addr, oport, 0, 0); + buf = tipc_msg_create(net, SOCK_WAKEUP, 0, INT_H_SIZE, 0, tn->own_addr, + tn->own_addr, oport, 0, 0); if (!buf) return false; TIPC_SKB_CB(buf)->chain_sz = chain_sz; @@ -425,7 +437,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) return; tipc_node_link_down(l_ptr->owner, l_ptr); - tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr); + tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; @@ -448,13 +460,14 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_reset_statistics(l_ptr); } -void tipc_link_reset_list(unsigned int bearer_id) +void tipc_link_reset_list(struct net *net, unsigned int bearer_id) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) @@ -464,11 +477,14 @@ void tipc_link_reset_list(unsigned int bearer_id) rcu_read_unlock(); } -static void link_activate(struct tipc_link *l_ptr) +static void link_activate(struct tipc_link *link) { - l_ptr->next_in_no = l_ptr->stats.recv_info = 1; - tipc_node_link_up(l_ptr->owner, l_ptr); - tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr); + struct tipc_node *node = link->owner; + + link->next_in_no = 1; + link->stats.recv_info = 1; + tipc_node_link_up(node, link); + tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); } /** @@ -479,7 +495,7 @@ static void link_activate(struct tipc_link *l_ptr) static void link_state_event(struct tipc_link *l_ptr, unsigned int event) { struct tipc_link *other; - u32 cont_intv = l_ptr->continuity_interval; + unsigned long cont_intv = l_ptr->cont_intv; if (l_ptr->flags & LINK_STOPPED) return; @@ -522,8 +538,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv / 4); break; case RESET_MSG: - pr_info("%s<%s>, requested by peer\n", link_rst_msg, - l_ptr->name); + pr_debug("%s<%s>, requested by peer\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -533,7 +549,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; default: - pr_err("%s%u in WW state\n", link_unk_evt, event); + pr_debug("%s%u in WW state\n", link_unk_evt, event); } break; case WORKING_UNKNOWN: @@ -545,8 +561,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: - pr_info("%s<%s>, requested by peer while probing\n", - link_rst_msg, l_ptr->name); + pr_debug("%s<%s>, requested by peer while probing\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -572,8 +588,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ - pr_warn("%s<%s>, peer not responding\n", - link_rst_msg, l_ptr->name); + pr_debug("%s<%s>, peer not responding\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; @@ -700,7 +716,8 @@ drop: * Only the socket functions tipc_send_stream() and tipc_send_packet() need * to act on the return value, since they may need to do more send attempts. */ -int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list) { struct tipc_msg *msg = buf_msg(skb_peek(list)); uint psz = msg_size(msg); @@ -733,13 +750,14 @@ int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) if (skb_queue_len(outqueue) < sndlim) { __skb_queue_tail(outqueue, skb); - tipc_bearer_send(link->bearer_id, skb, addr); + tipc_bearer_send(net, link->bearer_id, + skb, addr); link->next_out = NULL; link->unacked_window = 0; } else if (tipc_msg_bundle(outqueue, skb, mtu)) { link->stats.sent_bundled++; continue; - } else if (tipc_msg_make_bundle(outqueue, skb, mtu, + } else if (tipc_msg_make_bundle(net, outqueue, skb, mtu, link->addr)) { link->stats.sent_bundled++; link->stats.sent_bundles++; @@ -767,19 +785,21 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) struct sk_buff_head head; skb2list(skb, &head); - return __tipc_link_xmit(link, &head); + return __tipc_link_xmit(link->owner->net, link, &head); } -int tipc_link_xmit_skb(struct sk_buff *skb, u32 dnode, u32 selector) +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, + u32 selector) { struct sk_buff_head head; skb2list(skb, &head); - return tipc_link_xmit(&head, dnode, selector); + return tipc_link_xmit(net, &head, dnode, selector); } /** * tipc_link_xmit() is the general link level function for message sending + * @net: the applicable net namespace * @list: chain of buffers containing message * @dsz: amount of user data to be sent * @dnode: address of destination node @@ -787,30 +807,31 @@ int tipc_link_xmit_skb(struct sk_buff *skb, u32 dnode, u32 selector) * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_link_xmit(struct sk_buff_head *list, u32 dnode, u32 selector) +int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, + u32 selector) { struct tipc_link *link = NULL; struct tipc_node *node; int rc = -EHOSTUNREACH; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (node) { tipc_node_lock(node); link = node->active_links[selector & 1]; if (link) - rc = __tipc_link_xmit(link, list); + rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); } if (link) return rc; - if (likely(in_own_node(dnode))) { + if (likely(in_own_node(net, dnode))) { /* As a node local message chain never contains more than one * buffer, we just need to dequeue one SKB buffer from the * head list. */ - return tipc_sk_rcv(__skb_dequeue(list)); + return tipc_sk_rcv(net, __skb_dequeue(list)); } __skb_queue_purge(list); @@ -835,7 +856,8 @@ static void tipc_link_sync_xmit(struct tipc_link *link) return; msg = buf_msg(skb); - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, link->addr); + tipc_msg_init(link->owner->net, msg, BCAST_PROTOCOL, STATE_MSG, + INT_H_SIZE, link->addr); msg_set_last_bcast(msg, link->owner->bclink.acked); __tipc_link_xmit_skb(link, skb); } @@ -890,7 +912,8 @@ void tipc_link_push_packets(struct tipc_link *l_ptr) msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); if (msg_user(msg) == MSG_BUNDLER) TIPC_SKB_CB(skb)->bundling = false; - tipc_bearer_send(l_ptr->bearer_id, skb, + tipc_bearer_send(l_ptr->owner->net, + l_ptr->bearer_id, skb, &l_ptr->media_addr); l_ptr->next_out = tipc_skb_queue_next(outqueue, skb); } else { @@ -923,6 +946,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); + struct net *net = l_ptr->owner->net; pr_warn("Retransmission failure on link <%s>\n", l_ptr->name); @@ -940,7 +964,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, pr_cont("Outstanding acks: %lu\n", (unsigned long) TIPC_SKB_CB(buf)->handle); - n_ptr = tipc_bclink_retransmit_to(); + n_ptr = tipc_bclink_retransmit_to(net); tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); @@ -955,7 +979,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, tipc_node_unlock(n_ptr); - tipc_bclink_set_flags(TIPC_BCLINK_RESET); + tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); l_ptr->stale_count = 0; } } @@ -987,7 +1011,8 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, msg = buf_msg(skb); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->bearer_id, skb, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, + &l_ptr->media_addr); retransmits--; l_ptr->stats.retransmitted++; } @@ -1063,14 +1088,16 @@ static int link_recv_buf_validate(struct sk_buff *buf) /** * tipc_rcv - process TIPC packets/messages arriving from off-node + * @net: the applicable net namespace * @skb: TIPC packet * @b_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. */ -void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff_head head; struct tipc_node *n_ptr; struct tipc_link *l_ptr; @@ -1096,19 +1123,19 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) if (unlikely(msg_non_seq(msg))) { if (msg_user(msg) == LINK_CONFIG) - tipc_disc_rcv(skb, b_ptr); + tipc_disc_rcv(net, skb, b_ptr); else - tipc_bclink_rcv(skb); + tipc_bclink_rcv(net, skb); continue; } /* Discard unicast link messages destined for another node */ if (unlikely(!msg_short(msg) && - (msg_destnode(msg) != tipc_own_addr))) + (msg_destnode(msg) != tn->own_addr))) goto discard; /* Locate neighboring node that sent message */ - n_ptr = tipc_node_find(msg_prevnode(msg)); + n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; tipc_node_lock(n_ptr); @@ -1159,7 +1186,7 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { - tipc_link_proto_rcv(l_ptr, skb); + tipc_link_proto_rcv(net, l_ptr, skb); link_retrieve_defq(l_ptr, &head); tipc_node_unlock(n_ptr); continue; @@ -1179,7 +1206,7 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Link is now in state WORKING_WORKING */ if (unlikely(seq_no != mod(l_ptr->next_in_no))) { - link_handle_out_of_seq_msg(l_ptr, skb); + link_handle_out_of_seq_msg(net, l_ptr, skb); link_retrieve_defq(l_ptr, &head); tipc_node_unlock(n_ptr); continue; @@ -1193,13 +1220,13 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } - if (tipc_link_prepare_input(l_ptr, &skb)) { + if (tipc_link_prepare_input(net, l_ptr, &skb)) { tipc_node_unlock(n_ptr); continue; } tipc_node_unlock(n_ptr); - if (tipc_link_input(l_ptr, skb) != 0) + if (tipc_link_input(net, l_ptr, skb) != 0) goto discard; continue; unlock_discard: @@ -1216,7 +1243,8 @@ discard: * * Node lock must be held */ -static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) +static int tipc_link_prepare_input(struct net *net, struct tipc_link *l, + struct sk_buff **buf) { struct tipc_node *n; struct tipc_msg *msg; @@ -1226,7 +1254,7 @@ static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) msg = buf_msg(*buf); switch (msg_user(msg)) { case CHANGEOVER_PROTOCOL: - if (tipc_link_tunnel_rcv(n, buf)) + if (tipc_link_tunnel_rcv(net, n, buf)) res = 0; break; case MSG_FRAGMENTER: @@ -1258,7 +1286,8 @@ static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) /** * tipc_link_input - Deliver message too higher layers */ -static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf) +static int tipc_link_input(struct net *net, struct tipc_link *l, + struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); int res = 0; @@ -1269,13 +1298,13 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf) case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: case CONN_MANAGER: - tipc_sk_rcv(buf); + tipc_sk_rcv(net, buf); break; case NAME_DISTRIBUTOR: - tipc_named_rcv(buf); + tipc_named_rcv(net, buf); break; case MSG_BUNDLER: - tipc_link_bundle_rcv(buf); + tipc_link_bundle_rcv(net, buf); break; default: res = -EINVAL; @@ -1325,13 +1354,14 @@ u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) /* * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet */ -static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, +static void link_handle_out_of_seq_msg(struct net *net, + struct tipc_link *l_ptr, struct sk_buff *buf) { u32 seq_no = buf_seqno(buf); if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) { - tipc_link_proto_rcv(l_ptr, buf); + tipc_link_proto_rcv(net, l_ptr, buf); return; } @@ -1381,7 +1411,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, msg_set_type(msg, msg_typ); msg_set_net_plane(msg, l_ptr->net_plane); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_last_bcast(msg, tipc_bclink_get_last_sent()); + msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net)); if (msg_typ == STATE_MSG) { u32 next_sent = mod(l_ptr->next_out_no); @@ -1445,7 +1475,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf, + &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); } @@ -1455,8 +1486,10 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, * Note that network plane id propagates through the network, and may * change at any time. The node with lowest address rules */ -static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) +static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, + struct sk_buff *buf) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 rec_gap = 0; u32 max_pkt_info; u32 max_pkt_ack; @@ -1468,7 +1501,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) goto exit; if (l_ptr->net_plane != msg_net_plane(msg)) - if (tipc_own_addr > msg_prevnode(msg)) + if (tn->own_addr > msg_prevnode(msg)) l_ptr->net_plane = msg_net_plane(msg); switch (msg_type(msg)) { @@ -1535,9 +1568,9 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) if (msg_linkprio(msg) && (msg_linkprio(msg) != l_ptr->priority)) { - pr_warn("%s<%s>, priority change %u->%u\n", - link_rst_msg, l_ptr->name, l_ptr->priority, - msg_linkprio(msg)); + pr_debug("%s<%s>, priority change %u->%u\n", + link_rst_msg, l_ptr->name, + l_ptr->priority, msg_linkprio(msg)); l_ptr->priority = msg_linkprio(msg); tipc_link_reset(l_ptr); /* Enforce change to take effect */ break; @@ -1571,7 +1604,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) /* Protocol message before retransmits, reduce loss risk */ if (l_ptr->owner->bclink.recv_permitted) - tipc_bclink_update_link_state(l_ptr->owner, + tipc_bclink_update_link_state(net, l_ptr->owner, msg_last_bcast(msg)); if (rec_gap || (msg_probe(msg))) { @@ -1636,8 +1669,8 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (!tunnel) return; - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(l_ptr->owner->net, &tunnel_hdr, CHANGEOVER_PROTOCOL, + ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); @@ -1694,8 +1727,8 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct sk_buff *skb; struct tipc_msg tunnel_hdr; - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(l_ptr->owner->net, &tunnel_hdr, CHANGEOVER_PROTOCOL, + DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); msg_set_msgcnt(&tunnel_hdr, skb_queue_len(&l_ptr->outqueue)); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); skb_queue_walk(&l_ptr->outqueue, skb) { @@ -1748,7 +1781,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) /* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. * Owner node is locked. */ -static void tipc_link_dup_rcv(struct tipc_link *l_ptr, +static void tipc_link_dup_rcv(struct net *net, struct tipc_link *l_ptr, struct sk_buff *t_buf) { struct sk_buff *buf; @@ -1763,7 +1796,7 @@ static void tipc_link_dup_rcv(struct tipc_link *l_ptr, } /* Add buffer to deferred queue, if applicable: */ - link_handle_out_of_seq_msg(l_ptr, buf); + link_handle_out_of_seq_msg(net, l_ptr, buf); } /* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet @@ -1817,7 +1850,7 @@ exit: * returned to the active link for delivery upwards. * Owner node is locked. */ -static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, +static int tipc_link_tunnel_rcv(struct net *net, struct tipc_node *n_ptr, struct sk_buff **buf) { struct sk_buff *t_buf = *buf; @@ -1835,7 +1868,7 @@ static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, goto exit; if (msg_type(t_msg) == DUPLICATE_MSG) - tipc_link_dup_rcv(l_ptr, t_buf); + tipc_link_dup_rcv(net, l_ptr, t_buf); else if (msg_type(t_msg) == ORIGINAL_MSG) *buf = tipc_link_failover_rcv(l_ptr, t_buf); else @@ -1848,7 +1881,7 @@ exit: /* * Bundler functionality: */ -void tipc_link_bundle_rcv(struct sk_buff *buf) +void tipc_link_bundle_rcv(struct net *net, struct sk_buff *buf) { u32 msgcount = msg_msgcnt(buf_msg(buf)); u32 pos = INT_H_SIZE; @@ -1865,13 +1898,13 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) pos += align(msg_size(omsg)); if (msg_isdata(omsg)) { if (unlikely(msg_type(omsg) == TIPC_MCAST_MSG)) - tipc_sk_mcast_rcv(obuf); + tipc_sk_mcast_rcv(net, obuf); else - tipc_sk_rcv(obuf); + tipc_sk_rcv(net, obuf); } else if (msg_user(omsg) == CONN_MANAGER) { - tipc_sk_rcv(obuf); + tipc_sk_rcv(net, obuf); } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { - tipc_named_rcv(obuf); + tipc_named_rcv(net, obuf); } else { pr_warn("Illegal bundled msg: %u\n", msg_user(omsg)); kfree_skb(obuf); @@ -1880,15 +1913,16 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) kfree_skb(buf); } -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) { - if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) + unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; + + if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) return; - l_ptr->tolerance = tolerance; - l_ptr->continuity_interval = - ((tolerance / 4) > 500) ? 500 : tolerance / 4; - l_ptr->abort_limit = tolerance / (l_ptr->continuity_interval / 4); + l_ptr->tolerance = tol; + l_ptr->cont_intv = msecs_to_jiffies(intv); + l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); } void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) @@ -1911,22 +1945,25 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) } /* tipc_link_find_owner - locate owner node of link by link's name + * @net: the applicable net namespace * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. * * Returns pointer to node owning the link, or 0 if no matching link is found. */ -static struct tipc_node *tipc_link_find_owner(const char *link_name, +static struct tipc_node *tipc_link_find_owner(struct net *net, + const char *link_name, unsigned int *bearer_id) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; - struct tipc_node *found_node = 0; + struct tipc_node *found_node = NULL; int i; *bearer_id = 0; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { l_ptr = n_ptr->links[i]; @@ -1970,6 +2007,7 @@ static int link_value_is_valid(u16 cmd, u32 new_value) /** * link_cmd_set_value - change priority/tolerance/window for link/bearer/media + * @net: the applicable net namespace * @name: ptr to link, bearer, or media name * @new_value: new value of link, bearer, or media setting * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) @@ -1978,7 +2016,8 @@ static int link_value_is_valid(u16 cmd, u32 new_value) * * Returns 0 if value updated and negative value on error. */ -static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) +static int link_cmd_set_value(struct net *net, const char *name, u32 new_value, + u16 cmd) { struct tipc_node *node; struct tipc_link *l_ptr; @@ -1987,7 +2026,7 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) int bearer_id; int res = 0; - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (node) { tipc_node_lock(node); l_ptr = node->links[bearer_id]; @@ -2016,7 +2055,7 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) return res; } - b_ptr = tipc_bearer_find(name); + b_ptr = tipc_bearer_find(net, name); if (b_ptr) { switch (cmd) { case TIPC_CMD_SET_LINK_TOL: @@ -2055,8 +2094,8 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) return res; } -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, - u16 cmd) +struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area, + int req_tlv_space, u16 cmd) { struct tipc_link_config *args; u32 new_value; @@ -2074,13 +2113,13 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space if (!strcmp(args->name, tipc_bclink_name)) { if ((cmd == TIPC_CMD_SET_LINK_WINDOW) && - (tipc_bclink_set_queue_limits(new_value) == 0)) + (tipc_bclink_set_queue_limits(net, new_value) == 0)) return tipc_cfg_reply_none(); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change setting on broadcast link)"); } - res = link_cmd_set_value(args->name, new_value, cmd); + res = link_cmd_set_value(net, args->name, new_value, cmd); if (res) return tipc_cfg_reply_error_string("cannot change link setting"); @@ -2098,7 +2137,9 @@ static void link_reset_statistics(struct tipc_link *l_ptr) l_ptr->stats.recv_info = l_ptr->next_in_no; } -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, + const void *req_tlv_area, + int req_tlv_space) { char *link_name; struct tipc_link *l_ptr; @@ -2110,11 +2151,11 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ link_name = (char *)TLV_DATA(req_tlv_area); if (!strcmp(link_name, tipc_bclink_name)) { - if (tipc_bclink_reset_stats()) + if (tipc_bclink_reset_stats(net)) return tipc_cfg_reply_error_string("link not found"); return tipc_cfg_reply_none(); } - node = tipc_link_find_owner(link_name, &bearer_id); + node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return tipc_cfg_reply_error_string("link not found"); @@ -2139,13 +2180,15 @@ static u32 percent(u32 count, u32 total) /** * tipc_link_stats - print link statistics + * @net: the applicable net namespace * @name: link name * @buf: print buffer area * @buf_size: size of print buffer area * * Returns length of print buffer data string (or 0 if error) */ -static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) +static int tipc_link_stats(struct net *net, const char *name, char *buf, + const u32 buf_size) { struct tipc_link *l; struct tipc_stats *s; @@ -2156,9 +2199,9 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) int ret; if (!strcmp(name, tipc_bclink_name)) - return tipc_bclink_stats(buf, buf_size); + return tipc_bclink_stats(net, buf, buf_size); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return 0; @@ -2235,7 +2278,9 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) return ret; } -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_link_cmd_show_stats(struct net *net, + const void *req_tlv_area, + int req_tlv_space) { struct sk_buff *buf; struct tlv_desc *rep_tlv; @@ -2253,7 +2298,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s rep_tlv = (struct tlv_desc *)buf->data; pb = TLV_DATA(rep_tlv); pb_len = ULTRA_STRING_MAX_LEN; - str_len = tipc_link_stats((char *)TLV_DATA(req_tlv_area), + str_len = tipc_link_stats(net, (char *)TLV_DATA(req_tlv_area), pb, pb_len); if (!str_len) { kfree_skb(buf); @@ -2266,39 +2311,13 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s return buf; } -/** - * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination - * @dest: network address of destination node - * @selector: used to select from set of active links - * - * If no active link can be found, uses default maximum packet size. - */ -u32 tipc_link_get_max_pkt(u32 dest, u32 selector) -{ - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - u32 res = MAX_PKT_DEFAULT; - - if (dest == tipc_own_addr) - return MAX_MSG_SIZE; - - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector & 1]; - if (l_ptr) - res = l_ptr->max_pkt; - tipc_node_unlock(n_ptr); - } - return res; -} - static void link_print(struct tipc_link *l_ptr, const char *str) { + struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); if (b_ptr) pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); rcu_read_unlock(); @@ -2362,6 +2381,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2377,7 +2397,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2493,12 +2513,14 @@ msg_full: } /* Caller should hold appropriate locks to protect the link */ -static int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) +static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_LINK_GET); @@ -2512,7 +2534,7 @@ static int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, - tipc_cluster_mask(tipc_own_addr))) + tipc_cluster_mask(tn->own_addr))) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->max_pkt)) goto attr_msg_full; @@ -2562,9 +2584,8 @@ msg_full: } /* Caller should hold node lock */ -static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, - struct tipc_node *node, - u32 *prev_link) +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) { u32 i; int err; @@ -2575,7 +2596,7 @@ static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, if (!node->links[i]) continue; - err = __tipc_nl_add_link(msg, node->links[i]); + err = __tipc_nl_add_link(net, msg, node->links[i]); if (err) return err; } @@ -2586,6 +2607,8 @@ static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_nl_msg msg; u32 prev_node = cb->args[0]; @@ -2603,7 +2626,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); if (prev_node) { - node = tipc_node_find(prev_node); + node = tipc_node_find(net, prev_node); if (!node) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the @@ -2615,9 +2638,11 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out; } - list_for_each_entry_continue_rcu(node, &tipc_node_list, list) { + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { tipc_node_lock(node); - err = __tipc_nl_add_node_links(&msg, node, &prev_link); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); tipc_node_unlock(node); if (err) goto out; @@ -2625,13 +2650,14 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) prev_node = node->addr; } } else { - err = tipc_nl_add_bc_link(&msg); + err = tipc_nl_add_bc_link(net, &msg); if (err) goto out; - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_lock(node); - err = __tipc_nl_add_node_links(&msg, node, &prev_link); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); tipc_node_unlock(node); if (err) goto out; @@ -2652,6 +2678,7 @@ out: int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct sk_buff *ans_skb; struct tipc_nl_msg msg; struct tipc_link *link; @@ -2664,7 +2691,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) return -EINVAL; name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2683,7 +2710,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_link(&msg, link); + err = __tipc_nl_add_link(net, &msg, link); if (err) goto err_out; @@ -2706,6 +2733,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2722,13 +2750,13 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(); + err = tipc_bclink_reset_stats(net); if (err) return err; return 0; } - node = tipc_link_find_owner(link_name, &bearer_id); + node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return -EINVAL; diff --git a/net/tipc/link.h b/net/tipc/link.h index 55812e87ca1e..9df7fa4d3bdd 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -41,6 +41,10 @@ #include "msg.h" #include "node.h" +/* TIPC-specific error codes +*/ +#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ + /* Out-of-range value for link sequence numbers */ #define INVALID_LINK_SEQ 0x10000 @@ -105,7 +109,7 @@ struct tipc_stats { * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @continuity_interval: link continuity testing interval [in ms] + * @cont_intv: link continuity testing interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state @@ -146,7 +150,7 @@ struct tipc_link { u32 peer_bearer_id; u32 bearer_id; u32 tolerance; - u32 continuity_interval; + unsigned long cont_intv; u32 abort_limit; int state; u32 fsm_msg_cnt; @@ -196,28 +200,32 @@ struct tipc_port; struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); -void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down); +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, - int req_tlv_space, - u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, +struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area, + int req_tlv_space, u16 cmd); +struct sk_buff *tipc_link_cmd_show_stats(struct net *net, + const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, +struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, + const void *req_tlv_area, int req_tlv_space); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); -void tipc_link_reset_list(unsigned int bearer_id); -int tipc_link_xmit_skb(struct sk_buff *skb, u32 dest, u32 selector); -int tipc_link_xmit(struct sk_buff_head *list, u32 dest, u32 selector); -int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list); -u32 tipc_link_get_max_pkt(u32 dest, u32 selector); -void tipc_link_bundle_rcv(struct sk_buff *buf); +void tipc_link_reset_list(struct net *net, unsigned int bearer_id); +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, + u32 selector); +int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, + u32 selector); +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list); +void tipc_link_bundle_rcv(struct net *net, struct sk_buff *buf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); void tipc_link_push_packets(struct tipc_link *l_ptr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index a687b30a699c..18aba9e99345 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -34,6 +34,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <net/sock.h> #include "core.h" #include "msg.h" #include "addr.h" @@ -46,25 +47,50 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode) +/** + * tipc_buf_acquire - creates a TIPC message buffer + * @size: message size (including TIPC header) + * + * Returns a new buffer with data pointers set to the specified size. + * + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. + */ +struct sk_buff *tipc_buf_acquire(u32 size) { + struct sk_buff *skb; + unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; + + skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + if (skb) { + skb_reserve(skb, BUF_HEADROOM); + skb_put(skb, size); + skb->next = NULL; + } + return skb; +} + +void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + memset(m, 0, hsize); msg_set_version(m); msg_set_user(m, user); msg_set_hdr_sz(m, hsize); msg_set_size(m, hsize); - msg_set_prevnode(m, tipc_own_addr); + msg_set_prevnode(m, tn->own_addr); msg_set_type(m, type); if (hsize > SHORT_H_SIZE) { - msg_set_orignode(m, tipc_own_addr); + msg_set_orignode(m, tn->own_addr); msg_set_destnode(m, destnode); } } -struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, - uint data_sz, u32 dnode, u32 onode, - u32 dport, u32 oport, int errcode) +struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode) { struct tipc_msg *msg; struct sk_buff *buf; @@ -74,7 +100,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, return NULL; msg = buf_msg(buf); - tipc_msg_init(msg, user, type, hdr_sz, dnode); + tipc_msg_init(net, msg, user, type, hdr_sz, dnode); msg_set_size(msg, hdr_sz + data_sz); msg_set_prevnode(msg, onode); msg_set_origport(msg, oport); @@ -170,8 +196,8 @@ err: * * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, - int dsz, int pktmax, struct sk_buff_head *list) +int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int pktmax, struct sk_buff_head *list) { int mhsz = msg_hdr_sz(mhdr); int msz = mhsz + dsz; @@ -191,6 +217,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, skb = tipc_buf_acquire(msz); if (unlikely(!skb)) return -ENOMEM; + skb_orphan(skb); __skb_queue_tail(list, skb); skb_copy_to_linear_data(skb, mhdr, mhsz); pktpos = skb->data + mhsz; @@ -202,8 +229,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, } /* Prepare reusable fragment header */ - tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, msg_destnode(mhdr)); + tipc_msg_init(net, &pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE, + msg_destnode(mhdr)); msg_set_size(&pkthdr, pktmax); msg_set_fragm_no(&pkthdr, pktno); @@ -211,6 +238,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, skb = tipc_buf_acquire(pktmax); if (!skb) return -ENOMEM; + skb_orphan(skb); __skb_queue_tail(list, skb); pktpos = skb->data; skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE); @@ -244,6 +272,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, rc = -ENOMEM; goto error; } + skb_orphan(skb); __skb_queue_tail(list, skb); msg_set_type(&pkthdr, FRAGMENT); msg_set_size(&pkthdr, pktsz); @@ -312,8 +341,8 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) * Replaces buffer if successful * Returns true if success, otherwise false */ -bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, - u32 mtu, u32 dnode) +bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, + struct sk_buff *skb, u32 mtu, u32 dnode) { struct sk_buff *bskb; struct tipc_msg *bmsg; @@ -336,7 +365,7 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, skb_trim(bskb, INT_H_SIZE); bmsg = buf_msg(bskb); - tipc_msg_init(bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); + tipc_msg_init(net, bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); @@ -353,8 +382,10 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, * Consumes buffer if failure * Returns true if success, otherwise false */ -bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) +bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode, + int err) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg = buf_msg(buf); uint imp = msg_importance(msg); struct tipc_msg ohdr; @@ -374,7 +405,7 @@ bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); - msg_set_prevnode(msg, tipc_own_addr); + msg_set_prevnode(msg, tn->own_addr); if (!msg_short(msg)) { msg_set_orignode(msg, msg_destnode(&ohdr)); msg_set_destnode(msg, msg_orignode(&ohdr)); @@ -399,7 +430,7 @@ exit: * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error * code if message to be rejected */ -int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) +int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode) { struct tipc_msg *msg = buf_msg(buf); u32 dport; @@ -413,8 +444,8 @@ int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) if (msg_reroute_cnt(msg) > 0) return -TIPC_ERR_NO_NAME; - *dnode = addr_domain(msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(msg_nametype(msg), + *dnode = addr_domain(net, msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(net, msg_nametype(msg), msg_nameinst(msg), dnode); if (!dport) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d5c83d7ecb47..526ef345b70e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -37,7 +37,7 @@ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H -#include "bearer.h" +#include <linux/tipc.h> /* * Constants and routines used to read and write TIPC payload message headers @@ -77,11 +77,37 @@ #define TIPC_MEDIA_ADDR_OFFSET 5 +/** + * TIPC message buffer code + * + * TIPC message buffer headroom reserves space for the worst-case + * link-level device header (in case the message is sent off-node). + * + * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields + * are word aligned for quicker access + */ +#define BUF_HEADROOM LL_MAX_HEADER + +struct tipc_skb_cb { + void *handle; + struct sk_buff *tail; + bool deferred; + bool wakeup_pending; + bool bundling; + u16 chain_sz; + u16 chain_imp; +}; + +#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) struct tipc_msg { __be32 hdr[15]; }; +static inline struct tipc_msg *buf_msg(struct sk_buff *skb) +{ + return (struct tipc_msg *)skb->data; +} static inline u32 msg_word(struct tipc_msg *m, u32 pos) { @@ -721,27 +747,21 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) return msg_origport(m); } -bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); - -int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); - -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode); - -struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, - uint data_sz, u32 dnode, u32 onode, - u32 dport, u32 oport, int errcode); - +struct sk_buff *tipc_buf_acquire(u32 size); +bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode, + int err); +int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode); +void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode); +struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); - bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); - -bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, - u32 mtu, u32 dnode); - -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, - int dsz, int mtu, struct sk_buff_head *list); - +bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, + struct sk_buff *skb, u32 mtu, u32 dnode); +int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int mtu, struct sk_buff_head *list); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index ba6083dca95b..7f31cd4badc4 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -68,29 +68,32 @@ static void publ_to_item(struct distr_item *i, struct publication *p) /** * named_prepare_buf - allocate & initialize a publication message */ -static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) +static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, + u32 dest) { struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); struct tipc_msg *msg; if (buf != NULL) { msg = buf_msg(buf); - tipc_msg_init(msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, dest); + tipc_msg_init(net, msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, + dest); msg_set_size(msg, INT_H_SIZE + size); } return buf; } -void named_cluster_distribute(struct sk_buff *skb) +void named_cluster_distribute(struct net *net, struct sk_buff *skb) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *oskb; struct tipc_node *node; u32 dnode; rcu_read_lock(); - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { dnode = node->addr; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) continue; if (!tipc_node_active_links(node)) continue; @@ -98,7 +101,7 @@ void named_cluster_distribute(struct sk_buff *skb) if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); - tipc_link_xmit_skb(oskb, dnode, dnode); + tipc_link_xmit_skb(net, oskb, dnode, dnode); } rcu_read_unlock(); @@ -108,18 +111,19 @@ void named_cluster_distribute(struct sk_buff *skb) /** * tipc_named_publish - tell other nodes about a new publication by this node */ -struct sk_buff *tipc_named_publish(struct publication *publ) +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf; struct distr_item *item; list_add_tail_rcu(&publ->local_list, - &tipc_nametbl->publ_list[publ->scope]); + &tn->nametbl->publ_list[publ->scope]); if (publ->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); + buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); if (!buf) { pr_warn("Publication distribution failure\n"); return NULL; @@ -133,7 +137,7 @@ struct sk_buff *tipc_named_publish(struct publication *publ) /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node */ -struct sk_buff *tipc_named_withdraw(struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -143,7 +147,7 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) if (publ->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); + buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { pr_warn("Withdrawal distribution failure\n"); return NULL; @@ -160,19 +164,21 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) * @dnode: node to be updated * @pls: linked list of publication items to be packed into buffer chain */ -static void named_distribute(struct sk_buff_head *list, u32 dnode, - struct list_head *pls) +static void named_distribute(struct net *net, struct sk_buff_head *list, + u32 dnode, struct list_head *pls) { struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; - uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE; + uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) * + ITEM_SIZE; uint msg_rem = msg_dsz; list_for_each_entry(publ, pls, local_list) { /* Prepare next buffer: */ if (!skb) { - skb = named_prepare_buf(PUBLICATION, msg_rem, dnode); + skb = named_prepare_buf(net, PUBLICATION, msg_rem, + dnode); if (!skb) { pr_warn("Bulk publication failure\n"); return; @@ -202,30 +208,32 @@ static void named_distribute(struct sk_buff_head *list, u32 dnode, /** * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(u32 dnode) +void tipc_named_node_up(struct net *net, u32 dnode) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff_head head; __skb_queue_head_init(&head); rcu_read_lock(); - named_distribute(&head, dnode, - &tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - named_distribute(&head, dnode, - &tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); + named_distribute(net, &head, dnode, + &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]); + named_distribute(net, &head, dnode, + &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); rcu_read_unlock(); - tipc_link_xmit(&head, dnode, dnode); + tipc_link_xmit(net, &head, dnode, dnode); } -static void tipc_publ_subscribe(struct publication *publ, u32 addr) +static void tipc_publ_subscribe(struct net *net, struct publication *publ, + u32 addr) { struct tipc_node *node; - if (in_own_node(addr)) + if (in_own_node(net, addr)) return; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (!node) { pr_warn("Node subscription rejected, unknown node 0x%x\n", addr); @@ -237,11 +245,12 @@ static void tipc_publ_subscribe(struct publication *publ, u32 addr) tipc_node_unlock(node); } -static void tipc_publ_unsubscribe(struct publication *publ, u32 addr) +static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, + u32 addr) { struct tipc_node *node; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (!node) return; @@ -256,16 +265,17 @@ static void tipc_publ_unsubscribe(struct publication *publ, u32 addr) * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. */ -static void tipc_publ_purge(struct publication *publ, u32 addr) +static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *p; - spin_lock_bh(&tipc_nametbl_lock); - p = tipc_nametbl_remove_publ(publ->type, publ->lower, + spin_lock_bh(&tn->nametbl_lock); + p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) - tipc_publ_unsubscribe(p, addr); - spin_unlock_bh(&tipc_nametbl_lock); + tipc_publ_unsubscribe(net, p, addr); + spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { pr_err("Unable to remove publication from failed node\n" @@ -277,12 +287,12 @@ static void tipc_publ_purge(struct publication *publ, u32 addr) kfree_rcu(p, rcu); } -void tipc_publ_notify(struct list_head *nsub_list, u32 addr) +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) - tipc_publ_purge(publ, addr); + tipc_publ_purge(net, publ, addr); } /** @@ -292,25 +302,28 @@ void tipc_publ_notify(struct list_head *nsub_list, u32 addr) * tipc_nametbl_lock must be held. * Returns the publication item if successful, otherwise NULL. */ -static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype) +static bool tipc_update_nametbl(struct net *net, struct distr_item *i, + u32 node, u32 dtype) { struct publication *publ = NULL; if (dtype == PUBLICATION) { - publ = tipc_nametbl_insert_publ(ntohl(i->type), ntohl(i->lower), + publ = tipc_nametbl_insert_publ(net, ntohl(i->type), + ntohl(i->lower), ntohl(i->upper), TIPC_CLUSTER_SCOPE, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_subscribe(publ, node); + tipc_publ_subscribe(net, publ, node); return true; } } else if (dtype == WITHDRAWAL) { - publ = tipc_nametbl_remove_publ(ntohl(i->type), ntohl(i->lower), + publ = tipc_nametbl_remove_publ(net, ntohl(i->type), + ntohl(i->lower), node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_unsubscribe(publ, node); + tipc_publ_unsubscribe(net, publ, node); kfree_rcu(publ, rcu); return true; } @@ -343,7 +356,7 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) * tipc_named_process_backlog - try to process any pending name table updates * from the network. */ -void tipc_named_process_backlog(void) +void tipc_named_process_backlog(struct net *net) { struct distr_queue_item *e, *tmp; char addr[16]; @@ -351,7 +364,7 @@ void tipc_named_process_backlog(void) list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { if (time_after(e->expires, now)) { - if (!tipc_update_nametbl(&e->i, e->node, e->dtype)) + if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) continue; } else { tipc_addr_string_fill(addr, e->node); @@ -369,21 +382,22 @@ void tipc_named_process_backlog(void) /** * tipc_named_rcv - process name table update message sent by another node */ -void tipc_named_rcv(struct sk_buff *buf) +void tipc_named_rcv(struct net *net, struct sk_buff *buf) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg = buf_msg(buf); struct distr_item *item = (struct distr_item *)msg_data(msg); u32 count = msg_data_sz(msg) / ITEM_SIZE; u32 node = msg_orignode(msg); - spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); while (count--) { - if (!tipc_update_nametbl(item, node, msg_type(msg))) + if (!tipc_update_nametbl(net, item, node, msg_type(msg))) tipc_named_add_backlog(item, msg_type(msg), node); item++; } - tipc_named_process_backlog(); - spin_unlock_bh(&tipc_nametbl_lock); + tipc_named_process_backlog(net); + spin_unlock_bh(&tn->nametbl_lock); kfree_skb(buf); } @@ -394,17 +408,18 @@ void tipc_named_rcv(struct sk_buff *buf) * All name table entries published by this node are updated to reflect * the node's new network address. */ -void tipc_named_reinit(void) +void tipc_named_reinit(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; int scope; - spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) - list_for_each_entry_rcu(publ, &tipc_nametbl->publ_list[scope], + list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope], local_list) - publ->node = tipc_own_addr; + publ->node = tn->own_addr; - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index cef55cedcfb2..5ec10b59527b 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -67,13 +67,13 @@ struct distr_item { __be32 key; }; -struct sk_buff *tipc_named_publish(struct publication *publ); -struct sk_buff *tipc_named_withdraw(struct publication *publ); -void named_cluster_distribute(struct sk_buff *buf); -void tipc_named_node_up(u32 dnode); -void tipc_named_rcv(struct sk_buff *buf); -void tipc_named_reinit(void); -void tipc_named_process_backlog(void); -void tipc_publ_notify(struct list_head *nsub_list, u32 addr); +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); +void named_cluster_distribute(struct net *net, struct sk_buff *buf); +void tipc_named_node_up(struct net *net, u32 dnode); +void tipc_named_rcv(struct net *net, struct sk_buff *buf); +void tipc_named_reinit(struct net *net); +void tipc_named_process_backlog(struct net *net); +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index c8df0223371a..ce09b863528c 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -34,11 +34,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <net/sock.h> #include "core.h" #include "config.h" #include "name_table.h" #include "name_distr.h" #include "subscr.h" +#include "bcast.h" #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -105,9 +107,6 @@ struct name_seq { struct rcu_head rcu; }; -struct name_table *tipc_nametbl; -DEFINE_SPINLOCK(tipc_nametbl_lock); - static int hash(int x) { return x & (TIPC_NAMETBL_SIZE - 1); @@ -228,9 +227,11 @@ static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance) /** * tipc_nameseq_insert_publ */ -static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, - u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) +static struct publication *tipc_nameseq_insert_publ(struct net *net, + struct name_seq *nseq, + u32 type, u32 lower, + u32 upper, u32 scope, + u32 node, u32 port, u32 key) { struct tipc_subscription *s; struct tipc_subscription *st; @@ -315,12 +316,12 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, list_add(&publ->zone_list, &info->zone_list); info->zone_list_size++; - if (in_own_cluster(node)) { + if (in_own_cluster(net, node)) { list_add(&publ->cluster_list, &info->cluster_list); info->cluster_list_size++; } - if (in_own_node(node)) { + if (in_own_node(net, node)) { list_add(&publ->node_list, &info->node_list); info->node_list_size++; } @@ -349,8 +350,10 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, * A failed withdraw request simply returns a failure indication and lets the * caller issue any error or warning messages associated with such a problem. */ -static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst, - u32 node, u32 ref, u32 key) +static struct publication *tipc_nameseq_remove_publ(struct net *net, + struct name_seq *nseq, + u32 inst, u32 node, + u32 ref, u32 key) { struct publication *publ; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); @@ -378,13 +381,13 @@ found: info->zone_list_size--; /* Remove publication from cluster scope list, if present */ - if (in_own_cluster(node)) { + if (in_own_cluster(net, node)) { list_del(&publ->cluster_list); info->cluster_list_size--; } /* Remove publication from node scope list, if present */ - if (in_own_node(node)) { + if (in_own_node(net, node)) { list_del(&publ->node_list); info->node_list_size--; } @@ -447,12 +450,13 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, } } -static struct name_seq *nametbl_find_seq(u32 type) +static struct name_seq *nametbl_find_seq(struct net *net, u32 type) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *ns; - seq_head = &tipc_nametbl->seq_hlist[hash(type)]; + seq_head = &tn->nametbl->seq_hlist[hash(type)]; hlist_for_each_entry_rcu(ns, seq_head, ns_list) { if (ns->type == type) return ns; @@ -461,11 +465,13 @@ static struct name_seq *nametbl_find_seq(u32 type) return NULL; }; -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 port, u32 key) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; - struct name_seq *seq = nametbl_find_seq(type); + struct name_seq *seq = nametbl_find_seq(net, type); int index = hash(type); if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) || @@ -476,29 +482,29 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, } if (!seq) - seq = tipc_nameseq_create(type, - &tipc_nametbl->seq_hlist[index]); + seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (!seq) return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_insert_publ(seq, type, lower, upper, + publ = tipc_nameseq_insert_publ(net, seq, type, lower, upper, scope, node, port, key); spin_unlock_bh(&seq->lock); return publ; } -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, - u32 node, u32 ref, u32 key) +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 node, u32 ref, + u32 key) { struct publication *publ; - struct name_seq *seq = nametbl_find_seq(type); + struct name_seq *seq = nametbl_find_seq(net, type); if (!seq) return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); + publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); @@ -523,8 +529,10 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, * - if name translation is attempted and fails, sets 'destnode' to 0 * and returns 0 */ -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, + u32 *destnode) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sub_seq *sseq; struct name_info *info; struct publication *publ; @@ -532,11 +540,11 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) u32 ref = 0; u32 node = 0; - if (!tipc_in_scope(*destnode, tipc_own_addr)) + if (!tipc_in_scope(*destnode, tn->own_addr)) return 0; rcu_read_lock(); - seq = nametbl_find_seq(type); + seq = nametbl_find_seq(net, type); if (unlikely(!seq)) goto not_found; spin_lock_bh(&seq->lock); @@ -569,13 +577,13 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) } /* Round-Robin Algorithm */ - else if (*destnode == tipc_own_addr) { + else if (*destnode == tn->own_addr) { if (list_empty(&info->node_list)) goto no_match; publ = list_first_entry(&info->node_list, struct publication, node_list); list_move_tail(&publ->node_list, &info->node_list); - } else if (in_own_cluster_exact(*destnode)) { + } else if (in_own_cluster_exact(net, *destnode)) { if (list_empty(&info->cluster_list)) goto no_match; publ = list_first_entry(&info->cluster_list, struct publication, @@ -609,8 +617,8 @@ not_found: * * Returns non-zero if any off-node ports overlap */ -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports) +int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, + u32 limit, struct tipc_port_list *dports) { struct name_seq *seq; struct sub_seq *sseq; @@ -619,7 +627,7 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, int res = 0; rcu_read_lock(); - seq = nametbl_find_seq(type); + seq = nametbl_find_seq(net, type); if (!seq) goto exit; @@ -650,50 +658,55 @@ exit: /* * tipc_nametbl_publish - add name publication to network name tables */ -struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key) +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port_ref, + u32 key) { struct publication *publ; struct sk_buff *buf = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&tipc_nametbl_lock); - if (tipc_nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { + spin_lock_bh(&tn->nametbl_lock); + if (tn->nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", TIPC_MAX_PUBLICATIONS); - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); return NULL; } - publ = tipc_nametbl_insert_publ(type, lower, upper, scope, - tipc_own_addr, port_ref, key); + publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope, + tn->own_addr, port_ref, key); if (likely(publ)) { - tipc_nametbl->local_publ_count++; - buf = tipc_named_publish(publ); + tn->nametbl->local_publ_count++; + buf = tipc_named_publish(net, publ); /* Any pending external events? */ - tipc_named_process_backlog(); + tipc_named_process_backlog(net); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); if (buf) - named_cluster_distribute(buf); + named_cluster_distribute(net, buf); return publ; } /** * tipc_nametbl_withdraw - withdraw name publication from network name tables */ -int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key) { struct publication *publ; struct sk_buff *skb = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&tipc_nametbl_lock); - publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); + spin_lock_bh(&tn->nametbl_lock); + publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr, + ref, key); if (likely(publ)) { - tipc_nametbl->local_publ_count--; - skb = tipc_named_withdraw(publ); + tn->nametbl->local_publ_count--; + skb = tipc_named_withdraw(net, publ); /* Any pending external events? */ - tipc_named_process_backlog(); + tipc_named_process_backlog(net); list_del_init(&publ->pport_list); kfree_rcu(publ, rcu); } else { @@ -701,10 +714,10 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) "(type=%u, lower=%u, ref=%u, key=%u)\n", type, lower, ref, key); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); if (skb) { - named_cluster_distribute(skb); + named_cluster_distribute(net, skb); return 1; } return 0; @@ -715,15 +728,15 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) */ void tipc_nametbl_subscribe(struct tipc_subscription *s) { + struct tipc_net *tn = net_generic(s->net, tipc_net_id); u32 type = s->seq.type; int index = hash(type); struct name_seq *seq; - spin_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(type); + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, type); if (!seq) - seq = tipc_nameseq_create(type, - &tipc_nametbl->seq_hlist[index]); + seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); tipc_nameseq_subscribe(seq, s); @@ -732,7 +745,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) pr_warn("Failed to create subscription for {%u,%u,%u}\n", s->seq.type, s->seq.lower, s->seq.upper); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } /** @@ -740,10 +753,11 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) */ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) { + struct tipc_net *tn = net_generic(s->net, tipc_net_id); struct name_seq *seq; - spin_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(s->seq.type); + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, s->seq.type); if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); @@ -756,7 +770,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) spin_unlock_bh(&seq->lock); } } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } /** @@ -858,9 +872,10 @@ static int nametbl_header(char *buf, int len, u32 depth) /** * nametbl_list - print specified name table contents into the given buffer */ -static int nametbl_list(char *buf, int len, u32 depth_info, +static int nametbl_list(struct net *net, char *buf, int len, u32 depth_info, u32 type, u32 lowbound, u32 upbound) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *seq; int all_types; @@ -880,7 +895,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, lowbound = 0; upbound = ~0; for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &tipc_nametbl->seq_hlist[i]; + seq_head = &tn->nametbl->seq_hlist[i]; hlist_for_each_entry_rcu(seq, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len - ret, depth, seq->type, @@ -896,7 +911,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, } ret += nametbl_header(buf + ret, len - ret, depth); i = hash(type); - seq_head = &tipc_nametbl->seq_hlist[i]; + seq_head = &tn->nametbl->seq_hlist[i]; hlist_for_each_entry_rcu(seq, seq_head, ns_list) { if (seq->type == type) { ret += nameseq_list(seq, buf + ret, len - ret, @@ -909,7 +924,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info, return ret; } -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_nametbl_get(struct net *net, const void *req_tlv_area, + int req_tlv_space) { struct sk_buff *buf; struct tipc_name_table_query *argv; @@ -930,7 +946,7 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) pb_len = ULTRA_STRING_MAX_LEN; argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area); rcu_read_lock(); - str_len = nametbl_list(pb, pb_len, ntohl(argv->depth), + str_len = nametbl_list(net, pb, pb_len, ntohl(argv->depth), ntohl(argv->type), ntohl(argv->lowbound), ntohl(argv->upbound)); rcu_read_unlock(); @@ -941,8 +957,10 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) return buf; } -int tipc_nametbl_init(void) +int tipc_nametbl_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl; int i; tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC); @@ -955,6 +973,8 @@ int tipc_nametbl_init(void) INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]); + tn->nametbl = tipc_nametbl; + spin_lock_init(&tn->nametbl_lock); return 0; } @@ -963,7 +983,7 @@ int tipc_nametbl_init(void) * * tipc_nametbl_lock must be held when calling this function */ -static void tipc_purge_publications(struct name_seq *seq) +static void tipc_purge_publications(struct net *net, struct name_seq *seq) { struct publication *publ, *safe; struct sub_seq *sseq; @@ -973,8 +993,8 @@ static void tipc_purge_publications(struct name_seq *seq) sseq = seq->sseqs; info = sseq->info; list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { - tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, - publ->ref, publ->key); + tipc_nametbl_remove_publ(net, publ->type, publ->lower, + publ->node, publ->ref, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); @@ -984,25 +1004,27 @@ static void tipc_purge_publications(struct name_seq *seq) kfree_rcu(seq, rcu); } -void tipc_nametbl_stop(void) +void tipc_nametbl_stop(struct net *net) { u32 i; struct name_seq *seq; struct hlist_head *seq_head; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl = tn->nametbl; /* Verify name table is empty and purge any lingering * publications, then release the name table */ - spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&tipc_nametbl->seq_hlist[i])) continue; seq_head = &tipc_nametbl->seq_hlist[i]; hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - tipc_purge_publications(seq); + tipc_purge_publications(net, seq); } } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); synchronize_net(); kfree(tipc_nametbl); @@ -1106,9 +1128,10 @@ static int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq, return 0; } -static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, - u32 *last_lower, u32 *last_publ) +static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg, + u32 *last_type, u32 *last_lower, u32 *last_publ) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *seq = NULL; int err; @@ -1120,10 +1143,10 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, i = 0; for (; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &tipc_nametbl->seq_hlist[i]; + seq_head = &tn->nametbl->seq_hlist[i]; if (*last_type) { - seq = nametbl_find_seq(*last_type); + seq = nametbl_find_seq(net, *last_type); if (!seq) return -EPIPE; } else { @@ -1157,6 +1180,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 last_type = cb->args[0]; u32 last_lower = cb->args[1]; u32 last_publ = cb->args[2]; + struct net *net = sock_net(skb->sk); struct tipc_nl_msg msg; if (done) @@ -1167,7 +1191,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ); + err = tipc_nl_seq_list(net, &msg, &last_type, &last_lower, &last_publ); if (!err) { done = 1; } else if (err != -EMSGSIZE) { diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 5f0dee92010d..f67b3d8d4b2f 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -95,26 +95,27 @@ struct name_table { u32 local_publ_count; }; -extern spinlock_t tipc_nametbl_lock; -extern struct name_table *tipc_nametbl; - int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports); -struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key); -int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 ref, +struct sk_buff *tipc_nametbl_get(struct net *net, const void *req_tlv_area, + int req_tlv_space); +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); +int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, + u32 limit, struct tipc_port_list *dports); +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port_ref, + u32 key); +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key); +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 ref, u32 key); +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, - u32 ref, u32 key); void tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); -int tipc_nametbl_init(void); -void tipc_nametbl_stop(void); +int tipc_nametbl_init(struct net *net); +void tipc_nametbl_stop(struct net *net); #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index cf13df3cde8f..263267e0e7fe 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -41,6 +41,7 @@ #include "socket.h" #include "node.h" #include "config.h" +#include "bcast.h" static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, @@ -108,44 +109,50 @@ static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { * - A local spin_lock protecting the queue of subscriber events. */ -int tipc_net_start(u32 addr) +int tipc_net_start(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); char addr_string[16]; int res; - tipc_own_addr = addr; - tipc_named_reinit(); - tipc_sk_reinit(); - res = tipc_bclink_init(); + tn->own_addr = addr; + tipc_named_reinit(net); + tipc_sk_reinit(net); + res = tipc_bclink_init(net); if (res) return res; - tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, - TIPC_ZONE_SCOPE, 0, tipc_own_addr); + tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, + TIPC_ZONE_SCOPE, 0, tn->own_addr); pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", - tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); + tipc_addr_string_fill(addr_string, tn->own_addr), + tn->net_id); return 0; } -void tipc_net_stop(void) +void tipc_net_stop(struct net *net) { - if (!tipc_own_addr) + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (!tn->own_addr) return; - tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); + tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0, + tn->own_addr); rtnl_lock(); - tipc_bearer_stop(); - tipc_bclink_stop(); - tipc_node_stop(); + tipc_bearer_stop(net); + tipc_bclink_stop(net); + tipc_node_stop(net); rtnl_unlock(); pr_info("Left network mode\n"); } -static int __tipc_nl_add_net(struct tipc_nl_msg *msg) +static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) { + struct tipc_net *tn = net_generic(net, tipc_net_id); void *hdr; struct nlattr *attrs; @@ -158,7 +165,7 @@ static int __tipc_nl_add_net(struct tipc_nl_msg *msg) if (!attrs) goto msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tipc_net_id)) + if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id)) goto attr_msg_full; nla_nest_end(msg->skb, attrs); @@ -176,6 +183,7 @@ msg_full: int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); int err; int done = cb->args[0]; struct tipc_nl_msg msg; @@ -187,7 +195,7 @@ int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; - err = __tipc_nl_add_net(&msg); + err = __tipc_nl_add_net(net, &msg); if (err) goto out; @@ -200,8 +208,10 @@ out: int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) { - int err; + struct net *net = genl_info_net(info); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + int err; if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; @@ -216,21 +226,21 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) u32 val; /* Can't change net id once TIPC has joined a network */ - if (tipc_own_addr) + if (tn->own_addr) return -EPERM; val = nla_get_u32(attrs[TIPC_NLA_NET_ID]); if (val < 1 || val > 9999) return -EINVAL; - tipc_net_id = val; + tn->net_id = val; } if (attrs[TIPC_NLA_NET_ADDR]) { u32 addr; /* Can't change net addr once TIPC has joined a network */ - if (tipc_own_addr) + if (tn->own_addr) return -EPERM; addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); @@ -238,7 +248,7 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; rtnl_lock(); - tipc_net_start(addr); + tipc_net_start(net, addr); rtnl_unlock(); } diff --git a/net/tipc/net.h b/net/tipc/net.h index a81c1b9eb150..77a7a118911d 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -39,9 +39,9 @@ #include <net/genetlink.h> -int tipc_net_start(u32 addr); +int tipc_net_start(struct net *net, u32 addr); -void tipc_net_stop(void); +void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index b891e3905bc4..fe0f5134ce15 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -46,6 +46,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct sk_buff *rep_buf; struct nlmsghdr *rep_nlh; struct nlmsghdr *req_nlh = info->nlhdr; @@ -53,22 +54,24 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); u16 cmd; - if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN))) + if ((req_userhdr->cmd & 0xC000) && + (!netlink_net_capable(skb, CAP_NET_ADMIN))) cmd = TIPC_CMD_NOT_NET_ADMIN; else cmd = req_userhdr->cmd; - rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, - nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), - hdr_space); + rep_buf = tipc_cfg_do_cmd(net, req_userhdr->dest, cmd, + nlmsg_data(req_nlh) + GENL_HDRLEN + + TIPC_GENL_HDRLEN, + nlmsg_attrlen(req_nlh, GENL_HDRLEN + + TIPC_GENL_HDRLEN), hdr_space); if (rep_buf) { skb_push(rep_buf, hdr_space); rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); + genlmsg_unicast(net, rep_buf, NETLINK_CB(skb).portid); } return 0; @@ -93,6 +96,7 @@ static struct genl_family tipc_genl_family = { .version = TIPC_GENL_VERSION, .hdrsize = TIPC_GENL_HDRLEN, .maxattr = 0, + .netnsok = true, }; /* Legacy ASCII API */ @@ -112,6 +116,7 @@ struct genl_family tipc_genl_v2_family = { .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, .maxattr = TIPC_NLA_MAX, + .netnsok = true, }; static const struct genl_ops tipc_genl_v2_ops[] = { diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index 1425c6869de0..ae2f2d923a15 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -45,4 +45,7 @@ struct tipc_nl_msg { u32 seq; }; +int tipc_netlink_start(void); +void tipc_netlink_stop(void); + #endif diff --git a/net/tipc/node.c b/net/tipc/node.c index 8d353ec77a66..ee5d33cfcf80 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -40,17 +40,9 @@ #include "name_distr.h" #include "socket.h" -#define NODE_HTABLE_SIZE 512 - static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); -static struct hlist_head node_htable[NODE_HTABLE_SIZE]; -LIST_HEAD(tipc_node_list); -static u32 tipc_num_nodes; -static u32 tipc_num_links; -static DEFINE_SPINLOCK(node_list_lock); - struct tipc_sock_conn { u32 port; u32 peer_port; @@ -78,15 +70,17 @@ static unsigned int tipc_hashfn(u32 addr) /* * tipc_node_find - locate specified node object, if it exists */ -struct tipc_node *tipc_node_find(u32 addr) +struct tipc_node *tipc_node_find(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; - if (unlikely(!in_own_cluster_exact(addr))) + if (unlikely(!in_own_cluster_exact(net, addr))) return NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(node, &node_htable[tipc_hashfn(addr)], hash) { + hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], + hash) { if (node->addr == addr) { rcu_read_unlock(); return node; @@ -96,20 +90,22 @@ struct tipc_node *tipc_node_find(u32 addr) return NULL; } -struct tipc_node *tipc_node_create(u32 addr) +struct tipc_node *tipc_node_create(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n_ptr, *temp_node; - spin_lock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); if (!n_ptr) { - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); pr_warn("Node creation failed, no memory\n"); return NULL; } n_ptr->addr = addr; + n_ptr->net = net; spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); @@ -118,9 +114,9 @@ struct tipc_node *tipc_node_create(u32 addr) skb_queue_head_init(&n_ptr->waiting_sks); __skb_queue_head_init(&n_ptr->bclink.deferred_queue); - hlist_add_head_rcu(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); + hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); - list_for_each_entry_rcu(temp_node, &tipc_node_list, list) { + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) break; } @@ -128,40 +124,41 @@ struct tipc_node *tipc_node_create(u32 addr) n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; - tipc_num_nodes++; + tn->num_nodes++; - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) { list_del_rcu(&n_ptr->list); hlist_del_rcu(&n_ptr->hash); kfree_rcu(n_ptr, rcu); - tipc_num_nodes--; + tn->num_nodes--; } -void tipc_node_stop(void) +void tipc_node_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node, *t_node; - spin_lock_bh(&node_list_lock); - list_for_each_entry_safe(node, t_node, &tipc_node_list, list) - tipc_node_delete(node); - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + list_for_each_entry_safe(node, t_node, &tn->node_list, list) + tipc_node_delete(tn, node); + spin_unlock_bh(&tn->node_list_lock); } -int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) return 0; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (!node) { pr_warn("Connecting sock to node 0x%x failed\n", dnode); return -EHOSTUNREACH; @@ -179,15 +176,15 @@ int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) return 0; } -void tipc_node_remove_conn(u32 dnode, u32 port) +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) { struct tipc_node *node; struct tipc_sock_conn *conn, *safe; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) return; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (!node) return; @@ -201,18 +198,20 @@ void tipc_node_remove_conn(u32 dnode, u32 port) tipc_node_unlock(node); } -void tipc_node_abort_sock_conns(struct list_head *conns) +void tipc_node_abort_sock_conns(struct net *net, struct list_head *conns) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock_conn *conn, *safe; struct sk_buff *buf; list_for_each_entry_safe(conn, safe, conns, list) { - buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tipc_own_addr, - conn->peer_node, conn->port, - conn->peer_port, TIPC_ERR_NO_NODE); + buf = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, + tn->own_addr, conn->peer_node, + conn->port, conn->peer_port, + TIPC_ERR_NO_NODE); if (likely(buf)) - tipc_sk_rcv(buf); + tipc_sk_rcv(net, buf); list_del(&conn->list); kfree(conn); } @@ -231,8 +230,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP; n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; - pr_info("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Established link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); if (!active[0]) { active[0] = active[1] = l_ptr; @@ -240,7 +239,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) goto exit; } if (l_ptr->priority < active[0]->priority) { - pr_info("New link <%s> becomes standby\n", l_ptr->name); + pr_debug("New link <%s> becomes standby\n", l_ptr->name); goto exit; } tipc_link_dup_queue_xmit(active[0], l_ptr); @@ -248,9 +247,9 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) active[0] = l_ptr; goto exit; } - pr_info("Old link <%s> becomes standby\n", active[0]->name); + pr_debug("Old link <%s> becomes standby\n", active[0]->name); if (active[1] != active[0]) - pr_info("Old link <%s> becomes standby\n", active[1]->name); + pr_debug("Old link <%s> becomes standby\n", active[1]->name); active[0] = active[1] = l_ptr; exit: /* Leave room for changeover header when returning 'mtu' to users: */ @@ -290,6 +289,7 @@ static void node_select_active_links(struct tipc_node *n_ptr) */ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); struct tipc_link **active; n_ptr->working_links--; @@ -297,12 +297,12 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; if (!tipc_link_is_active(l_ptr)) { - pr_info("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Lost standby link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); return; } - pr_info("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Lost link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); active = &n_ptr->active_links[0]; if (active[0] == l_ptr) @@ -324,7 +324,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) } /* Loopback link went down? No fragmentation needed from now on. */ - if (n_ptr->addr == tipc_own_addr) { + if (n_ptr->addr == tn->own_addr) { n_ptr->act_mtus[0] = MAX_MSG_SIZE; n_ptr->act_mtus[1] = MAX_MSG_SIZE; } @@ -342,24 +342,27 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); + n_ptr->links[l_ptr->bearer_id] = l_ptr; - spin_lock_bh(&node_list_lock); - tipc_num_links++; - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + tn->num_links++; + spin_unlock_bh(&tn->node_list_lock); n_ptr->link_cnt++; } void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); int i; for (i = 0; i < MAX_BEARERS; i++) { if (l_ptr != n_ptr->links[i]) continue; n_ptr->links[i] = NULL; - spin_lock_bh(&node_list_lock); - tipc_num_links--; - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + tn->num_links--; + spin_unlock_bh(&tn->node_list_lock); n_ptr->link_cnt--; } } @@ -368,8 +371,8 @@ static void node_established_contact(struct tipc_node *n_ptr) { n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; n_ptr->bclink.oos_state = 0; - n_ptr->bclink.acked = tipc_bclink_get_last_sent(); - tipc_bclink_add_node(n_ptr->addr); + n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); + tipc_bclink_add_node(n_ptr->net, n_ptr->addr); } static void node_lost_contact(struct tipc_node *n_ptr) @@ -377,8 +380,8 @@ static void node_lost_contact(struct tipc_node *n_ptr) char addr_string[16]; u32 i; - pr_info("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + pr_debug("Lost contact with %s\n", + tipc_addr_string_fill(addr_string, n_ptr->addr)); /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { @@ -389,7 +392,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) n_ptr->bclink.reasm_buf = NULL; } - tipc_bclink_remove_node(n_ptr->addr); + tipc_bclink_remove_node(n_ptr->net, n_ptr->addr); tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); n_ptr->bclink.recv_permitted = false; @@ -414,8 +417,10 @@ static void node_lost_contact(struct tipc_node *n_ptr) TIPC_NOTIFY_NODE_DOWN; } -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area, + int req_tlv_space) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 domain; struct sk_buff *buf; struct tipc_node *n_ptr; @@ -430,20 +435,20 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network address)"); - spin_lock_bh(&node_list_lock); - if (!tipc_num_nodes) { - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + if (!tn->num_nodes) { + spin_unlock_bh(&tn->node_list_lock); return tipc_cfg_reply_none(); } /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes; + payload_size = TLV_SPACE(sizeof(node_info)) * tn->num_nodes; if (payload_size > 32768u) { - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many nodes)"); } - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); buf = tipc_cfg_reply_alloc(payload_size); if (!buf) @@ -451,7 +456,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for all nodes in scope */ rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { if (!tipc_in_scope(domain, n_ptr->addr)) continue; node_info.addr = htonl(n_ptr->addr); @@ -463,8 +468,10 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) return buf; } -struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_node_get_links(struct net *net, const void *req_tlv_area, + int req_tlv_space) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 domain; struct sk_buff *buf; struct tipc_node *n_ptr; @@ -479,32 +486,32 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network address)"); - if (!tipc_own_addr) + if (!tn->own_addr) return tipc_cfg_reply_none(); - spin_lock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); /* Get space for all unicast links + broadcast link */ - payload_size = TLV_SPACE((sizeof(link_info)) * (tipc_num_links + 1)); + payload_size = TLV_SPACE((sizeof(link_info)) * (tn->num_links + 1)); if (payload_size > 32768u) { - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many links)"); } - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); buf = tipc_cfg_reply_alloc(payload_size); if (!buf) return NULL; /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr)); + link_info.dest = htonl(tipc_cluster_mask(tn->own_addr)); link_info.up = htonl(1); strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); /* Add TLVs for any other links in scope */ rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { u32 i; if (!tipc_in_scope(domain, n_ptr->addr)) @@ -534,10 +541,11 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) * * Returns 0 on success */ -int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, + char *linkname, size_t len) { struct tipc_link *link; - struct tipc_node *node = tipc_node_find(addr); + struct tipc_node *node = tipc_node_find(net, addr); if ((bearer_id >= MAX_BEARERS) || !node) return -EINVAL; @@ -554,6 +562,7 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) void tipc_node_unlock(struct tipc_node *node) { + struct net *net = node->net; LIST_HEAD(nsub_list); LIST_HEAD(conn_sks); struct sk_buff_head waiting_sks; @@ -585,26 +594,26 @@ void tipc_node_unlock(struct tipc_node *node) spin_unlock_bh(&node->lock); while (!skb_queue_empty(&waiting_sks)) - tipc_sk_rcv(__skb_dequeue(&waiting_sks)); + tipc_sk_rcv(net, __skb_dequeue(&waiting_sks)); if (!list_empty(&conn_sks)) - tipc_node_abort_sock_conns(&conn_sks); + tipc_node_abort_sock_conns(net, &conn_sks); if (!list_empty(&nsub_list)) - tipc_publ_notify(&nsub_list, addr); + tipc_publ_notify(net, &nsub_list, addr); if (flags & TIPC_WAKEUP_BCAST_USERS) - tipc_bclink_wakeup_users(); + tipc_bclink_wakeup_users(net); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(addr); + tipc_named_node_up(net, addr); if (flags & TIPC_NOTIFY_LINK_UP) - tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, + tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, link_id, addr); if (flags & TIPC_NOTIFY_LINK_DOWN) - tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, + tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); } @@ -645,6 +654,8 @@ msg_full: int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); int done = cb->args[0]; int last_addr = cb->args[1]; struct tipc_node *node; @@ -659,7 +670,7 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); - if (last_addr && !tipc_node_find(last_addr)) { + if (last_addr && !tipc_node_find(net, last_addr)) { rcu_read_unlock(); /* We never set seq or call nl_dump_check_consistent() this * means that setting prev_seq here will cause the consistence @@ -671,7 +682,7 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) return -EPIPE; } - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { if (last_addr) { if (node->addr == last_addr) last_addr = 0; diff --git a/net/tipc/node.h b/net/tipc/node.h index cbe0e950f1cc..43ef88ef3035 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,10 +42,10 @@ #include "bearer.h" #include "msg.h" -/* - * Out-of-range value for node signature - */ -#define INVALID_NODE_SIG 0x10000 +/* Out-of-range value for node signature */ +#define INVALID_NODE_SIG 0x10000 + +#define NODE_HTABLE_SIZE 512 /* Flags used to take different actions according to flag type * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down @@ -90,6 +90,7 @@ struct tipc_node_bclink { * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure + * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain * @active_links: pointers to active links to node * @links: pointers to all links to node @@ -106,6 +107,7 @@ struct tipc_node_bclink { struct tipc_node { u32 addr; spinlock_t lock; + struct net *net; struct hlist_node hash; struct tipc_link *active_links[2]; u32 act_mtus[2]; @@ -123,23 +125,24 @@ struct tipc_node { struct rcu_head rcu; }; -extern struct list_head tipc_node_list; - -struct tipc_node *tipc_node_find(u32 addr); -struct tipc_node *tipc_node_create(u32 addr); -void tipc_node_stop(void); +struct tipc_node *tipc_node_find(struct net *net, u32 addr); +struct tipc_node *tipc_node_create(struct net *net, u32 addr); +void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); -struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); -int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); +struct sk_buff *tipc_node_get_links(struct net *net, const void *req_tlv_area, + int req_tlv_space); +struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area, + int req_tlv_space); +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, + char *linkname, size_t len); void tipc_node_unlock(struct tipc_node *node); -int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port); -void tipc_node_remove_conn(u32 dnode, u32 port); +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); @@ -154,12 +157,12 @@ static inline bool tipc_node_blocked(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } -static inline uint tipc_node_get_mtu(u32 addr, u32 selector) +static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) { struct tipc_node *node; u32 mtu; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (likely(node)) mtu = node->act_mtus[selector & 1]; diff --git a/net/tipc/server.c b/net/tipc/server.c index a538a02f869b..eadd4ed45905 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -35,6 +35,7 @@ #include "server.h" #include "core.h" +#include "socket.h" #include <net/sock.h> /* Number of messages to send before rescheduling */ @@ -255,7 +256,8 @@ static int tipc_receive_from_sock(struct tipc_conn *con) goto out_close; } - s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret); + s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr, + con->usr_data, buf, ret); kmem_cache_free(s->rcvbuf_cache, buf); @@ -307,7 +309,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = tipc_sock_create_local(s->type, &sock); + ret = tipc_sock_create_local(s->net, s->type, &sock); if (ret < 0) return NULL; ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, diff --git a/net/tipc/server.h b/net/tipc/server.h index be817b0b547e..9015faedb1b0 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -36,7 +36,9 @@ #ifndef _TIPC_SERVER_H #define _TIPC_SERVER_H -#include "core.h" +#include <linux/idr.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> #define TIPC_SERVER_NAME_LEN 32 @@ -45,6 +47,7 @@ * @conn_idr: identifier set of connection * @idr_lock: protect the connection identifier set * @idr_in_use: amount of allocated identifier entry + * @net: network namspace instance * @rcvbuf_cache: memory cache of server receive buffer * @rcv_wq: receive workqueue * @send_wq: send workqueue @@ -61,16 +64,18 @@ struct tipc_server { struct idr conn_idr; spinlock_t idr_lock; int idr_in_use; + struct net *net; struct kmem_cache *rcvbuf_cache; struct workqueue_struct *rcv_wq; struct workqueue_struct *send_wq; int max_rcvbuf_size; - void *(*tipc_conn_new) (int conid); - void (*tipc_conn_shutdown) (int conid, void *usr_data); - void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); + void *(*tipc_conn_new)(int conid); + void (*tipc_conn_shutdown)(int conid, void *usr_data); + void (*tipc_conn_recvmsg)(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len); struct sockaddr_tipc *saddr; - const char name[TIPC_SERVER_NAME_LEN]; + char name[TIPC_SERVER_NAME_LEN]; int imp; int type; }; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4731cad99d1c..679a22082fcb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -34,22 +34,25 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/rhashtable.h> +#include <linux/jhash.h> #include "core.h" #include "name_table.h" #include "node.h" #include "link.h" -#include <linux/export.h> #include "config.h" #include "socket.h" -#define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ +#define SS_LISTENING -1 /* socket is listening */ +#define SS_READY -2 /* socket is connectionless */ -#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define TIPC_FWD_MSG 1 -#define TIPC_CONN_OK 0 -#define TIPC_CONN_PROBING 1 +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ +#define TIPC_FWD_MSG 1 +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 +#define TIPC_MAX_PORT 0xffffffff +#define TIPC_MIN_PORT 1 /** * struct tipc_sock - TIPC socket structure @@ -59,14 +62,13 @@ * @conn_instance: TIPC instance used when connection was established * @published: non-zero if port has one or more associated names * @max_pkt: maximum packet size "hint" used when building messages sent by port - * @ref: unique reference to port in TIPC object registry + * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: - * @probing_interval: - * @timer: + * @probing_intv: * @port: port - interacts with 'sk' and with the rest of the TIPC stack * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request @@ -74,6 +76,8 @@ * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @node: hash table node + * @rcu: rcu struct for tipc_sock */ struct tipc_sock { struct sock sk; @@ -82,19 +86,20 @@ struct tipc_sock { u32 conn_instance; int published; u32 max_pkt; - u32 ref; + u32 portid; struct tipc_msg phdr; struct list_head sock_list; struct list_head publications; u32 pub_count; u32 probing_state; - u32 probing_interval; - struct timer_list timer; + unsigned long probing_intv; uint conn_timeout; atomic_t dupl_rcvcnt; bool link_cong; uint sent_unacked; uint rcv_unacked; + struct rhash_head node; + struct rcu_head rcu; }; static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -103,16 +108,14 @@ static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); -static void tipc_sk_timeout(unsigned long ref); +static void tipc_sk_timeout(unsigned long data); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); -static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk); -static void tipc_sk_ref_discard(u32 ref); -static struct tipc_sock *tipc_sk_get(u32 ref); -static struct tipc_sock *tipc_sk_get_next(u32 *ref); -static void tipc_sk_put(struct tipc_sock *tsk); +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); +static int tipc_sk_insert(struct tipc_sock *tsk); +static void tipc_sk_remove(struct tipc_sock *tsk); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -246,10 +249,11 @@ static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *skb; u32 dnode; + struct net *net = sock_net(sk); while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + if (tipc_msg_reverse(net, skb, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(net, skb, dnode, 0); } } @@ -260,6 +264,7 @@ static void tsk_rej_rx_queue(struct sock *sk) */ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { + struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); u32 peer_port = tsk_peer_port(tsk); u32 orig_node; u32 peer_node; @@ -276,10 +281,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) if (likely(orig_node == peer_node)) return true; - if (!orig_node && (peer_node == tipc_own_addr)) + if (!orig_node && (peer_node == tn->own_addr)) return true; - if (!peer_node && (orig_node == tipc_own_addr)) + if (!peer_node && (orig_node == tn->own_addr)) return true; return false; @@ -305,7 +310,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, struct sock *sk; struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref; /* Validate arguments */ if (unlikely(protocol != 0)) @@ -339,24 +343,22 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -ENOMEM; tsk = tipc_sk(sk); - ref = tipc_sk_ref_acquire(tsk); - if (!ref) { - pr_warn("Socket create failed; reference table exhausted\n"); - return -ENOMEM; - } tsk->max_pkt = MAX_PKT_DEFAULT; - tsk->ref = ref; INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; - tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + tipc_msg_init(net, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); - msg_set_origport(msg, ref); /* Finish initializing socket data structures */ sock->ops = ops; sock->state = state; sock_init_data(sock, sk); - k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref); + if (tipc_sk_insert(tsk)) { + pr_warn("Socket create failed; port numbrer exhausted\n"); + return -EINVAL; + } + msg_set_origport(msg, tsk->portid); + setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -384,7 +386,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, * * Returns 0 on success, errno otherwise */ -int tipc_sock_create_local(int type, struct socket **res) +int tipc_sock_create_local(struct net *net, int type, struct socket **res) { int rc; @@ -393,7 +395,7 @@ int tipc_sock_create_local(int type, struct socket **res) pr_err("Failed to create kernel socket\n"); return rc; } - tipc_sk_create(&init_net, *res, 0, 1); + tipc_sk_create(net, *res, 0, 1); return 0; } @@ -442,6 +444,13 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, return ret; } +static void tipc_sk_callback(struct rcu_head *head) +{ + struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); + + sock_put(&tsk->sk); +} + /** * tipc_release - destroy a TIPC socket * @sock: socket to destroy @@ -461,9 +470,11 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net; + struct tipc_net *tn; struct tipc_sock *tsk; struct sk_buff *skb; - u32 dnode; + u32 dnode, probing_state; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -472,6 +483,9 @@ static int tipc_release(struct socket *sock) if (sk == NULL) return 0; + net = sock_net(sk); + tn = net_generic(net, tipc_net_id); + tsk = tipc_sk(sk); lock_sock(sk); @@ -491,26 +505,29 @@ static int tipc_release(struct socket *sock) (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; tsk->connected = 0; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + if (tipc_msg_reverse(net, skb, &dnode, + TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(net, skb, dnode, 0); } } tipc_sk_withdraw(tsk, 0, NULL); - tipc_sk_ref_discard(tsk->ref); - k_cancel_timer(&tsk->timer); + probing_state = tsk->probing_state; + if (del_timer_sync(&sk->sk_timer) && + probing_state != TIPC_CONN_PROBING) + sock_put(sk); + tipc_sk_remove(tsk); if (tsk->connected) { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, dnode, tipc_own_addr, - tsk_peer_port(tsk), - tsk->ref, TIPC_ERR_NO_PORT); + skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tn->own_addr, tsk_peer_port(tsk), + tsk->portid, TIPC_ERR_NO_PORT); if (skb) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } - k_term_timer(&tsk->timer); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -518,7 +535,8 @@ static int tipc_release(struct socket *sock) /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; release_sock(sk); - sock_put(sk); + + call_rcu(&tsk->rcu, tipc_sk_callback); sock->sk = NULL; return 0; @@ -602,6 +620,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; struct tipc_sock *tsk = tipc_sk(sock->sk); + struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { @@ -611,8 +630,8 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.ref = tsk_peer_port(tsk); addr->addr.id.node = tsk_peer_node(tsk); } else { - addr->addr.id.ref = tsk->ref; - addr->addr.id.node = tipc_own_addr; + addr->addr.id.ref = tsk->portid; + addr->addr.id.node = tn->own_addr; } *uaddr_len = sizeof(*addr); @@ -711,6 +730,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct msghdr *msg, size_t dsz, long timeo) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; struct sk_buff_head head; uint mtu; @@ -728,12 +748,12 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &head); + rc = tipc_msg_build(net, mhdr, msg, 0, dsz, mtu, &head); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bclink_xmit(&head); + rc = tipc_bclink_xmit(net, &head); if (likely(rc >= 0)) { rc = dsz; break; @@ -752,7 +772,7 @@ new_mtu: /* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets */ -void tipc_sk_mcast_rcv(struct sk_buff *buf) +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); struct tipc_port_list dports = {0, NULL, }; @@ -761,15 +781,12 @@ void tipc_sk_mcast_rcv(struct sk_buff *buf) uint i, last, dst = 0; u32 scope = TIPC_CLUSTER_SCOPE; - if (in_own_node(msg_orignode(msg))) + if (in_own_node(net, msg_orignode(msg))) scope = TIPC_NODE_SCOPE; /* Create destination port list: */ - tipc_nametbl_mc_translate(msg_nametype(msg), - msg_namelower(msg), - msg_nameupper(msg), - scope, - &dports); + tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), + msg_nameupper(msg), scope, &dports); last = dports.count; if (!last) { kfree_skb(buf); @@ -784,7 +801,7 @@ void tipc_sk_mcast_rcv(struct sk_buff *buf) continue; } msg_set_destport(msg, item->ports[i]); - tipc_sk_rcv(b); + tipc_sk_rcv(net, b); } } tipc_port_list_free(&dports); @@ -816,7 +833,7 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) + if (!tipc_msg_reverse(sock_net(&tsk->sk), buf, dnode, TIPC_OK)) return TIPC_OK; msg_set_type(msg, CONN_PROBE_REPLY); return TIPC_FWD_MSG; @@ -872,6 +889,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; struct sk_buff_head head; @@ -929,7 +947,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_nametype(mhdr, type); msg_set_nameinst(mhdr, inst); msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); - dport = tipc_nametbl_translate(type, inst, &dnode); + dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(mhdr, dnode); msg_set_destport(mhdr, dport); if (unlikely(!dport && !dnode)) { @@ -946,16 +964,16 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, } new_mtu: - mtu = tipc_node_get_mtu(dnode, tsk->ref); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); + rc = tipc_msg_build(net, mhdr, m, 0, dsz, mtu, &head); if (rc < 0) goto exit; do { skb = skb_peek(&head); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(&head, dnode, tsk->ref); + rc = tipc_link_xmit(net, &head, dnode, tsk->portid); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -1024,11 +1042,12 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *mhdr = &tsk->phdr; struct sk_buff_head head; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - u32 ref = tsk->ref; + u32 portid = tsk->portid; int rc = -EINVAL; long timeo; u32 dnode; @@ -1062,12 +1081,12 @@ next: mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head); + rc = tipc_msg_build(net, mhdr, m, sent, send, mtu, &head); if (unlikely(rc < 0)) goto exit; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(&head, dnode, ref); + rc = tipc_link_xmit(net, &head, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1076,7 +1095,8 @@ next: goto next; } if (rc == -EMSGSIZE) { - tsk->max_pkt = tipc_node_get_mtu(dnode, ref); + tsk->max_pkt = tipc_node_get_mtu(net, dnode, + portid); goto next; } if (rc != -ELINKCONG) @@ -1118,6 +1138,8 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, u32 peer_node) { + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct tipc_msg *msg = &tsk->phdr; msg_set_destnode(msg, peer_node); @@ -1126,12 +1148,12 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_interval = CONN_PROBING_INTERVAL; + tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; - k_start_timer(&tsk->timer, tsk->probing_interval); - tipc_node_add_conn(peer_node, tsk->ref, peer_port); - tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref); + sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); } /** @@ -1230,6 +1252,8 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) { + struct net *net = sock_net(&tsk->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); @@ -1237,13 +1261,14 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!tsk->connected) return; - skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, - tipc_own_addr, peer_port, tsk->ref, TIPC_OK); + skb = tipc_msg_create(net, CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tn->own_addr, peer_port, tsk->portid, + TIPC_OK); if (!skb) return; msg = buf_msg(skb); msg_set_msgcnt(msg, ack); - tipc_link_xmit_skb(skb, dnode, msg_link_selector(msg)); + tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg)); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) @@ -1536,6 +1561,7 @@ static void tipc_data_ready(struct sock *sk) static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) { struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(*buf); int retval = -TIPC_ERR_NO_PORT; @@ -1551,8 +1577,8 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) sock->state = SS_DISCONNECTING; tsk->connected = 0; /* let timer expire on it's own */ - tipc_node_remove_conn(tsk_peer_node(tsk), - tsk->ref); + tipc_node_remove_conn(net, tsk_peer_node(tsk), + tsk->portid); } retval = TIPC_OK; } @@ -1709,6 +1735,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) int rc; u32 onode; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); uint truesize = skb->truesize; rc = filter_rcv(sk, skb); @@ -1719,10 +1746,10 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } - if ((rc < 0) && !tipc_msg_reverse(skb, &onode, -rc)) + if ((rc < 0) && !tipc_msg_reverse(net, skb, &onode, -rc)) return 0; - tipc_link_xmit_skb(skb, onode, 0); + tipc_link_xmit_skb(net, skb, onode, 0); return 0; } @@ -1733,7 +1760,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Consumes buffer * Returns 0 if success, or errno: -EHOSTUNREACH */ -int tipc_sk_rcv(struct sk_buff *skb) +int tipc_sk_rcv(struct net *net, struct sk_buff *skb) { struct tipc_sock *tsk; struct sock *sk; @@ -1743,9 +1770,9 @@ int tipc_sk_rcv(struct sk_buff *skb) u32 dnode; /* Validate destination and message */ - tsk = tipc_sk_get(dport); + tsk = tipc_sk_lookup(net, dport); if (unlikely(!tsk)) { - rc = tipc_msg_eval(skb, &dnode); + rc = tipc_msg_eval(net, skb, &dnode); goto exit; } sk = &tsk->sk; @@ -1763,14 +1790,14 @@ int tipc_sk_rcv(struct sk_buff *skb) rc = -TIPC_ERR_OVERLOAD; } spin_unlock_bh(&sk->sk_lock.slock); - tipc_sk_put(tsk); + sock_put(sk); if (likely(!rc)) return 0; exit: - if ((rc < 0) && !tipc_msg_reverse(skb, &dnode, -rc)) + if ((rc < 0) && !tipc_msg_reverse(net, skb, &dnode, -rc)) return -EHOSTUNREACH; - tipc_link_xmit_skb(skb, dnode, 0); + tipc_link_xmit_skb(net, skb, dnode, 0); return (rc < 0) ? -EHOSTUNREACH : 0; } @@ -2027,6 +2054,8 @@ exit: static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; u32 dnode; @@ -2049,21 +2078,23 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + if (tipc_msg_reverse(net, skb, &dnode, + TIPC_CONN_SHUTDOWN)) + tipc_link_xmit_skb(net, skb, dnode, + tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, tipc_own_addr, + 0, dnode, tn->own_addr, tsk_peer_port(tsk), - tsk->ref, TIPC_CONN_SHUTDOWN); - tipc_link_xmit_skb(skb, dnode, tsk->ref); + tsk->portid, TIPC_CONN_SHUTDOWN); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(net, dnode, tsk->portid); /* fall through */ case SS_DISCONNECTING: @@ -2084,18 +2115,15 @@ restart: return res; } -static void tipc_sk_timeout(unsigned long ref) +static void tipc_sk_timeout(unsigned long data) { - struct tipc_sock *tsk; - struct sock *sk; + struct tipc_sock *tsk = (struct tipc_sock *)data; + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; u32 peer_port, peer_node; - tsk = tipc_sk_get(ref); - if (!tsk) - return; - - sk = &tsk->sk; bh_lock_sock(sk); if (!tsk->connected) { bh_unlock_sock(sk); @@ -2106,38 +2134,39 @@ static void tipc_sk_timeout(unsigned long ref) if (tsk->probing_state == TIPC_CONN_PROBING) { /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tipc_own_addr, - peer_node, ref, peer_port, - TIPC_ERR_NO_PORT); + skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, + tn->own_addr, peer_node, tsk->portid, + peer_port, TIPC_ERR_NO_PORT); } else { - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, - 0, peer_node, tipc_own_addr, - peer_port, ref, TIPC_OK); + skb = tipc_msg_create(net, CONN_MANAGER, CONN_PROBE, INT_H_SIZE, + 0, peer_node, tn->own_addr, + peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; - k_start_timer(&tsk->timer, tsk->probing_interval); + sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(skb, peer_node, ref); + tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); exit: - tipc_sk_put(tsk); + sock_put(sk); } static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = sock_net(&tsk->sk); struct publication *publ; u32 key; if (tsk->connected) return -EINVAL; - key = tsk->ref + tsk->pub_count + 1; - if (key == tsk->ref) + key = tsk->portid + tsk->pub_count + 1; + if (key == tsk->portid) return -EADDRINUSE; - publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, - scope, tsk->ref, key); + publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, + scope, tsk->portid, key); if (unlikely(!publ)) return -EINVAL; @@ -2150,6 +2179,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = sock_net(&tsk->sk); struct publication *publ; struct publication *safe; int rc = -EINVAL; @@ -2164,12 +2194,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, continue; if (publ->upper != seq->upper) break; - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; break; } - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; } @@ -2181,16 +2211,18 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, static int tipc_sk_show(struct tipc_sock *tsk, char *buf, int len, int full_id) { + struct net *net = sock_net(&tsk->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; int ret; if (full_id) ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", - tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), tsk->ref); + tipc_zone(tn->own_addr), + tipc_cluster(tn->own_addr), + tipc_node(tn->own_addr), tsk->portid); else - ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref); + ret = tipc_snprintf(buf, len, "%-10u:", tsk->portid); if (tsk->connected) { u32 dport = tsk_peer_port(tsk); @@ -2222,15 +2254,18 @@ static int tipc_sk_show(struct tipc_sock *tsk, char *buf, return ret; } -struct sk_buff *tipc_sk_socks_show(void) +struct sk_buff *tipc_sk_socks_show(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + const struct bucket_table *tbl; + struct rhash_head *pos; struct sk_buff *buf; struct tlv_desc *rep_tlv; char *pb; int pb_len; struct tipc_sock *tsk; int str_len = 0; - u32 ref = 0; + int i; buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); if (!buf) @@ -2239,14 +2274,18 @@ struct sk_buff *tipc_sk_socks_show(void) pb = TLV_DATA(rep_tlv); pb_len = ULTRA_STRING_MAX_LEN; - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - str_len += tipc_sk_show(tsk, pb + str_len, - pb_len - str_len, 0); - release_sock(&tsk->sk); - tipc_sk_put(tsk); + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + str_len += tipc_sk_show(tsk, pb + str_len, + pb_len - str_len, 0); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } } + rcu_read_unlock(); + str_len += 1; /* for "\0" */ skb_put(buf, TLV_SPACE(str_len)); TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); @@ -2257,257 +2296,102 @@ struct sk_buff *tipc_sk_socks_show(void) /* tipc_sk_reinit: set non-zero address in all existing sockets * when we go from standalone to network mode. */ -void tipc_sk_reinit(void) +void tipc_sk_reinit(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + const struct bucket_table *tbl; + struct rhash_head *pos; + struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref = 0; - struct tipc_sock *tsk = tipc_sk_get_next(&ref); + int i; - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - msg = &tsk->phdr; - msg_set_prevnode(msg, tipc_own_addr); - msg_set_orignode(msg, tipc_own_addr); - release_sock(&tsk->sk); - tipc_sk_put(tsk); + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + msg = &tsk->phdr; + msg_set_prevnode(msg, tn->own_addr); + msg_set_orignode(msg, tn->own_addr); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } } + rcu_read_unlock(); } -/** - * struct reference - TIPC socket reference entry - * @tsk: pointer to socket associated with reference entry - * @ref: reference value for socket (combines instance & array index info) - */ -struct reference { - struct tipc_sock *tsk; - u32 ref; -}; - -/** - * struct tipc_ref_table - table of TIPC socket reference entries - * @entries: pointer to array of reference entries - * @capacity: array index of first unusable entry - * @init_point: array index of first uninitialized entry - * @first_free: array index of first unused socket reference entry - * @last_free: array index of last unused socket reference entry - * @index_mask: bitmask for array index portion of reference values - * @start_mask: initial value for instance value portion of reference values - */ -struct ref_table { - struct reference *entries; - u32 capacity; - u32 init_point; - u32 first_free; - u32 last_free; - u32 index_mask; - u32 start_mask; -}; - -/* Socket reference table consists of 2**N entries. - * - * State Socket ptr Reference - * ----- ---------- --------- - * In use non-NULL XXXX|own index - * (XXXX changes each time entry is acquired) - * Free NULL YYYY|next free index - * (YYYY is one more than last used XXXX) - * Uninitialized NULL 0 - * - * Entry 0 is not used; this allows index 0 to denote the end of the free list. - * - * Note that a reference value of 0 does not necessarily indicate that an - * entry is uninitialized, since the last entry in the free list could also - * have a reference value of 0 (although this is unlikely). - */ - -static struct ref_table tipc_ref_table; - -static DEFINE_RWLOCK(ref_table_lock); - -/** - * tipc_ref_table_init - create reference table for sockets - */ -int tipc_sk_ref_table_init(u32 req_sz, u32 start) +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) { - struct reference *table; - u32 actual_sz; - - /* account for unused entry, then round up size to a power of 2 */ - - req_sz++; - for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) { - /* do nothing */ - }; - - /* allocate table & mark all entries as uninitialized */ - table = vzalloc(actual_sz * sizeof(struct reference)); - if (table == NULL) - return -ENOMEM; - - tipc_ref_table.entries = table; - tipc_ref_table.capacity = req_sz; - tipc_ref_table.init_point = 1; - tipc_ref_table.first_free = 0; - tipc_ref_table.last_free = 0; - tipc_ref_table.index_mask = actual_sz - 1; - tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_sock *tsk; - return 0; -} + rcu_read_lock(); + tsk = rhashtable_lookup(&tn->sk_rht, &portid); + if (tsk) + sock_hold(&tsk->sk); + rcu_read_unlock(); -/** - * tipc_ref_table_stop - destroy reference table for sockets - */ -void tipc_sk_ref_table_stop(void) -{ - if (!tipc_ref_table.entries) - return; - vfree(tipc_ref_table.entries); - tipc_ref_table.entries = NULL; + return tsk; } -/* tipc_ref_acquire - create reference to a socket - * - * Register an socket pointer in the reference table. - * Returns a unique reference value that is used from then on to retrieve the - * socket pointer, or to determine if the socket has been deregistered. - */ -u32 tipc_sk_ref_acquire(struct tipc_sock *tsk) +static int tipc_sk_insert(struct tipc_sock *tsk) { - u32 index; - u32 index_mask; - u32 next_plus_upper; - u32 ref = 0; - struct reference *entry; - - if (unlikely(!tsk)) { - pr_err("Attempt to acquire ref. to non-existent obj\n"); - return 0; - } - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. table not found in acquisition attempt\n"); - return 0; - } - - /* Take a free entry, if available; otherwise initialize a new one */ - write_lock_bh(&ref_table_lock); - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; - - if (likely(index)) { - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; - index_mask = tipc_ref_table.index_mask; - next_plus_upper = entry->ref; - tipc_ref_table.first_free = next_plus_upper & index_mask; - ref = (next_plus_upper & ~index_mask) + index; - entry->tsk = tsk; - } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { - index = tipc_ref_table.init_point++; - entry = &tipc_ref_table.entries[index]; - ref = tipc_ref_table.start_mask + index; + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; + u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; + + while (remaining--) { + portid++; + if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) + portid = TIPC_MIN_PORT; + tsk->portid = portid; + sock_hold(&tsk->sk); + if (rhashtable_lookup_insert(&tn->sk_rht, &tsk->node)) + return 0; + sock_put(&tsk->sk); } - if (ref) { - entry->ref = ref; - entry->tsk = tsk; - } - write_unlock_bh(&ref_table_lock); - return ref; + return -1; } -/* tipc_sk_ref_discard - invalidate reference to an socket - * - * Disallow future references to an socket and free up the entry for re-use. - */ -void tipc_sk_ref_discard(u32 ref) +static void tipc_sk_remove(struct tipc_sock *tsk) { - struct reference *entry; - u32 index; - u32 index_mask; - - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. table not found during discard attempt\n"); - return; - } - - index_mask = tipc_ref_table.index_mask; - index = ref & index_mask; - entry = &tipc_ref_table.entries[index]; - - write_lock_bh(&ref_table_lock); + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (unlikely(!entry->tsk)) { - pr_err("Attempt to discard ref. to non-existent socket\n"); - goto exit; + if (rhashtable_remove(&tn->sk_rht, &tsk->node)) { + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); } - if (unlikely(entry->ref != ref)) { - pr_err("Attempt to discard non-existent reference\n"); - goto exit; - } - - /* Mark entry as unused; increment instance part of entry's - * reference to invalidate any subsequent references - */ - - entry->tsk = NULL; - entry->ref = (ref & ~index_mask) + (index_mask + 1); - - /* Append entry to free entry list */ - if (unlikely(tipc_ref_table.first_free == 0)) - tipc_ref_table.first_free = index; - else - tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index; - tipc_ref_table.last_free = index; -exit: - write_unlock_bh(&ref_table_lock); } -/* tipc_sk_get - find referenced socket and return pointer to it - */ -struct tipc_sock *tipc_sk_get(u32 ref) +int tipc_sk_rht_init(struct net *net) { - struct reference *entry; - struct tipc_sock *tsk; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct rhashtable_params rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .hashfn = jhash, + .max_shift = 20, /* 1M */ + .min_shift = 8, /* 256 */ + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + }; - if (unlikely(!tipc_ref_table.entries)) - return NULL; - read_lock_bh(&ref_table_lock); - entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask]; - tsk = entry->tsk; - if (likely(tsk && (entry->ref == ref))) - sock_hold(&tsk->sk); - else - tsk = NULL; - read_unlock_bh(&ref_table_lock); - return tsk; + return rhashtable_init(&tn->sk_rht, &rht_params); } -/* tipc_sk_get_next - lock & return next socket after referenced one -*/ -struct tipc_sock *tipc_sk_get_next(u32 *ref) +void tipc_sk_rht_destroy(struct net *net) { - struct reference *entry; - struct tipc_sock *tsk = NULL; - uint index = *ref & tipc_ref_table.index_mask; + struct tipc_net *tn = net_generic(net, tipc_net_id); - read_lock_bh(&ref_table_lock); - while (++index < tipc_ref_table.capacity) { - entry = &tipc_ref_table.entries[index]; - if (!entry->tsk) - continue; - tsk = entry->tsk; - sock_hold(&tsk->sk); - *ref = entry->ref; - break; - } - read_unlock_bh(&ref_table_lock); - return tsk; -} + /* Wait for socket readers to complete */ + synchronize_net(); -static void tipc_sk_put(struct tipc_sock *tsk) -{ - sock_put(&tsk->sk); + rhashtable_destroy(&tn->sk_rht); } /** @@ -2639,8 +2523,9 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } -static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + struct sock *sk = sock->sk; struct tipc_sioc_ln_req lnr; void __user *argp = (void __user *)arg; @@ -2648,7 +2533,8 @@ static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; - if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer, + if (!tipc_node_get_linkname(sock_net(sk), + lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) return -EFAULT; @@ -2820,6 +2706,8 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, int err; void *hdr; struct nlattr *attrs; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); @@ -2829,9 +2717,9 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, attrs = nla_nest_start(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->ref)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) goto attr_msg_cancel; if (tsk->connected) { @@ -2859,22 +2747,37 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; struct tipc_sock *tsk; - u32 prev_ref = cb->args[0]; - u32 ref = prev_ref; - - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - err = __tipc_nl_add_sk(skb, cb, tsk); - release_sock(&tsk->sk); - tipc_sk_put(tsk); - if (err) - break; + const struct bucket_table *tbl; + struct rhash_head *pos; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 tbl_id = cb->args[0]; + u32 prev_portid = cb->args[1]; - prev_ref = ref; - } + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (; tbl_id < tbl->size; tbl_id++) { + rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + if (prev_portid && prev_portid != tsk->portid) { + spin_unlock_bh(&tsk->sk.sk_lock.slock); + continue; + } - cb->args[0] = prev_ref; + err = __tipc_nl_add_sk(skb, cb, tsk); + if (err) { + prev_portid = tsk->portid; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + goto out; + } + prev_portid = 0; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } + } +out: + rcu_read_unlock(); + cb->args[0] = tbl_id; + cb->args[1] = prev_portid; return skb->len; } @@ -2962,12 +2865,13 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; - u32 tsk_ref = cb->args[0]; + u32 tsk_portid = cb->args[0]; u32 last_publ = cb->args[1]; u32 done = cb->args[2]; + struct net *net = sock_net(skb->sk); struct tipc_sock *tsk; - if (!tsk_ref) { + if (!tsk_portid) { struct nlattr **attrs; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; @@ -2984,13 +2888,13 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!sock[TIPC_NLA_SOCK_REF]) return -EINVAL; - tsk_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); } if (done) return 0; - tsk = tipc_sk_get(tsk_ref); + tsk = tipc_sk_lookup(net, tsk_portid); if (!tsk) return -EINVAL; @@ -2999,9 +2903,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!err) done = 1; release_sock(&tsk->sk); - tipc_sk_put(tsk); + sock_put(&tsk->sk); - cb->args[0] = tsk_ref; + cb->args[0] = tsk_portid; cb->args[1] = last_publ; cb->args[2] = done; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index d34089387006..f56c3fded51f 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -42,12 +42,19 @@ #define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) #define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) -int tipc_sk_rcv(struct sk_buff *buf); -struct sk_buff *tipc_sk_socks_show(void); -void tipc_sk_mcast_rcv(struct sk_buff *buf); -void tipc_sk_reinit(void); -int tipc_sk_ref_table_init(u32 requested_size, u32 start); -void tipc_sk_ref_table_stop(void); + +int tipc_socket_init(void); +void tipc_socket_stop(void); +int tipc_sock_create_local(struct net *net, int type, struct socket **res); +void tipc_sock_release_local(struct socket *sock); +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags); +int tipc_sk_rcv(struct net *net, struct sk_buff *buf); +struct sk_buff *tipc_sk_socks_show(struct net *net); +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf); +void tipc_sk_reinit(struct net *net); +int tipc_sk_rht_init(struct net *net); +void tipc_sk_rht_destroy(struct net *net); int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0344206b984f..72c339e432aa 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -50,33 +50,6 @@ struct tipc_subscriber { struct list_head subscription_list; }; -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); -static void *subscr_named_msg_event(int conid); -static void subscr_conn_shutdown_event(int conid, void *usr_data); - -static atomic_t subscription_count = ATOMIC_INIT(0); - -static struct sockaddr_tipc topsrv_addr __read_mostly = { - .family = AF_TIPC, - .addrtype = TIPC_ADDR_NAMESEQ, - .addr.nameseq.type = TIPC_TOP_SRV, - .addr.nameseq.lower = TIPC_TOP_SRV, - .addr.nameseq.upper = TIPC_TOP_SRV, - .scope = TIPC_NODE_SCOPE -}; - -static struct tipc_server topsrv __read_mostly = { - .saddr = &topsrv_addr, - .imp = TIPC_CRITICAL_IMPORTANCE, - .type = SOCK_SEQPACKET, - .max_rcvbuf_size = sizeof(struct tipc_subscr), - .name = "topology_server", - .tipc_conn_recvmsg = subscr_conn_msg_event, - .tipc_conn_new = subscr_named_msg_event, - .tipc_conn_shutdown = subscr_conn_shutdown_event, -}; - /** * htohl - convert value to endianness used by destination * @in: value to convert @@ -93,6 +66,7 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node) { + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; struct kvec msg_sect; @@ -103,8 +77,8 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base, - msg_sect.iov_len); + tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL, + msg_sect.iov_base, msg_sect.iov_len); } /** @@ -141,9 +115,11 @@ void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } -static void subscr_timeout(struct tipc_subscription *sub) +static void subscr_timeout(unsigned long data) { + struct tipc_subscription *sub = (struct tipc_subscription *)data; struct tipc_subscriber *subscriber = sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); /* The spin lock per subscriber is used to protect its members */ spin_lock_bh(&subscriber->lock); @@ -167,9 +143,8 @@ static void subscr_timeout(struct tipc_subscription *sub) TIPC_SUBSCR_TIMEOUT, 0, 0); /* Now destroy subscription */ - k_term_timer(&sub->timer); kfree(sub); - atomic_dec(&subscription_count); + atomic_dec(&tn->subscription_count); } /** @@ -179,10 +154,12 @@ static void subscr_timeout(struct tipc_subscription *sub) */ static void subscr_del(struct tipc_subscription *sub) { + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + tipc_nametbl_unsubscribe(sub); list_del(&sub->subscription_list); kfree(sub); - atomic_dec(&subscription_count); + atomic_dec(&tn->subscription_count); } /** @@ -190,9 +167,12 @@ static void subscr_del(struct tipc_subscription *sub) * * Note: Must call it in process context since it might sleep. */ -static void subscr_terminate(struct tipc_subscriber *subscriber) +static void subscr_terminate(struct tipc_subscription *sub) { - tipc_conn_terminate(&topsrv, subscriber->conid); + struct tipc_subscriber *subscriber = sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + + tipc_conn_terminate(tn->topsrv, subscriber->conid); } static void subscr_release(struct tipc_subscriber *subscriber) @@ -207,8 +187,7 @@ static void subscr_release(struct tipc_subscriber *subscriber) subscription_list) { if (sub->timeout != TIPC_WAIT_FOREVER) { spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); + del_timer_sync(&sub->timer); spin_lock_bh(&subscriber->lock); } subscr_del(sub); @@ -250,8 +229,7 @@ static void subscr_cancel(struct tipc_subscr *s, if (sub->timeout != TIPC_WAIT_FOREVER) { sub->timeout = TIPC_WAIT_FOREVER; spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); + del_timer_sync(&sub->timer); spin_lock_bh(&subscriber->lock); } subscr_del(sub); @@ -262,9 +240,11 @@ static void subscr_cancel(struct tipc_subscr *s, * * Called with subscriber lock held. */ -static int subscr_subscribe(struct tipc_subscr *s, +static int subscr_subscribe(struct net *net, struct tipc_subscr *s, struct tipc_subscriber *subscriber, - struct tipc_subscription **sub_p) { + struct tipc_subscription **sub_p) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub; int swap; @@ -279,7 +259,7 @@ static int subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { + if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); return -EINVAL; @@ -293,10 +273,11 @@ static int subscr_subscribe(struct tipc_subscr *s, } /* Initialize subscription object */ + sub->net = net; sub->seq.type = htohl(s->seq.type, swap); sub->seq.lower = htohl(s->seq.lower, swap); sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = htohl(s->timeout, swap); + sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap)); sub->filter = htohl(s->filter, swap); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || @@ -309,11 +290,10 @@ static int subscr_subscribe(struct tipc_subscr *s, sub->subscriber = subscriber; sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - atomic_inc(&subscription_count); + atomic_inc(&tn->subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { - k_init_timer(&sub->timer, - (Handler)subscr_timeout, (unsigned long)sub); - k_start_timer(&sub->timer, sub->timeout); + setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub); + mod_timer(&sub->timer, jiffies + sub->timeout); } *sub_p = sub; return 0; @@ -326,16 +306,18 @@ static void subscr_conn_shutdown_event(int conid, void *usr_data) } /* Handle one request to create a new subscription for the subscriber */ -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len) +static void subscr_conn_msg_event(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub = NULL; spin_lock_bh(&subscriber->lock); - if (subscr_subscribe((struct tipc_subscr *)buf, subscriber, &sub) < 0) { + if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, + &sub) < 0) { spin_unlock_bh(&subscriber->lock); - subscr_terminate(subscriber); + subscr_terminate(sub); return; } if (sub) @@ -343,7 +325,6 @@ static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, spin_unlock_bh(&subscriber->lock); } - /* Handle one request to establish a new subscriber */ static void *subscr_named_msg_event(int conid) { @@ -362,12 +343,50 @@ static void *subscr_named_msg_event(int conid) return (void *)subscriber; } -int tipc_subscr_start(void) +int tipc_subscr_start(struct net *net) { - return tipc_server_start(&topsrv); + struct tipc_net *tn = net_generic(net, tipc_net_id); + const char name[] = "topology_server"; + struct tipc_server *topsrv; + struct sockaddr_tipc *saddr; + + saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC); + if (!saddr) + return -ENOMEM; + saddr->family = AF_TIPC; + saddr->addrtype = TIPC_ADDR_NAMESEQ; + saddr->addr.nameseq.type = TIPC_TOP_SRV; + saddr->addr.nameseq.lower = TIPC_TOP_SRV; + saddr->addr.nameseq.upper = TIPC_TOP_SRV; + saddr->scope = TIPC_NODE_SCOPE; + + topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC); + if (!topsrv) { + kfree(saddr); + return -ENOMEM; + } + topsrv->net = net; + topsrv->saddr = saddr; + topsrv->imp = TIPC_CRITICAL_IMPORTANCE; + topsrv->type = SOCK_SEQPACKET; + topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); + topsrv->tipc_conn_recvmsg = subscr_conn_msg_event; + topsrv->tipc_conn_new = subscr_named_msg_event; + topsrv->tipc_conn_shutdown = subscr_conn_shutdown_event; + + strncpy(topsrv->name, name, strlen(name) + 1); + tn->topsrv = topsrv; + atomic_set(&tn->subscription_count, 0); + + return tipc_server_start(topsrv); } -void tipc_subscr_stop(void) +void tipc_subscr_stop(struct net *net) { - tipc_server_stop(&topsrv); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_server *topsrv = tn->topsrv; + + tipc_server_stop(topsrv); + kfree(topsrv->saddr); + kfree(topsrv); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 393e417bee3f..33488bd9fe3c 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -39,6 +39,9 @@ #include "server.h" +#define TIPC_MAX_SUBSCRIPTIONS 65535 +#define TIPC_MAX_PUBLICATIONS 65535 + struct tipc_subscription; struct tipc_subscriber; @@ -46,6 +49,7 @@ struct tipc_subscriber; * struct tipc_subscription - TIPC network topology subscription object * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription + * @net: point to network namespace * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription * @timer: timer governing subscription duration (optional) @@ -58,7 +62,8 @@ struct tipc_subscriber; struct tipc_subscription { struct tipc_subscriber *subscriber; struct tipc_name_seq seq; - u32 timeout; + struct net *net; + unsigned long timeout; u32 filter; struct timer_list timer; struct list_head nameseq_list; @@ -69,13 +74,10 @@ struct tipc_subscription { int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper); - void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node, int must); - -int tipc_subscr_start(void); - -void tipc_subscr_stop(void); +int tipc_subscr_start(struct net *net); +void tipc_subscr_stop(struct net *net); #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8e1b10274b02..526b6edab018 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1445,7 +1445,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); @@ -1456,14 +1455,12 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, unsigned int hash; struct sk_buff *skb; long timeo; - struct scm_cookie tmp_scm; + struct scm_cookie scm; int max_level; int data_len = 0; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1507,11 +1504,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - err = unix_scm_to_skb(siocb->scm, skb, true); + err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; max_level = err + 1; - unix_get_secdata(siocb->scm, skb); + unix_get_secdata(&scm, skb); skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1606,7 +1603,7 @@ restart: unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); - scm_destroy(siocb->scm); + scm_destroy(&scm); return len; out_unlock: @@ -1616,7 +1613,7 @@ out_free: out: if (other) sock_put(other); - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -1628,21 +1625,18 @@ out: static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct sock *other = NULL; int err, size; struct sk_buff *skb; int sent = 0; - struct scm_cookie tmp_scm; + struct scm_cookie scm; bool fds_sent = false; int max_level; int data_len; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1683,7 +1677,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out_err; /* Only send the fds in the first buffer */ - err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); + err = unix_scm_to_skb(&scm, skb, !fds_sent); if (err < 0) { kfree_skb(skb); goto out_err; @@ -1715,8 +1709,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, sent += size; } - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent; @@ -1728,8 +1721,7 @@ pipe_err: send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent ? : err; } @@ -1778,8 +1770,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int noblock = flags & MSG_DONTWAIT; @@ -1831,16 +1822,14 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (sock_flag(sk, SOCK_RCVTSTAMP)) __sock_recv_timestamp(msg, sk, skb); - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); - unix_set_secdata(siocb->scm, skb); + memset(&scm, 0, sizeof(scm)); + + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + unix_set_secdata(&scm, skb); if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); sk_peek_offset_bwd(sk, skb->len); } else { @@ -1860,11 +1849,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out_free: skb_free_datagram(sk, skb); @@ -1915,8 +1904,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); @@ -1943,10 +1931,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, * while sleeps in memcpy_tomsg */ - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } + memset(&scm, 0, sizeof(scm)); err = mutex_lock_interruptible(&u->readlock); if (unlikely(err)) { @@ -2012,13 +1997,13 @@ again: if (check_creds) { /* Never glue messages from different writers */ - if ((UNIXCB(skb).pid != siocb->scm->pid) || - !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || - !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) + if ((UNIXCB(skb).pid != scm.pid) || + !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || + !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } @@ -2045,7 +2030,7 @@ again: sk_peek_offset_bwd(sk, chunk); if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); if (unix_skb_len(skb)) break; @@ -2053,13 +2038,13 @@ again: skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); - if (siocb->scm->fp) + if (scm.fp) break; } else { /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); sk_peek_offset_fwd(sk, chunk); @@ -2068,7 +2053,7 @@ again: } while (size); mutex_unlock(&u->readlock); - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: return copied ? : err; } diff --git a/net/unix/diag.c b/net/unix/diag.c index 86fa0f3b2caf..ef542fbca9fe 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -155,7 +155,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 22ba971741e5..29c8675f9a11 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -175,7 +175,7 @@ config CFG80211_INTERNAL_REGDB Most distributions have a CRDA package. So if unsure, say N. config CFG80211_WEXT - bool + bool "cfg80211 wireless extensions compatibility" depends on CFG80211 select WEXT_CORE help diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 454d7a079d03..d78fd8b54515 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1723,7 +1723,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, break; } finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -2406,7 +2407,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -2871,6 +2873,9 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->get_key) return -EOPNOTSUPP; + if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) + return -ENOENT; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -2890,10 +2895,6 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) goto nla_put_failure; - if (pairwise && mac_addr && - !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) - return -ENOENT; - err = rdev_get_key(rdev, dev, key_idx, pairwise, mac_addr, &cookie, get_key_callback); @@ -3064,7 +3065,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) wdev_lock(dev->ieee80211_ptr); err = nl80211_key_allowed(dev->ieee80211_ptr); - if (key.type == NL80211_KEYTYPE_PAIRWISE && mac_addr && + if (key.type == NL80211_KEYTYPE_GROUP && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; @@ -3840,7 +3841,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, sinfo->assoc_req_ies)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -4570,7 +4572,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, pinfoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -5522,7 +5525,8 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -6596,7 +6600,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, nla_nest_end(msg, bss); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; fail_unlock_rcu: rcu_read_unlock(); @@ -6705,7 +6710,8 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, infoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -11049,7 +11055,8 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, /* ignore errors and send incomplete event anyway */ nl80211_add_scan_req(msg, rdev); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -11072,7 +11079,8 @@ nl80211_send_sched_scan_msg(struct sk_buff *msg, nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 886cc7cb5566..b586d0dcb09e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1533,45 +1533,40 @@ static void reg_call_notifier(struct wiphy *wiphy, static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) { - struct ieee80211_channel *ch; struct cfg80211_chan_def chandef; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - bool ret = true; + enum nl80211_iftype iftype; wdev_lock(wdev); + iftype = wdev->iftype; + /* make sure the interface is active */ if (!wdev->netdev || !netif_running(wdev->netdev)) - goto out; + goto wdev_inactive_unlock; - switch (wdev->iftype) { + switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (!wdev->beacon_interval) - goto out; - - ret = cfg80211_reg_can_beacon(wiphy, - &wdev->chandef, wdev->iftype); + goto wdev_inactive_unlock; + chandef = wdev->chandef; break; case NL80211_IFTYPE_ADHOC: if (!wdev->ssid_len) - goto out; - - ret = cfg80211_reg_can_beacon(wiphy, - &wdev->chandef, wdev->iftype); + goto wdev_inactive_unlock; + chandef = wdev->chandef; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (!wdev->current_bss || !wdev->current_bss->pub.channel) - goto out; + goto wdev_inactive_unlock; - ch = wdev->current_bss->pub.channel; - if (rdev->ops->get_channel && - !rdev_get_channel(rdev, wdev, &chandef)) - ret = cfg80211_chandef_usable(wiphy, &chandef, - IEEE80211_CHAN_DISABLED); - else - ret = !(ch->flags & IEEE80211_CHAN_DISABLED); + if (!rdev->ops->get_channel || + rdev_get_channel(rdev, wdev, &chandef)) + cfg80211_chandef_create(&chandef, + wdev->current_bss->pub.channel, + NL80211_CHAN_NO_HT); break; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -1584,9 +1579,26 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) break; } -out: wdev_unlock(wdev); - return ret; + + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_ADHOC: + return cfg80211_reg_can_beacon(wiphy, &chandef, iftype); + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + return cfg80211_chandef_usable(wiphy, &chandef, + IEEE80211_CHAN_DISABLED); + default: + break; + } + + return true; + +wdev_inactive_unlock: + wdev_unlock(wdev); + return true; } static void reg_leave_invalid_chans(struct wiphy *wiphy) diff --git a/net/wireless/util.c b/net/wireless/util.c index 919fee807dd9..6903dbdcb8c1 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -352,6 +352,12 @@ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc) goto out; } + if (ieee80211_is_mgmt(fc)) { + if (ieee80211_has_order(fc)) + hdrlen += IEEE80211_HT_CTL_LEN; + goto out; + } + if (ieee80211_is_ctl(fc)) { /* * ACK and CTS are 10 bytes, all others 16. To see how @@ -752,8 +758,8 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, if (skb->priority >= 256 && skb->priority <= 263) return skb->priority - 256; - if (vlan_tx_tag_present(skb)) { - vlan_priority = (vlan_tx_tag_get(skb) & VLAN_PRIO_MASK) + if (skb_vlan_tag_present(skb)) { + vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; if (vlan_priority > 0) return vlan_priority; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index debe733386f8..12e82a5e4ad5 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -561,11 +561,6 @@ static struct xfrm_algo_desc calg_list[] = { }, }; -static inline int aead_entries(void) -{ - return ARRAY_SIZE(aead_list); -} - static inline int aalg_entries(void) { return ARRAY_SIZE(aalg_list); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8128594ab379..7de2ed9ec46d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1019,7 +1019,8 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1121,7 +1122,8 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1842,7 +1844,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) goto out_cancel; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2282,7 +2285,8 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, goto out_cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2490,7 +2494,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) return err; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2712,7 +2717,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, @@ -2827,7 +2833,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, } upe->hard = !!hard; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2986,7 +2993,8 @@ static int build_report(struct sk_buff *skb, u8 proto, return err; } } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_report(struct net *net, u8 proto, @@ -3031,7 +3039,8 @@ static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, um->old_sport = x->encap->encap_sport; um->reqid = x->props.reqid; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, |