diff options
Diffstat (limited to 'net')
460 files changed, 17843 insertions, 9632 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 8ca533c95de0..add69d0fd99d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -105,7 +105,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) return NULL; memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; - skb_reset_mac_len(skb); return skb; } @@ -139,6 +138,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); + skb_reset_mac_len(skb); + return skb; err_free: @@ -368,3 +369,9 @@ void vlan_vids_del_by_dev(struct net_device *dev, vlan_vid_del(dev, vid_info->vid); } EXPORT_SYMBOL(vlan_vids_del_by_dev); + +bool vlan_uses_dev(const struct net_device *dev) +{ + return rtnl_dereference(dev->vlan_info) ? true : false; +} +EXPORT_SYMBOL(vlan_uses_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 73a2a83ee2da..402442402af7 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -137,9 +137,21 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, return rc; } +static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb) +{ +#ifdef CONFIG_NET_POLL_CONTROLLER + if (vlan->netpoll) + netpoll_send_skb(vlan->netpoll, skb); +#else + BUG(); +#endif + return NETDEV_TX_OK; +} + static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); unsigned int len; int ret; @@ -150,29 +162,30 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ if (veth->h_vlan_proto != htons(ETH_P_8021Q) || - vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) { + vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; - vlan_tci = vlan_dev_priv(dev)->vlan_id; + vlan_tci = vlan->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb->dev = vlan_dev_priv(dev)->real_dev; + skb->dev = vlan->real_dev; len = skb->len; - if (netpoll_tx_running(dev)) - return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev); + if (unlikely(netpoll_tx_running(dev))) + return vlan_netpoll_send_skb(vlan, skb); + ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vlan_pcpu_stats *stats; - stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats); + stats = this_cpu_ptr(vlan->vlan_pcpu_stats); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; u64_stats_update_end(&stats->syncp); } else { - this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped); + this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped); } return ret; @@ -669,25 +682,26 @@ static void vlan_dev_poll_controller(struct net_device *dev) return; } -static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo, + gfp_t gfp) { - struct vlan_dev_priv *info = vlan_dev_priv(dev); - struct net_device *real_dev = info->real_dev; + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev = vlan->real_dev; struct netpoll *netpoll; int err = 0; - netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); + netpoll = kzalloc(sizeof(*netpoll), gfp); err = -ENOMEM; if (!netpoll) goto out; - err = __netpoll_setup(netpoll, real_dev); + err = __netpoll_setup(netpoll, real_dev, gfp); if (err) { kfree(netpoll); goto out; } - info->netpoll = netpoll; + vlan->netpoll = netpoll; out: return err; @@ -695,19 +709,15 @@ out: static void vlan_dev_netpoll_cleanup(struct net_device *dev) { - struct vlan_dev_priv *info = vlan_dev_priv(dev); - struct netpoll *netpoll = info->netpoll; + struct vlan_dev_priv *vlan= vlan_dev_priv(dev); + struct netpoll *netpoll = vlan->netpoll; if (!netpoll) return; - info->netpoll = NULL; - - /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); + vlan->netpoll = NULL; - __netpoll_cleanup(netpoll); - kfree(netpoll); + __netpoll_free_rcu(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index c718fd3664b6..4de77ea5fa37 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -105,7 +105,7 @@ static const struct file_operations vlandev_fops = { }; /* - * Proc filesystem derectory entries. + * Proc filesystem directory entries. */ /* Strings */ diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 6449bae15702..505f0ce3f10b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1083,7 +1083,7 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { - flush_work_sync(&p9_poll_work); + flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); diff --git a/net/Kconfig b/net/Kconfig index 245831bec09a..30b48f523135 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -52,6 +52,8 @@ source "net/iucv/Kconfig" config INET bool "TCP/IP networking" + select CRYPTO + select CRYPTO_AES ---help--- These are the protocols used on the Internet and on most local Ethernets. It is highly recommended to say Y here (this will enlarge diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index b5b1a221c242..c30f3a0717fb 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -183,7 +183,8 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v) ntohs(at->dest_net), at->dest_node, at->dest_port, sk_wmem_alloc_get(s), sk_rmem_alloc_get(s), - s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); + s->sk_state, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s))); out: return 0; } diff --git a/net/atm/common.c b/net/atm/common.c index b4b44dbed645..0c0ad930a632 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) return -ENOTCONN; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = vcc->dev->number; pvc.sap_addr.vpi = vcc->vpi; diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 3a734919c36c..ae0324021407 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, return -ENOTCONN; *sockaddr_len = sizeof(struct sockaddr_atmpvc); addr = (struct sockaddr_atmpvc *)sockaddr; + memset(addr, 0, sizeof(*addr)); addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; diff --git a/net/atm/resources.c b/net/atm/resources.c index 23f45ce6f351..0447d5d0b639 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -432,7 +432,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) size = dev->ops->ioctl(dev, cmd, buf); } if (size < 0) { - error = (size == -ENOIOCTLCMD ? -EINVAL : size); + error = (size == -ENOIOCTLCMD ? -ENOTTY : size); goto done; } } diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index e3c579ba6325..957999e43ff7 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -51,14 +51,14 @@ int ax25_uid_policy; EXPORT_SYMBOL(ax25_uid_policy); -ax25_uid_assoc *ax25_findbyuid(uid_t uid) +ax25_uid_assoc *ax25_findbyuid(kuid_t uid) { ax25_uid_assoc *ax25_uid, *res = NULL; struct hlist_node *node; read_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { - if (ax25_uid->uid == uid) { + if (uid_eq(ax25_uid->uid, uid)) { ax25_uid_hold(ax25_uid); res = ax25_uid; break; @@ -84,7 +84,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) read_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { - res = ax25_uid->uid; + res = from_kuid_munged(current_user_ns(), ax25_uid->uid); break; } } @@ -93,9 +93,14 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) return res; case SIOCAX25ADDUID: + { + kuid_t sax25_kuid; if (!capable(CAP_NET_ADMIN)) return -EPERM; - user = ax25_findbyuid(sax->sax25_uid); + sax25_kuid = make_kuid(current_user_ns(), sax->sax25_uid); + if (!uid_valid(sax25_kuid)) + return -EINVAL; + user = ax25_findbyuid(sax25_kuid); if (user) { ax25_uid_put(user); return -EEXIST; @@ -106,7 +111,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) return -ENOMEM; atomic_set(&ax25_uid->refcount, 1); - ax25_uid->uid = sax->sax25_uid; + ax25_uid->uid = sax25_kuid; ax25_uid->call = sax->sax25_call; write_lock(&ax25_uid_lock); @@ -114,7 +119,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) write_unlock(&ax25_uid_lock); return 0; - + } case SIOCAX25DELUID: if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -172,7 +177,9 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v) struct ax25_uid_assoc *pt; pt = hlist_entry(v, struct ax25_uid_assoc, uid_node); - seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(buf, &pt->call)); + seq_printf(seq, "%6d %s\n", + from_kuid_munged(seq_user_ns(seq), pt->uid), + ax2asc(buf, &pt->call)); } return 0; } diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index e877af8bdd1e..b02b75dae3a8 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -166,13 +166,15 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, int16_t buff_pos; struct batadv_ogm_packet *batadv_ogm_packet; struct sk_buff *skb; + uint8_t *packet_pos; if (hard_iface->if_status != BATADV_IF_ACTIVE) return; packet_num = 0; buff_pos = 0; - batadv_ogm_packet = (struct batadv_ogm_packet *)forw_packet->skb->data; + packet_pos = forw_packet->skb->data; + batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, @@ -181,15 +183,17 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* we might have aggregated direct link packets with an * ordinary base packet */ - if ((forw_packet->direct_link_flags & (1 << packet_num)) && - (forw_packet->if_incoming == hard_iface)) + if (forw_packet->direct_link_flags & BIT(packet_num) && + forw_packet->if_incoming == hard_iface) batadv_ogm_packet->flags |= BATADV_DIRECTLINK; else batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK; - fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ? - "Sending own" : - "Forwarding")); + if (packet_num > 0 || !forw_packet->own) + fwd_str = "Forwarding"; + else + fwd_str = "Sending own"; + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s %spacket (originator %pM, seqno %u, TQ %d, TTL %d, IDF %s, ttvn %d) on interface %s [%pM]\n", fwd_str, (packet_num > 0 ? "aggregated " : ""), @@ -204,8 +208,8 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, buff_pos += BATADV_OGM_HLEN; buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes); packet_num++; - batadv_ogm_packet = (struct batadv_ogm_packet *) - (forw_packet->skb->data + buff_pos); + packet_pos = forw_packet->skb->data + buff_pos; + batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; } /* create clone because function is called more than once */ @@ -227,9 +231,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) struct batadv_hard_iface *primary_if = NULL; struct batadv_ogm_packet *batadv_ogm_packet; unsigned char directlink; + uint8_t *packet_pos; - batadv_ogm_packet = (struct batadv_ogm_packet *) - (forw_packet->skb->data); + packet_pos = forw_packet->skb->data; + batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; directlink = (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? 1 : 0); if (!forw_packet->if_incoming) { @@ -454,6 +459,7 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, int packet_len, bool direct_link) { unsigned char *skb_buff; + unsigned long new_direct_link_flag; skb_buff = skb_put(forw_packet_aggr->skb, packet_len); memcpy(skb_buff, packet_buff, packet_len); @@ -461,9 +467,10 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, forw_packet_aggr->num_packets++; /* save packet direct link flag status */ - if (direct_link) - forw_packet_aggr->direct_link_flags |= - (1 << forw_packet_aggr->num_packets); + if (direct_link) { + new_direct_link_flag = BIT(forw_packet_aggr->num_packets); + forw_packet_aggr->direct_link_flags |= new_direct_link_flag; + } } static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, @@ -586,6 +593,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *primary_if; int vis_server, tt_num_changes = 0; + uint32_t seqno; + uint8_t bandwidth; vis_server = atomic_read(&bat_priv->vis_mode); primary_if = batadv_primary_if_get_selected(bat_priv); @@ -599,12 +608,12 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; /* change sequence number to network order */ - batadv_ogm_packet->seqno = - htonl((uint32_t)atomic_read(&hard_iface->seqno)); + seqno = (uint32_t)atomic_read(&hard_iface->seqno); + batadv_ogm_packet->seqno = htonl(seqno); atomic_inc(&hard_iface->seqno); - batadv_ogm_packet->ttvn = atomic_read(&bat_priv->ttvn); - batadv_ogm_packet->tt_crc = htons(bat_priv->tt_crc); + batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn); + batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc); if (tt_num_changes >= 0) batadv_ogm_packet->tt_num_changes = tt_num_changes; @@ -613,12 +622,13 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) else batadv_ogm_packet->flags &= ~BATADV_VIS_SERVER; - if ((hard_iface == primary_if) && - (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_SERVER)) - batadv_ogm_packet->gw_flags = - (uint8_t)atomic_read(&bat_priv->gw_bandwidth); - else + if (hard_iface == primary_if && + atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_SERVER) { + bandwidth = (uint8_t)atomic_read(&bat_priv->gw_bandwidth); + batadv_ogm_packet->gw_flags = bandwidth; + } else { batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS; + } batadv_slide_own_bcast_window(hard_iface); batadv_iv_ogm_queue_add(bat_priv, hard_iface->packet_buff, @@ -642,8 +652,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; struct hlist_node *node; - uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; + int if_num; + uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; + uint8_t tq_avg; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "update_originator(): Searching and updating originator entry of received packet\n"); @@ -667,8 +679,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, spin_lock_bh(&tmp_neigh_node->lq_update_lock); batadv_ring_buffer_set(tmp_neigh_node->tq_recv, &tmp_neigh_node->tq_index, 0); - tmp_neigh_node->tq_avg = - batadv_ring_buffer_avg(tmp_neigh_node->tq_recv); + tq_avg = batadv_ring_buffer_avg(tmp_neigh_node->tq_recv); + tmp_neigh_node->tq_avg = tq_avg; spin_unlock_bh(&tmp_neigh_node->lq_update_lock); } @@ -727,17 +739,17 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (router && (neigh_node->tq_avg == router->tq_avg)) { orig_node_tmp = router->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_orig = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = router->if_incoming->if_num; + sum_orig = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); orig_node_tmp = neigh_node->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_neigh = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = neigh_node->if_incoming->if_num; + sum_neigh = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - if (bcast_own_sum_orig >= bcast_own_sum_neigh) + if (sum_orig >= sum_neigh) goto update_tt; } @@ -835,8 +847,10 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, spin_unlock_bh(&orig_node->ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ - total_count = (orig_eq_count > neigh_rq_count ? - neigh_rq_count : orig_eq_count); + if (orig_eq_count > neigh_rq_count) + total_count = neigh_rq_count; + else + total_count = orig_eq_count; /* if we have too few packets (too less data) we set tq_own to zero * if we receive too few packets it is not considered bidirectional @@ -910,6 +924,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, int set_mark, ret = -1; uint32_t seqno = ntohl(batadv_ogm_packet->seqno); uint8_t *neigh_addr; + uint8_t packet_count; orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig); if (!orig_node) @@ -944,9 +959,9 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, tmp_neigh_node->real_bits, seq_diff, set_mark); - tmp_neigh_node->real_packet_count = - bitmap_weight(tmp_neigh_node->real_bits, - BATADV_TQ_LOCAL_WINDOW_SIZE); + packet_count = bitmap_weight(tmp_neigh_node->real_bits, + BATADV_TQ_LOCAL_WINDOW_SIZE); + tmp_neigh_node->real_packet_count = packet_count; } rcu_read_unlock(); @@ -1163,9 +1178,12 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, /* if sender is a direct neighbor the sender mac equals * originator mac */ - orig_neigh_node = (is_single_hop_neigh ? - orig_node : - batadv_get_orig_node(bat_priv, ethhdr->h_source)); + if (is_single_hop_neigh) + orig_neigh_node = orig_node; + else + orig_neigh_node = batadv_get_orig_node(bat_priv, + ethhdr->h_source); + if (!orig_neigh_node) goto out; @@ -1251,6 +1269,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, int buff_pos = 0, packet_len; unsigned char *tt_buff, *packet_buff; bool ret; + uint8_t *packet_pos; ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); if (!ret) @@ -1281,8 +1300,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, buff_pos += BATADV_OGM_HLEN; buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes); - batadv_ogm_packet = (struct batadv_ogm_packet *) - (packet_buff + buff_pos); + packet_pos = packet_buff + buff_pos; + batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; } while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, batadv_ogm_packet->tt_num_changes)); diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index a081ce1c0514..cebaae7e148b 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -20,8 +20,8 @@ #ifndef _NET_BATMAN_ADV_BITARRAY_H_ #define _NET_BATMAN_ADV_BITARRAY_H_ -/* returns true if the corresponding bit in the given seq_bits indicates true - * and curr_seqno is within range of last_seqno +/* Returns 1 if the corresponding bit in the given seq_bits indicates true + * and curr_seqno is within range of last_seqno. Otherwise returns 0. */ static inline int batadv_test_bit(const unsigned long *seq_bits, uint32_t last_seqno, uint32_t curr_seqno) @@ -32,7 +32,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits, if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) return 0; else - return test_bit(diff, seq_bits); + return test_bit(diff, seq_bits) != 0; } /* turn corresponding bit on, so we can remember that we got the packet */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 6705d35b17ce..0a9084ad19a6 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -133,7 +133,7 @@ static void batadv_claim_free_ref(struct batadv_claim *claim) static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, struct batadv_claim *data) { - struct batadv_hashtable *hash = bat_priv->claim_hash; + struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct hlist_head *head; struct hlist_node *node; struct batadv_claim *claim; @@ -174,7 +174,7 @@ static struct batadv_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, uint8_t *addr, short vid) { - struct batadv_hashtable *hash = bat_priv->backbone_hash; + struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; struct hlist_node *node; struct batadv_backbone_gw search_entry, *backbone_gw; @@ -218,7 +218,7 @@ batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw) int i; spinlock_t *list_lock; /* protects write access to the hash lists */ - hash = backbone_gw->bat_priv->claim_hash; + hash = backbone_gw->bat_priv->bla.claim_hash; if (!hash) return; @@ -265,7 +265,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, if (!primary_if) return; - memcpy(&local_claim_dest, &bat_priv->claim_dest, + memcpy(&local_claim_dest, &bat_priv->bla.claim_dest, sizeof(local_claim_dest)); local_claim_dest.type = claimtype; @@ -281,7 +281,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, NULL, /* Ethernet SRC/HW SRC: originator mac */ primary_if->net_dev->dev_addr, - /* HW DST: FF:43:05:XX:00:00 + /* HW DST: FF:43:05:XX:YY:YY * with XX = claim type * and YY:YY = group id */ @@ -295,7 +295,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, /* now we pretend that the client would have sent this ... */ switch (claimtype) { - case BATADV_CLAIM_TYPE_ADD: + case BATADV_CLAIM_TYPE_CLAIM: /* normal claim frame * set Ethernet SRC to the clients mac */ @@ -303,7 +303,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid); break; - case BATADV_CLAIM_TYPE_DEL: + case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame * set HW SRC to the clients mac */ @@ -323,7 +323,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame - * set HW SRC to the special mac containg the crc + * set HW SRC and header destination to the receiving backbone + * gws mac */ memcpy(hw_src, mac, ETH_ALEN); memcpy(ethhdr->h_dest, mac, ETH_ALEN); @@ -339,8 +340,9 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); - bat_priv->stats.rx_packets++; - bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; + batadv_inc_counter(bat_priv, BATADV_CNT_RX); + batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, + skb->len + ETH_HLEN); soft_iface->last_rx = jiffies; netif_rx(skb); @@ -389,7 +391,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, /* one for the hash, one for returning */ atomic_set(&entry->refcount, 2); - hash_added = batadv_hash_add(bat_priv->backbone_hash, + hash_added = batadv_hash_add(bat_priv->bla.backbone_hash, batadv_compare_backbone_gw, batadv_choose_backbone_gw, entry, &entry->hash_entry); @@ -456,7 +458,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, if (!backbone_gw) return; - hash = bat_priv->claim_hash; + hash = bat_priv->bla.claim_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -467,7 +469,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, continue; batadv_bla_send_claim(bat_priv, claim->addr, claim->vid, - BATADV_CLAIM_TYPE_ADD); + BATADV_CLAIM_TYPE_CLAIM); } rcu_read_unlock(); } @@ -497,7 +499,7 @@ static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw) /* no local broadcasts should be sent or received, for now. */ if (!atomic_read(&backbone_gw->request_sent)) { - atomic_inc(&backbone_gw->bat_priv->bla_num_requests); + atomic_inc(&backbone_gw->bat_priv->bla.num_requests); atomic_set(&backbone_gw->request_sent, 1); } } @@ -557,7 +559,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", mac, vid); - hash_added = batadv_hash_add(bat_priv->claim_hash, + hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, &claim->hash_entry); @@ -577,8 +579,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, "bla_add_claim(): changing ownership for %pM, vid %d\n", mac, vid); - claim->backbone_gw->crc ^= - crc16(0, claim->addr, ETH_ALEN); + claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); } @@ -610,7 +611,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", mac, vid); - batadv_hash_remove(bat_priv->claim_hash, batadv_compare_claim, + batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); batadv_claim_free_ref(claim); /* reference from the hash is gone */ @@ -657,7 +658,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, * we can allow traffic again. */ if (atomic_read(&backbone_gw->request_sent)) { - atomic_dec(&backbone_gw->bat_priv->bla_num_requests); + atomic_dec(&backbone_gw->bat_priv->bla.num_requests); atomic_set(&backbone_gw->request_sent, 0); } } @@ -702,7 +703,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, if (primary_if && batadv_compare_eth(backbone_addr, primary_if->net_dev->dev_addr)) batadv_bla_send_claim(bat_priv, claim_addr, vid, - BATADV_CLAIM_TYPE_DEL); + BATADV_CLAIM_TYPE_UNCLAIM); backbone_gw = batadv_backbone_hash_find(bat_priv, backbone_addr, vid); @@ -738,7 +739,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, batadv_bla_add_claim(bat_priv, claim_addr, vid, backbone_gw); if (batadv_compare_eth(backbone_addr, primary_if->net_dev->dev_addr)) batadv_bla_send_claim(bat_priv, claim_addr, vid, - BATADV_CLAIM_TYPE_ADD); + BATADV_CLAIM_TYPE_CLAIM); /* TODO: we could call something like tt_local_del() here. */ @@ -772,7 +773,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, struct batadv_bla_claim_dst *bla_dst, *bla_dst_own; bla_dst = (struct batadv_bla_claim_dst *)hw_dst; - bla_dst_own = &bat_priv->claim_dest; + bla_dst_own = &bat_priv->bla.claim_dest; /* check if it is a claim packet in general */ if (memcmp(bla_dst->magic, bla_dst_own->magic, @@ -783,12 +784,12 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, * otherwise assume it is in the hw_src */ switch (bla_dst->type) { - case BATADV_CLAIM_TYPE_ADD: + case BATADV_CLAIM_TYPE_CLAIM: backbone_addr = hw_src; break; case BATADV_CLAIM_TYPE_REQUEST: case BATADV_CLAIM_TYPE_ANNOUNCE: - case BATADV_CLAIM_TYPE_DEL: + case BATADV_CLAIM_TYPE_UNCLAIM: backbone_addr = ethhdr->h_source; break; default: @@ -904,12 +905,12 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, /* check for the different types of claim frames ... */ switch (bla_dst->type) { - case BATADV_CLAIM_TYPE_ADD: + case BATADV_CLAIM_TYPE_CLAIM: if (batadv_handle_claim(bat_priv, primary_if, hw_src, ethhdr->h_source, vid)) return 1; break; - case BATADV_CLAIM_TYPE_DEL: + case BATADV_CLAIM_TYPE_UNCLAIM: if (batadv_handle_unclaim(bat_priv, primary_if, ethhdr->h_source, hw_src, vid)) return 1; @@ -945,7 +946,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) spinlock_t *list_lock; /* protects write access to the hash lists */ int i; - hash = bat_priv->backbone_hash; + hash = bat_priv->bla.backbone_hash; if (!hash) return; @@ -969,7 +970,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) purge_now: /* don't wait for the pending request anymore */ if (atomic_read(&backbone_gw->request_sent)) - atomic_dec(&bat_priv->bla_num_requests); + atomic_dec(&bat_priv->bla.num_requests); batadv_bla_del_backbone_claims(backbone_gw); @@ -999,7 +1000,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, struct batadv_hashtable *hash; int i; - hash = bat_priv->claim_hash; + hash = bat_priv->bla.claim_hash; if (!hash) return; @@ -1046,11 +1047,12 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; + __be16 group; int i; /* reset bridge loop avoidance group id */ - bat_priv->claim_dest.group = - htons(crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN)); + group = htons(crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN)); + bat_priv->bla.claim_dest.group = group; if (!oldif) { batadv_bla_purge_claims(bat_priv, NULL, 1); @@ -1058,7 +1060,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, return; } - hash = bat_priv->backbone_hash; + hash = bat_priv->bla.backbone_hash; if (!hash) return; @@ -1088,8 +1090,8 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, /* (re)start the timer */ static void batadv_bla_start_timer(struct batadv_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->bla_work, batadv_bla_periodic_work); - queue_delayed_work(batadv_event_workqueue, &bat_priv->bla_work, + INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work); + queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); } @@ -1099,9 +1101,9 @@ static void batadv_bla_start_timer(struct batadv_priv *bat_priv) */ static void batadv_bla_periodic_work(struct work_struct *work) { - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); + struct delayed_work *delayed_work; struct batadv_priv *bat_priv; + struct batadv_priv_bla *priv_bla; struct hlist_node *node; struct hlist_head *head; struct batadv_backbone_gw *backbone_gw; @@ -1109,7 +1111,9 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct batadv_hard_iface *primary_if; int i; - bat_priv = container_of(delayed_work, struct batadv_priv, bla_work); + delayed_work = container_of(work, struct delayed_work, work); + priv_bla = container_of(delayed_work, struct batadv_priv_bla, work); + bat_priv = container_of(priv_bla, struct batadv_priv, bla); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; @@ -1120,7 +1124,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) if (!atomic_read(&bat_priv->bridge_loop_avoidance)) goto out; - hash = bat_priv->backbone_hash; + hash = bat_priv->bla.backbone_hash; if (!hash) goto out; @@ -1160,40 +1164,41 @@ int batadv_bla_init(struct batadv_priv *bat_priv) int i; uint8_t claim_dest[ETH_ALEN] = {0xff, 0x43, 0x05, 0x00, 0x00, 0x00}; struct batadv_hard_iface *primary_if; + uint16_t crc; + unsigned long entrytime; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hash registering\n"); /* setting claim destination address */ - memcpy(&bat_priv->claim_dest.magic, claim_dest, 3); - bat_priv->claim_dest.type = 0; + memcpy(&bat_priv->bla.claim_dest.magic, claim_dest, 3); + bat_priv->bla.claim_dest.type = 0; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if) { - bat_priv->claim_dest.group = - htons(crc16(0, primary_if->net_dev->dev_addr, - ETH_ALEN)); + crc = crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN); + bat_priv->bla.claim_dest.group = htons(crc); batadv_hardif_free_ref(primary_if); } else { - bat_priv->claim_dest.group = 0; /* will be set later */ + bat_priv->bla.claim_dest.group = 0; /* will be set later */ } /* initialize the duplicate list */ + entrytime = jiffies - msecs_to_jiffies(BATADV_DUPLIST_TIMEOUT); for (i = 0; i < BATADV_DUPLIST_SIZE; i++) - bat_priv->bcast_duplist[i].entrytime = - jiffies - msecs_to_jiffies(BATADV_DUPLIST_TIMEOUT); - bat_priv->bcast_duplist_curr = 0; + bat_priv->bla.bcast_duplist[i].entrytime = entrytime; + bat_priv->bla.bcast_duplist_curr = 0; - if (bat_priv->claim_hash) + if (bat_priv->bla.claim_hash) return 0; - bat_priv->claim_hash = batadv_hash_new(128); - bat_priv->backbone_hash = batadv_hash_new(32); + bat_priv->bla.claim_hash = batadv_hash_new(128); + bat_priv->bla.backbone_hash = batadv_hash_new(32); - if (!bat_priv->claim_hash || !bat_priv->backbone_hash) + if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash) return -ENOMEM; - batadv_hash_set_lock_class(bat_priv->claim_hash, + batadv_hash_set_lock_class(bat_priv->bla.claim_hash, &batadv_claim_hash_lock_class_key); - batadv_hash_set_lock_class(bat_priv->backbone_hash, + batadv_hash_set_lock_class(bat_priv->bla.backbone_hash, &batadv_backbone_hash_lock_class_key); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hashes initialized\n"); @@ -1234,8 +1239,9 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, crc = crc16(0, content, length); for (i = 0; i < BATADV_DUPLIST_SIZE; i++) { - curr = (bat_priv->bcast_duplist_curr + i) % BATADV_DUPLIST_SIZE; - entry = &bat_priv->bcast_duplist[curr]; + curr = (bat_priv->bla.bcast_duplist_curr + i); + curr %= BATADV_DUPLIST_SIZE; + entry = &bat_priv->bla.bcast_duplist[curr]; /* we can stop searching if the entry is too old ; * later entries will be even older @@ -1256,13 +1262,13 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, return 1; } /* not found, add a new entry (overwrite the oldest entry) */ - curr = (bat_priv->bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1); + curr = (bat_priv->bla.bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1); curr %= BATADV_DUPLIST_SIZE; - entry = &bat_priv->bcast_duplist[curr]; + entry = &bat_priv->bla.bcast_duplist[curr]; entry->crc = crc; entry->entrytime = jiffies; memcpy(entry->orig, bcast_packet->orig, ETH_ALEN); - bat_priv->bcast_duplist_curr = curr; + bat_priv->bla.bcast_duplist_curr = curr; /* allow it, its the first occurence. */ return 0; @@ -1279,7 +1285,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, */ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) { - struct batadv_hashtable *hash = bat_priv->backbone_hash; + struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; struct hlist_node *node; struct batadv_backbone_gw *backbone_gw; @@ -1339,8 +1345,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, if (!pskb_may_pull(skb, hdr_size + sizeof(struct vlan_ethhdr))) return 0; - vhdr = (struct vlan_ethhdr *)(((uint8_t *)skb->data) + - hdr_size); + vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; } @@ -1359,18 +1364,18 @@ void batadv_bla_free(struct batadv_priv *bat_priv) { struct batadv_hard_iface *primary_if; - cancel_delayed_work_sync(&bat_priv->bla_work); + cancel_delayed_work_sync(&bat_priv->bla.work); primary_if = batadv_primary_if_get_selected(bat_priv); - if (bat_priv->claim_hash) { + if (bat_priv->bla.claim_hash) { batadv_bla_purge_claims(bat_priv, primary_if, 1); - batadv_hash_destroy(bat_priv->claim_hash); - bat_priv->claim_hash = NULL; + batadv_hash_destroy(bat_priv->bla.claim_hash); + bat_priv->bla.claim_hash = NULL; } - if (bat_priv->backbone_hash) { + if (bat_priv->bla.backbone_hash) { batadv_bla_purge_backbone_gw(bat_priv, 1); - batadv_hash_destroy(bat_priv->backbone_hash); - bat_priv->backbone_hash = NULL; + batadv_hash_destroy(bat_priv->bla.backbone_hash); + bat_priv->bla.backbone_hash = NULL; } if (primary_if) batadv_hardif_free_ref(primary_if); @@ -1409,7 +1414,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, goto allow; - if (unlikely(atomic_read(&bat_priv->bla_num_requests))) + if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) goto handled; @@ -1508,7 +1513,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) ethhdr = (struct ethhdr *)skb_mac_header(skb); - if (unlikely(atomic_read(&bat_priv->bla_num_requests))) + if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ if (is_multicast_ether_addr(ethhdr->h_dest)) goto handled; @@ -1564,7 +1569,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->claim_hash; + struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct batadv_claim *claim; struct batadv_hard_iface *primary_if; struct hlist_node *node; @@ -1593,7 +1598,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Claims announced for the mesh %s (orig %pM, group id %04x)\n", net_dev->name, primary_addr, - ntohs(bat_priv->claim_dest.group)); + ntohs(bat_priv->bla.claim_dest.group)); seq_printf(seq, " %-17s %-5s %-17s [o] (%-4s)\n", "Client", "VID", "Originator", "CRC"); for (i = 0; i < hash->size; i++) { @@ -1616,3 +1621,68 @@ out: batadv_hardif_free_ref(primary_if); return ret; } + +int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; + struct batadv_backbone_gw *backbone_gw; + struct batadv_hard_iface *primary_if; + struct hlist_node *node; + struct hlist_head *head; + int secs, msecs; + uint32_t i; + bool is_own; + int ret = 0; + uint8_t *primary_addr; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) { + ret = seq_printf(seq, + "BATMAN mesh %s disabled - please specify interfaces to enable it\n", + net_dev->name); + goto out; + } + + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = seq_printf(seq, + "BATMAN mesh %s disabled - primary interface not active\n", + net_dev->name); + goto out; + } + + primary_addr = primary_if->net_dev->dev_addr; + seq_printf(seq, + "Backbones announced for the mesh %s (orig %pM, group id %04x)\n", + net_dev->name, primary_addr, + ntohs(bat_priv->bla.claim_dest.group)); + seq_printf(seq, " %-17s %-5s %-9s (%-4s)\n", + "Originator", "VID", "last seen", "CRC"); + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + msecs = jiffies_to_msecs(jiffies - + backbone_gw->lasttime); + secs = msecs / 1000; + msecs = msecs % 1000; + + is_own = batadv_compare_eth(backbone_gw->orig, + primary_addr); + if (is_own) + continue; + + seq_printf(seq, + " * %pM on % 5d % 4i.%03is (%04x)\n", + backbone_gw->orig, backbone_gw->vid, + secs, msecs, backbone_gw->crc); + } + rcu_read_unlock(); + } +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return ret; +} diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 563cfbf94a7f..789cb73bde67 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -27,6 +27,8 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid); int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, + void *offset); int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig); int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, struct batadv_bcast_packet *bcast_packet, @@ -41,8 +43,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_BLA */ static inline int batadv_bla_rx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid, - bool is_bcast) + struct sk_buff *skb, short vid, bool is_bcast) { return 0; } @@ -66,6 +67,12 @@ static inline int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, return 0; } +static inline int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, + void *offset) +{ + return 0; +} + static inline int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) { diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 34fbb1667bcd..391d4fb2026f 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -267,6 +267,15 @@ static int batadv_bla_claim_table_open(struct inode *inode, struct file *file) return single_open(file, batadv_bla_claim_table_seq_print_text, net_dev); } + +static int batadv_bla_backbone_table_open(struct inode *inode, + struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_bla_backbone_table_seq_print_text, + net_dev); +} + #endif static int batadv_transtable_local_open(struct inode *inode, struct file *file) @@ -305,6 +314,8 @@ static BATADV_DEBUGINFO(transtable_global, S_IRUGO, batadv_transtable_global_open); #ifdef CONFIG_BATMAN_ADV_BLA static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open); +static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO, + batadv_bla_backbone_table_open); #endif static BATADV_DEBUGINFO(transtable_local, S_IRUGO, batadv_transtable_local_open); @@ -316,6 +327,7 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_transtable_global, #ifdef CONFIG_BATMAN_ADV_BLA &batadv_debuginfo_bla_claim_table, + &batadv_debuginfo_bla_backbone_table, #endif &batadv_debuginfo_transtable_local, &batadv_debuginfo_vis_data, diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index b421cc49d2cd..15d67abc10a4 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -48,7 +48,7 @@ batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv) struct batadv_gw_node *gw_node; rcu_read_lock(); - gw_node = rcu_dereference(bat_priv->curr_gw); + gw_node = rcu_dereference(bat_priv->gw.curr_gw); if (!gw_node) goto out; @@ -91,23 +91,23 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, { struct batadv_gw_node *curr_gw_node; - spin_lock_bh(&bat_priv->gw_list_lock); + spin_lock_bh(&bat_priv->gw.list_lock); if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) new_gw_node = NULL; - curr_gw_node = rcu_dereference_protected(bat_priv->curr_gw, 1); - rcu_assign_pointer(bat_priv->curr_gw, new_gw_node); + curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1); + rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node); if (curr_gw_node) batadv_gw_node_free_ref(curr_gw_node); - spin_unlock_bh(&bat_priv->gw_list_lock); + spin_unlock_bh(&bat_priv->gw.list_lock); } void batadv_gw_deselect(struct batadv_priv *bat_priv) { - atomic_set(&bat_priv->gw_reselect, 1); + atomic_set(&bat_priv->gw.reselect, 1); } static struct batadv_gw_node * @@ -117,12 +117,17 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; + uint32_t gw_divisor; uint8_t max_tq = 0; int down, up; + uint8_t tq_avg; struct batadv_orig_node *orig_node; + gw_divisor = BATADV_TQ_LOCAL_WINDOW_SIZE * BATADV_TQ_LOCAL_WINDOW_SIZE; + gw_divisor *= 64; + rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { + hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; @@ -134,19 +139,19 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (!atomic_inc_not_zero(&gw_node->refcount)) goto next; + tq_avg = router->tq_avg; + switch (atomic_read(&bat_priv->gw_sel_class)) { case 1: /* fast connection */ batadv_gw_bandwidth_to_kbit(orig_node->gw_flags, &down, &up); - tmp_gw_factor = (router->tq_avg * router->tq_avg * - down * 100 * 100) / - (BATADV_TQ_LOCAL_WINDOW_SIZE * - BATADV_TQ_LOCAL_WINDOW_SIZE * 64); + tmp_gw_factor = tq_avg * tq_avg * down * 100 * 100; + tmp_gw_factor /= gw_divisor; if ((tmp_gw_factor > max_gw_factor) || ((tmp_gw_factor == max_gw_factor) && - (router->tq_avg > max_tq))) { + (tq_avg > max_tq))) { if (curr_gw) batadv_gw_node_free_ref(curr_gw); curr_gw = gw_node; @@ -161,7 +166,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) * soon as a better gateway appears which has * $routing_class more tq points) */ - if (router->tq_avg > max_tq) { + if (tq_avg > max_tq) { if (curr_gw) batadv_gw_node_free_ref(curr_gw); curr_gw = gw_node; @@ -170,8 +175,8 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) break; } - if (router->tq_avg > max_tq) - max_tq = router->tq_avg; + if (tq_avg > max_tq) + max_tq = tq_avg; if (tmp_gw_factor > max_gw_factor) max_gw_factor = tmp_gw_factor; @@ -200,11 +205,11 @@ void batadv_gw_election(struct batadv_priv *bat_priv) if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) goto out; - if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect)) - goto out; - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw) + goto out; + next_gw = batadv_gw_get_best_gw_node(bat_priv); if (curr_gw == next_gw) @@ -321,9 +326,9 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, gw_node->orig_node = orig_node; atomic_set(&gw_node->refcount, 1); - spin_lock_bh(&bat_priv->gw_list_lock); - hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list); - spin_unlock_bh(&bat_priv->gw_list_lock); + spin_lock_bh(&bat_priv->gw.list_lock); + hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); + spin_unlock_bh(&bat_priv->gw.list_lock); batadv_gw_bandwidth_to_kbit(new_gwflags, &down, &up); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -350,7 +355,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, curr_gw = batadv_gw_get_selected_gw_node(bat_priv); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { + hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { if (gw_node->orig_node != orig_node) continue; @@ -404,10 +409,10 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) curr_gw = batadv_gw_get_selected_gw_node(bat_priv); - spin_lock_bh(&bat_priv->gw_list_lock); + spin_lock_bh(&bat_priv->gw.list_lock); hlist_for_each_entry_safe(gw_node, node, node_tmp, - &bat_priv->gw_list, list) { + &bat_priv->gw.list, list) { if (((!gw_node->deleted) || (time_before(jiffies, gw_node->deleted + timeout))) && atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) @@ -420,7 +425,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) batadv_gw_node_free_ref(gw_node); } - spin_unlock_bh(&bat_priv->gw_list_lock); + spin_unlock_bh(&bat_priv->gw.list_lock); /* gw_deselect() needs to acquire the gw_list_lock */ if (do_deselect) @@ -496,7 +501,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) primary_if->net_dev->dev_addr, net_dev->name); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { + hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 282bf6e9353e..d112fd6750b0 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -103,13 +103,14 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv, { struct batadv_vis_packet *vis_packet; struct batadv_hard_iface *primary_if; + struct sk_buff *skb; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; - vis_packet = (struct batadv_vis_packet *) - bat_priv->my_vis_info->skb_packet->data; + skb = bat_priv->vis.my_info->skb_packet; + vis_packet = (struct batadv_vis_packet *)skb->data; memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN); memcpy(vis_packet->sender_orig, primary_if->net_dev->dev_addr, ETH_ALEN); @@ -313,7 +314,13 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; hard_iface->if_status = BATADV_IF_INACTIVE; - batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces); + ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces); + if (ret < 0) { + bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); + bat_priv->num_ifaces--; + hard_iface->if_status = BATADV_IF_NOT_IN_USE; + goto err_dev; + } hard_iface->batman_adv_ptype.type = ethertype; hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 13c88b25ab31..b4aa470bc4a6 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -58,9 +58,6 @@ static int __init batadv_init(void) batadv_iv_init(); - /* the name should not be longer than 10 chars - see - * http://lwn.net/Articles/23634/ - */ batadv_event_workqueue = create_singlethread_workqueue("bat_events"); if (!batadv_event_workqueue) @@ -97,20 +94,20 @@ int batadv_mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->forw_bat_list_lock); spin_lock_init(&bat_priv->forw_bcast_list_lock); - spin_lock_init(&bat_priv->tt_changes_list_lock); - spin_lock_init(&bat_priv->tt_req_list_lock); - spin_lock_init(&bat_priv->tt_roam_list_lock); - spin_lock_init(&bat_priv->tt_buff_lock); - spin_lock_init(&bat_priv->gw_list_lock); - spin_lock_init(&bat_priv->vis_hash_lock); - spin_lock_init(&bat_priv->vis_list_lock); + spin_lock_init(&bat_priv->tt.changes_list_lock); + spin_lock_init(&bat_priv->tt.req_list_lock); + spin_lock_init(&bat_priv->tt.roam_list_lock); + spin_lock_init(&bat_priv->tt.last_changeset_lock); + spin_lock_init(&bat_priv->gw.list_lock); + spin_lock_init(&bat_priv->vis.hash_lock); + spin_lock_init(&bat_priv->vis.list_lock); INIT_HLIST_HEAD(&bat_priv->forw_bat_list); INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); - INIT_HLIST_HEAD(&bat_priv->gw_list); - INIT_LIST_HEAD(&bat_priv->tt_changes_list); - INIT_LIST_HEAD(&bat_priv->tt_req_list); - INIT_LIST_HEAD(&bat_priv->tt_roam_list); + INIT_HLIST_HEAD(&bat_priv->gw.list); + INIT_LIST_HEAD(&bat_priv->tt.changes_list); + INIT_LIST_HEAD(&bat_priv->tt.req_list); + INIT_LIST_HEAD(&bat_priv->tt.roam_list); ret = batadv_originator_init(bat_priv); if (ret < 0) @@ -131,7 +128,7 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; - atomic_set(&bat_priv->gw_reselect, 0); + atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); return 0; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 5d8fa0757947..d57b746219de 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2012.3.0" +#define BATADV_SOURCE_VERSION "2012.4.0" #endif /* B.A.T.M.A.N. parameters */ @@ -41,13 +41,14 @@ * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE */ #define BATADV_PURGE_TIMEOUT 200000 /* 200 seconds */ -#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in miliseconds */ -#define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in miliseconds */ +#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */ +#define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */ +#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) */ #define BATADV_TQ_LOCAL_WINDOW_SIZE 64 -/* miliseconds we have to keep pending tt_req */ +/* milliseconds we have to keep pending tt_req */ #define BATADV_TT_REQUEST_TIMEOUT 3000 #define BATADV_TQ_GLOBAL_WINDOW_SIZE 5 @@ -59,7 +60,7 @@ #define BATADV_TT_OGM_APPEND_MAX 3 /* Time in which a client can roam at most ROAMING_MAX_COUNT times in - * miliseconds + * milliseconds */ #define BATADV_ROAMING_MAX_TIME 20000 #define BATADV_ROAMING_MAX_COUNT 5 @@ -123,15 +124,6 @@ enum batadv_uev_type { /* Append 'batman-adv: ' before kernel messages */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -/* all messages related to routing / flooding / broadcasting / etc */ -enum batadv_dbg_level { - BATADV_DBG_BATMAN = 1 << 0, - BATADV_DBG_ROUTES = 1 << 1, /* route added / changed / deleted */ - BATADV_DBG_TT = 1 << 2, /* translation table operations */ - BATADV_DBG_BLA = 1 << 3, /* bridge loop avoidance */ - BATADV_DBG_ALL = 15, -}; - /* Kernel headers */ #include <linux/mutex.h> /* mutex */ @@ -173,6 +165,15 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); int batadv_algo_select(struct batadv_priv *bat_priv, char *name); int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); +/* all messages related to routing / flooding / broadcasting / etc */ +enum batadv_dbg_level { + BATADV_DBG_BATMAN = BIT(0), + BATADV_DBG_ROUTES = BIT(1), /* route added / changed / deleted */ + BATADV_DBG_TT = BIT(2), /* translation table operations */ + BATADV_DBG_BLA = BIT(3), /* bridge loop avoidance */ + BATADV_DBG_ALL = 15, +}; + #ifdef CONFIG_BATMAN_ADV_DEBUG int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) __printf(2, 3); diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 8d3e55a96adc..2d23a14c220e 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -37,10 +37,10 @@ enum batadv_packettype { #define BATADV_COMPAT_VERSION 14 enum batadv_iv_flags { - BATADV_NOT_BEST_NEXT_HOP = 1 << 3, - BATADV_PRIMARIES_FIRST_HOP = 1 << 4, - BATADV_VIS_SERVER = 1 << 5, - BATADV_DIRECTLINK = 1 << 6, + BATADV_NOT_BEST_NEXT_HOP = BIT(3), + BATADV_PRIMARIES_FIRST_HOP = BIT(4), + BATADV_VIS_SERVER = BIT(5), + BATADV_DIRECTLINK = BIT(6), }; /* ICMP message types */ @@ -60,8 +60,8 @@ enum batadv_vis_packettype { /* fragmentation defines */ enum batadv_unicast_frag_flags { - BATADV_UNI_FRAG_HEAD = 1 << 0, - BATADV_UNI_FRAG_LARGETAIL = 1 << 1, + BATADV_UNI_FRAG_HEAD = BIT(0), + BATADV_UNI_FRAG_LARGETAIL = BIT(1), }; /* TT_QUERY subtypes */ @@ -74,26 +74,27 @@ enum batadv_tt_query_packettype { /* TT_QUERY flags */ enum batadv_tt_query_flags { - BATADV_TT_FULL_TABLE = 1 << 2, + BATADV_TT_FULL_TABLE = BIT(2), }; /* BATADV_TT_CLIENT flags. - * Flags from 1 to 1 << 7 are sent on the wire, while flags from 1 << 8 to - * 1 << 15 are used for local computation only + * Flags from BIT(0) to BIT(7) are sent on the wire, while flags from BIT(8) to + * BIT(15) are used for local computation only */ enum batadv_tt_client_flags { - BATADV_TT_CLIENT_DEL = 1 << 0, - BATADV_TT_CLIENT_ROAM = 1 << 1, - BATADV_TT_CLIENT_WIFI = 1 << 2, - BATADV_TT_CLIENT_NOPURGE = 1 << 8, - BATADV_TT_CLIENT_NEW = 1 << 9, - BATADV_TT_CLIENT_PENDING = 1 << 10, + BATADV_TT_CLIENT_DEL = BIT(0), + BATADV_TT_CLIENT_ROAM = BIT(1), + BATADV_TT_CLIENT_WIFI = BIT(2), + BATADV_TT_CLIENT_TEMP = BIT(3), + BATADV_TT_CLIENT_NOPURGE = BIT(8), + BATADV_TT_CLIENT_NEW = BIT(9), + BATADV_TT_CLIENT_PENDING = BIT(10), }; /* claim frame types for the bridge loop avoidance */ enum batadv_bla_claimframe { - BATADV_CLAIM_TYPE_ADD = 0x00, - BATADV_CLAIM_TYPE_DEL = 0x01, + BATADV_CLAIM_TYPE_CLAIM = 0x00, + BATADV_CLAIM_TYPE_UNCLAIM = 0x01, BATADV_CLAIM_TYPE_ANNOUNCE = 0x02, BATADV_CLAIM_TYPE_REQUEST = 0x03, }; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index bc2b88bbea1f..939fc01371df 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -579,32 +579,45 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, return router; } -int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) +static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct batadv_tt_query_packet *tt_query; - uint16_t tt_size; struct ethhdr *ethhdr; - char tt_flag; - size_t packet_size; /* drop packet if it has not necessary minimum size */ - if (unlikely(!pskb_may_pull(skb, - sizeof(struct batadv_tt_query_packet)))) - goto out; - - /* I could need to modify it */ - if (skb_cow(skb, sizeof(struct batadv_tt_query_packet)) < 0) - goto out; + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return -1; ethhdr = (struct ethhdr *)skb_mac_header(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) - goto out; + return -1; /* packet with broadcast sender address */ if (is_broadcast_ether_addr(ethhdr->h_source)) + return -1; + + /* not for me */ + if (!batadv_is_my_mac(ethhdr->h_dest)) + return -1; + + return 0; +} + +int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_tt_query_packet *tt_query; + uint16_t tt_size; + int hdr_size = sizeof(*tt_query); + char tt_flag; + size_t packet_size; + + if (batadv_check_unicast_packet(skb, hdr_size) < 0) + return NET_RX_DROP; + + /* I could need to modify it */ + if (skb_cow(skb, sizeof(struct batadv_tt_query_packet)) < 0) goto out; tt_query = (struct batadv_tt_query_packet *)skb->data; @@ -721,7 +734,7 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) * been incremented yet. This flag will make me check all the incoming * packets for the correct destination. */ - bat_priv->tt_poss_change = true; + bat_priv->tt.poss_change = true; batadv_orig_node_free_ref(orig_node); out: @@ -819,31 +832,6 @@ err: return NULL; } -static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) -{ - struct ethhdr *ethhdr; - - /* drop packet if it has not necessary minimum size */ - if (unlikely(!pskb_may_pull(skb, hdr_size))) - return -1; - - ethhdr = (struct ethhdr *)skb_mac_header(skb); - - /* packet with unicast indication but broadcast recipient */ - if (is_broadcast_ether_addr(ethhdr->h_dest)) - return -1; - - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) - return -1; - - /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) - return -1; - - return 0; -} - static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { @@ -947,8 +935,8 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, unicast_packet = (struct batadv_unicast_packet *)skb->data; if (batadv_is_my_mac(unicast_packet->dest)) { - tt_poss_change = bat_priv->tt_poss_change; - curr_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + tt_poss_change = bat_priv->tt.poss_change; + curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); } else { orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); @@ -993,8 +981,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, } else { memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); - curr_ttvn = (uint8_t) - atomic_read(&orig_node->last_ttvn); + curr_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); batadv_orig_node_free_ref(orig_node); } @@ -1025,8 +1012,9 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, /* packet for me */ if (batadv_is_my_mac(unicast_packet->dest)) { - batadv_interface_rx(recv_if->soft_iface, skb, recv_if, - hdr_size); + batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, + NULL); + return NET_RX_SUCCESS; } @@ -1063,7 +1051,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, return NET_RX_SUCCESS; batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if, - sizeof(struct batadv_unicast_packet)); + sizeof(struct batadv_unicast_packet), NULL); return NET_RX_SUCCESS; } @@ -1150,7 +1138,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, goto out; /* broadcast for me */ - batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size); + batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, + orig_node); ret = NET_RX_SUCCESS; goto out; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 3b4b2daa3b3e..570a8bce0364 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -190,13 +190,13 @@ out: static void batadv_send_outstanding_bcast_packet(struct work_struct *work) { struct batadv_hard_iface *hard_iface; - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); + struct delayed_work *delayed_work; struct batadv_forw_packet *forw_packet; struct sk_buff *skb1; struct net_device *soft_iface; struct batadv_priv *bat_priv; + delayed_work = container_of(work, struct delayed_work, work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); soft_iface = forw_packet->if_incoming->soft_iface; @@ -239,11 +239,11 @@ out: void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work) { - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); + struct delayed_work *delayed_work; struct batadv_forw_packet *forw_packet; struct batadv_priv *bat_priv; + delayed_work = container_of(work, struct delayed_work, work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 109ea2aae96c..b9a28d2dd3e8 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -93,25 +93,35 @@ static int batadv_interface_release(struct net_device *dev) static struct net_device_stats *batadv_interface_stats(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); - return &bat_priv->stats; + struct net_device_stats *stats = &bat_priv->stats; + + stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX); + stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES); + stats->tx_dropped = batadv_sum_counter(bat_priv, BATADV_CNT_TX_DROPPED); + stats->rx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_RX); + stats->rx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_RX_BYTES); + return stats; } static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) { struct batadv_priv *bat_priv = netdev_priv(dev); struct sockaddr *addr = p; + uint8_t old_addr[ETH_ALEN]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + memcpy(old_addr, dev->dev_addr, ETH_ALEN); + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) { - batadv_tt_local_remove(bat_priv, dev->dev_addr, + batadv_tt_local_remove(bat_priv, old_addr, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX); } - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); dev->addr_assign_type &= ~NET_ADDR_RANDOM; return 0; } @@ -142,6 +152,7 @@ static int batadv_interface_tx(struct sk_buff *skb, int data_len = skb->len, ret; short vid __maybe_unused = -1; bool do_bcast = false; + uint32_t seqno; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@ -223,8 +234,8 @@ static int batadv_interface_tx(struct sk_buff *skb, primary_if->net_dev->dev_addr, ETH_ALEN); /* set broadcast sequence number */ - bcast_packet->seqno = - htonl(atomic_inc_return(&bat_priv->bcast_seqno)); + seqno = atomic_inc_return(&bat_priv->bcast_seqno); + bcast_packet->seqno = htonl(seqno); batadv_add_bcast_packet_to_list(bat_priv, skb, 1); @@ -246,14 +257,14 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped_freed; } - bat_priv->stats.tx_packets++; - bat_priv->stats.tx_bytes += data_len; + batadv_inc_counter(bat_priv, BATADV_CNT_TX); + batadv_add_counter(bat_priv, BATADV_CNT_TX_BYTES, data_len); goto end; dropped: kfree_skb(skb); dropped_freed: - bat_priv->stats.tx_dropped++; + batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); end: if (primary_if) batadv_hardif_free_ref(primary_if); @@ -262,7 +273,7 @@ end: void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, - int hdr_size) + int hdr_size, struct batadv_orig_node *orig_node) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct ethhdr *ethhdr; @@ -308,11 +319,16 @@ void batadv_interface_rx(struct net_device *soft_iface, /* skb->ip_summed = CHECKSUM_UNNECESSARY; */ - bat_priv->stats.rx_packets++; - bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; + batadv_inc_counter(bat_priv, BATADV_CNT_RX); + batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, + skb->len + ETH_HLEN); soft_iface->last_rx = jiffies; + if (orig_node) + batadv_tt_add_temporary_global_entry(bat_priv, orig_node, + ethhdr->h_source); + if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest)) goto dropped; @@ -379,15 +395,22 @@ struct net_device *batadv_softif_create(const char *name) if (!soft_iface) goto out; + bat_priv = netdev_priv(soft_iface); + + /* batadv_interface_stats() needs to be available as soon as + * register_netdevice() has been called + */ + bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t)); + if (!bat_priv->bat_counters) + goto free_soft_iface; + ret = register_netdevice(soft_iface); if (ret < 0) { pr_err("Unable to register the batman interface '%s': %i\n", name, ret); - goto free_soft_iface; + goto free_bat_counters; } - bat_priv = netdev_priv(soft_iface); - atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); atomic_set(&bat_priv->bridge_loop_avoidance, 0); @@ -405,29 +428,26 @@ struct net_device *batadv_softif_create(const char *name) atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); atomic_set(&bat_priv->bcast_seqno, 1); - atomic_set(&bat_priv->ttvn, 0); - atomic_set(&bat_priv->tt_local_changes, 0); - atomic_set(&bat_priv->tt_ogm_append_cnt, 0); - atomic_set(&bat_priv->bla_num_requests, 0); - - bat_priv->tt_buff = NULL; - bat_priv->tt_buff_len = 0; - bat_priv->tt_poss_change = false; + atomic_set(&bat_priv->tt.vn, 0); + atomic_set(&bat_priv->tt.local_changes, 0); + atomic_set(&bat_priv->tt.ogm_append_cnt, 0); +#ifdef CONFIG_BATMAN_ADV_BLA + atomic_set(&bat_priv->bla.num_requests, 0); +#endif + bat_priv->tt.last_changeset = NULL; + bat_priv->tt.last_changeset_len = 0; + bat_priv->tt.poss_change = false; bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; - bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t)); - if (!bat_priv->bat_counters) - goto unreg_soft_iface; - ret = batadv_algo_select(bat_priv, batadv_routing_algo); if (ret < 0) - goto free_bat_counters; + goto unreg_soft_iface; ret = batadv_sysfs_add_meshif(soft_iface); if (ret < 0) - goto free_bat_counters; + goto unreg_soft_iface; ret = batadv_debugfs_add_meshif(soft_iface); if (ret < 0) @@ -443,12 +463,13 @@ unreg_debugfs: batadv_debugfs_del_meshif(soft_iface); unreg_sysfs: batadv_sysfs_del_meshif(soft_iface); -free_bat_counters: - free_percpu(bat_priv->bat_counters); unreg_soft_iface: + free_percpu(bat_priv->bat_counters); unregister_netdevice(soft_iface); return NULL; +free_bat_counters: + free_percpu(bat_priv->bat_counters); free_soft_iface: free_netdev(soft_iface); out: @@ -518,6 +539,11 @@ static u32 batadv_get_link(struct net_device *dev) static const struct { const char name[ETH_GSTRING_LEN]; } batadv_counters_strings[] = { + { "tx" }, + { "tx_bytes" }, + { "tx_dropped" }, + { "rx" }, + { "rx_bytes" }, { "forward" }, { "forward_bytes" }, { "mgmt_tx" }, diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 852c683b06a1..07a08fed28b9 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -21,8 +21,9 @@ #define _NET_BATMAN_ADV_SOFT_INTERFACE_H_ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len); -void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, - struct batadv_hard_iface *recv_if, int hdr_size); +void batadv_interface_rx(struct net_device *soft_iface, + struct sk_buff *skb, struct batadv_hard_iface *recv_if, + int hdr_size, struct batadv_orig_node *orig_node); struct net_device *batadv_softif_create(const char *name); void batadv_softif_destroy(struct net_device *soft_iface); int batadv_softif_is_valid(const struct net_device *net_dev); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a438f4b582fc..112edd371b2f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -34,6 +34,10 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, static void batadv_tt_purge(struct work_struct *work); static void batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry); +static void batadv_tt_global_del(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + const unsigned char *addr, + const char *message, bool roaming); /* returns 1 if they are the same mac addr */ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) @@ -46,8 +50,8 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) static void batadv_tt_start_timer(struct batadv_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->tt_work, batadv_tt_purge); - queue_delayed_work(batadv_event_workqueue, &bat_priv->tt_work, + INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); + queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, msecs_to_jiffies(5000)); } @@ -88,7 +92,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const void *data) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local_entry = NULL; - tt_common_entry = batadv_tt_hash_find(bat_priv->tt_local_hash, data); + tt_common_entry = batadv_tt_hash_find(bat_priv->tt.local_hash, data); if (tt_common_entry) tt_local_entry = container_of(tt_common_entry, struct batadv_tt_local_entry, @@ -102,7 +106,7 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const void *data) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global_entry = NULL; - tt_common_entry = batadv_tt_hash_find(bat_priv->tt_global_hash, data); + tt_common_entry = batadv_tt_hash_find(bat_priv->tt.global_hash, data); if (tt_common_entry) tt_global_entry = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -152,6 +156,8 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) static void batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) { + if (!atomic_dec_and_test(&orig_entry->refcount)) + return; /* to avoid race conditions, immediately decrease the tt counter */ atomic_dec(&orig_entry->orig_node->tt_size); call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); @@ -175,8 +181,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, del_op_requested = flags & BATADV_TT_CLIENT_DEL; /* check for ADD+DEL or DEL+ADD events */ - spin_lock_bh(&bat_priv->tt_changes_list_lock); - list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + spin_lock_bh(&bat_priv->tt.changes_list_lock); + list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { if (!batadv_compare_eth(entry->change.addr, addr)) continue; @@ -197,20 +203,21 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, del: list_del(&entry->list); kfree(entry); + kfree(tt_change_node); event_removed = true; goto unlock; } /* track the change in the OGMinterval list */ - list_add_tail(&tt_change_node->list, &bat_priv->tt_changes_list); + list_add_tail(&tt_change_node->list, &bat_priv->tt.changes_list); unlock: - spin_unlock_bh(&bat_priv->tt_changes_list_lock); + spin_unlock_bh(&bat_priv->tt.changes_list_lock); if (event_removed) - atomic_dec(&bat_priv->tt_local_changes); + atomic_dec(&bat_priv->tt.local_changes); else - atomic_inc(&bat_priv->tt_local_changes); + atomic_inc(&bat_priv->tt.local_changes); } int batadv_tt_len(int changes_num) @@ -220,12 +227,12 @@ int batadv_tt_len(int changes_num) static int batadv_tt_local_init(struct batadv_priv *bat_priv) { - if (bat_priv->tt_local_hash) + if (bat_priv->tt.local_hash) return 0; - bat_priv->tt_local_hash = batadv_hash_new(1024); + bat_priv->tt.local_hash = batadv_hash_new(1024); - if (!bat_priv->tt_local_hash) + if (!bat_priv->tt.local_hash) return -ENOMEM; return 0; @@ -257,7 +264,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (ttvn: %d)\n", addr, - (uint8_t)atomic_read(&bat_priv->ttvn)); + (uint8_t)atomic_read(&bat_priv->tt.vn)); memcpy(tt_local_entry->common.addr, addr, ETH_ALEN); tt_local_entry->common.flags = BATADV_NO_FLAGS; @@ -265,6 +272,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, tt_local_entry->common.flags |= BATADV_TT_CLIENT_WIFI; atomic_set(&tt_local_entry->common.refcount, 2); tt_local_entry->last_seen = jiffies; + tt_local_entry->common.added_at = tt_local_entry->last_seen; /* the batman interface mac address should never be purged */ if (batadv_compare_eth(addr, soft_iface->dev_addr)) @@ -276,7 +284,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, */ tt_local_entry->common.flags |= BATADV_TT_CLIENT_NEW; - hash_added = batadv_hash_add(bat_priv->tt_local_hash, batadv_compare_tt, + hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_orig, &tt_local_entry->common, &tt_local_entry->common.hash_entry); @@ -347,7 +355,7 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv, primary_if = batadv_primary_if_get_selected(bat_priv); req_len = min_packet_len; - req_len += batadv_tt_len(atomic_read(&bat_priv->tt_local_changes)); + req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes)); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented @@ -380,10 +388,10 @@ static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, if (new_len > 0) tot_changes = new_len / batadv_tt_len(1); - spin_lock_bh(&bat_priv->tt_changes_list_lock); - atomic_set(&bat_priv->tt_local_changes, 0); + spin_lock_bh(&bat_priv->tt.changes_list_lock); + atomic_set(&bat_priv->tt.local_changes, 0); - list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { if (count < tot_changes) { memcpy(tt_buff + batadv_tt_len(count), @@ -393,25 +401,25 @@ static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, list_del(&entry->list); kfree(entry); } - spin_unlock_bh(&bat_priv->tt_changes_list_lock); + spin_unlock_bh(&bat_priv->tt.changes_list_lock); /* Keep the buffer for possible tt_request */ - spin_lock_bh(&bat_priv->tt_buff_lock); - kfree(bat_priv->tt_buff); - bat_priv->tt_buff_len = 0; - bat_priv->tt_buff = NULL; + spin_lock_bh(&bat_priv->tt.last_changeset_lock); + kfree(bat_priv->tt.last_changeset); + bat_priv->tt.last_changeset_len = 0; + bat_priv->tt.last_changeset = NULL; /* check whether this new OGM has no changes due to size problems */ if (new_len > 0) { /* if kmalloc() fails we will reply with the full table * instead of providing the diff */ - bat_priv->tt_buff = kmalloc(new_len, GFP_ATOMIC); - if (bat_priv->tt_buff) { - memcpy(bat_priv->tt_buff, tt_buff, new_len); - bat_priv->tt_buff_len = new_len; + bat_priv->tt.last_changeset = kmalloc(new_len, GFP_ATOMIC); + if (bat_priv->tt.last_changeset) { + memcpy(bat_priv->tt.last_changeset, tt_buff, new_len); + bat_priv->tt.last_changeset_len = new_len; } } - spin_unlock_bh(&bat_priv->tt_buff_lock); + spin_unlock_bh(&bat_priv->tt.last_changeset_lock); return count; } @@ -420,7 +428,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->tt_local_hash; + struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common_entry; struct batadv_hard_iface *primary_if; struct hlist_node *node; @@ -445,7 +453,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", - net_dev->name, (uint8_t)atomic_read(&bat_priv->ttvn)); + net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn)); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -543,7 +551,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, static void batadv_tt_local_purge(struct batadv_priv *bat_priv) { - struct batadv_hashtable *hash = bat_priv->tt_local_hash; + struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; @@ -569,10 +577,10 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) struct hlist_head *head; uint32_t i; - if (!bat_priv->tt_local_hash) + if (!bat_priv->tt.local_hash) return; - hash = bat_priv->tt_local_hash; + hash = bat_priv->tt.local_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -592,17 +600,17 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) batadv_hash_destroy(hash); - bat_priv->tt_local_hash = NULL; + bat_priv->tt.local_hash = NULL; } static int batadv_tt_global_init(struct batadv_priv *bat_priv) { - if (bat_priv->tt_global_hash) + if (bat_priv->tt.global_hash) return 0; - bat_priv->tt_global_hash = batadv_hash_new(1024); + bat_priv->tt.global_hash = batadv_hash_new(1024); - if (!bat_priv->tt_global_hash) + if (!bat_priv->tt.global_hash) return -ENOMEM; return 0; @@ -612,62 +620,99 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_change_node *entry, *safe; - spin_lock_bh(&bat_priv->tt_changes_list_lock); + spin_lock_bh(&bat_priv->tt.changes_list_lock); - list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { list_del(&entry->list); kfree(entry); } - atomic_set(&bat_priv->tt_local_changes, 0); - spin_unlock_bh(&bat_priv->tt_changes_list_lock); + atomic_set(&bat_priv->tt.local_changes, 0); + spin_unlock_bh(&bat_priv->tt.changes_list_lock); } -/* find out if an orig_node is already in the list of a tt_global_entry. - * returns 1 if found, 0 otherwise +/* retrieves the orig_tt_list_entry belonging to orig_node from the + * batadv_tt_global_entry list + * + * returns it with an increased refcounter, NULL if not found */ -static bool -batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, - const struct batadv_orig_node *orig_node) +static struct batadv_tt_orig_list_entry * +batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, + const struct batadv_orig_node *orig_node) { - struct batadv_tt_orig_list_entry *tmp_orig_entry; + struct batadv_tt_orig_list_entry *tmp_orig_entry, *orig_entry = NULL; const struct hlist_head *head; struct hlist_node *node; - bool found = false; rcu_read_lock(); head = &entry->orig_list; hlist_for_each_entry_rcu(tmp_orig_entry, node, head, list) { - if (tmp_orig_entry->orig_node == orig_node) { - found = true; - break; - } + if (tmp_orig_entry->orig_node != orig_node) + continue; + if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) + continue; + + orig_entry = tmp_orig_entry; + break; } rcu_read_unlock(); + + return orig_entry; +} + +/* find out if an orig_node is already in the list of a tt_global_entry. + * returns true if found, false otherwise + */ +static bool +batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, + const struct batadv_orig_node *orig_node) +{ + struct batadv_tt_orig_list_entry *orig_entry; + bool found = false; + + orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); + if (orig_entry) { + found = true; + batadv_tt_orig_list_entry_free_ref(orig_entry); + } + return found; } static void -batadv_tt_global_add_orig_entry(struct batadv_tt_global_entry *tt_global_entry, +batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, struct batadv_orig_node *orig_node, int ttvn) { struct batadv_tt_orig_list_entry *orig_entry; + orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node); + if (orig_entry) { + /* refresh the ttvn: the current value could be a bogus one that + * was added during a "temporary client detection" + */ + orig_entry->ttvn = ttvn; + goto out; + } + orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC); if (!orig_entry) - return; + goto out; INIT_HLIST_NODE(&orig_entry->list); atomic_inc(&orig_node->refcount); atomic_inc(&orig_node->tt_size); orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; + atomic_set(&orig_entry->refcount, 2); - spin_lock_bh(&tt_global_entry->list_lock); + spin_lock_bh(&tt_global->list_lock); hlist_add_head_rcu(&orig_entry->list, - &tt_global_entry->orig_list); - spin_unlock_bh(&tt_global_entry->list_lock); + &tt_global->orig_list); + spin_unlock_bh(&tt_global->list_lock); +out: + if (orig_entry) + batadv_tt_orig_list_entry_free_ref(orig_entry); } /* caller must hold orig_node refcount */ @@ -694,11 +739,12 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, common->flags = flags; tt_global_entry->roam_at = 0; atomic_set(&common->refcount, 2); + common->added_at = jiffies; INIT_HLIST_HEAD(&tt_global_entry->orig_list); spin_lock_init(&tt_global_entry->list_lock); - hash_added = batadv_hash_add(bat_priv->tt_global_hash, + hash_added = batadv_hash_add(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_orig, common, &common->hash_entry); @@ -708,11 +754,20 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, batadv_tt_global_entry_free_ref(tt_global_entry); goto out_remove; } - - batadv_tt_global_add_orig_entry(tt_global_entry, orig_node, - ttvn); } else { - /* there is already a global entry, use this one. */ + /* If there is already a global entry, we can use this one for + * our processing. + * But if we are trying to add a temporary client we can exit + * directly because the temporary information should never + * override any already known client state (whatever it is) + */ + if (flags & BATADV_TT_CLIENT_TEMP) + goto out; + + /* if the client was temporary added before receiving the first + * OGM announcing it, we have to clear the TEMP flag + */ + tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_TEMP; /* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only * one originator left in the list and we previously received a @@ -726,12 +781,9 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM; tt_global_entry->roam_at = 0; } - - if (!batadv_tt_global_entry_has_orig(tt_global_entry, - orig_node)) - batadv_tt_global_add_orig_entry(tt_global_entry, - orig_node, ttvn); } + /* add the new orig_entry (if needed) or update it */ + batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn); batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (via %pM)\n", @@ -770,11 +822,12 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, hlist_for_each_entry_rcu(orig_entry, node, head, list) { flags = tt_common_entry->flags; last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn); - seq_printf(seq, " * %pM (%3u) via %pM (%3u) [%c%c]\n", + seq_printf(seq, " * %pM (%3u) via %pM (%3u) [%c%c%c]\n", tt_global_entry->common.addr, orig_entry->ttvn, orig_entry->orig_node->orig, last_ttvn, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), - (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.')); + (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); } } @@ -782,7 +835,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->tt_global_hash; + struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; struct batadv_hard_iface *primary_if; @@ -883,7 +936,7 @@ batadv_tt_global_del_struct(struct batadv_priv *bat_priv, "Deleting global tt entry %pM: %s\n", tt_global_entry->common.addr, message); - batadv_hash_remove(bat_priv->tt_global_hash, batadv_compare_tt, + batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_orig, tt_global_entry->common.addr); batadv_tt_global_entry_free_ref(tt_global_entry); @@ -994,7 +1047,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global; struct batadv_tt_common_entry *tt_common_entry; uint32_t i; - struct batadv_hashtable *hash = bat_priv->tt_global_hash; + struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_node *node, *safe; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -1029,49 +1082,63 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, orig_node->tt_initialised = false; } -static void batadv_tt_global_roam_purge_list(struct batadv_priv *bat_priv, - struct hlist_head *head) +static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global, + char **msg) { - struct batadv_tt_common_entry *tt_common_entry; - struct batadv_tt_global_entry *tt_global_entry; - struct hlist_node *node, *node_tmp; - - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, - hash_entry) { - tt_global_entry = container_of(tt_common_entry, - struct batadv_tt_global_entry, - common); - if (!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM)) - continue; - if (!batadv_has_timed_out(tt_global_entry->roam_at, - BATADV_TT_CLIENT_ROAM_TIMEOUT)) - continue; + bool purge = false; + unsigned long roam_timeout = BATADV_TT_CLIENT_ROAM_TIMEOUT; + unsigned long temp_timeout = BATADV_TT_CLIENT_TEMP_TIMEOUT; - batadv_dbg(BATADV_DBG_TT, bat_priv, - "Deleting global tt entry (%pM): Roaming timeout\n", - tt_global_entry->common.addr); + if ((tt_global->common.flags & BATADV_TT_CLIENT_ROAM) && + batadv_has_timed_out(tt_global->roam_at, roam_timeout)) { + purge = true; + *msg = "Roaming timeout\n"; + } - hlist_del_rcu(node); - batadv_tt_global_entry_free_ref(tt_global_entry); + if ((tt_global->common.flags & BATADV_TT_CLIENT_TEMP) && + batadv_has_timed_out(tt_global->common.added_at, temp_timeout)) { + purge = true; + *msg = "Temporary client timeout\n"; } + + return purge; } -static void batadv_tt_global_roam_purge(struct batadv_priv *bat_priv) +static void batadv_tt_global_purge(struct batadv_priv *bat_priv) { - struct batadv_hashtable *hash = bat_priv->tt_global_hash; + struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_head *head; + struct hlist_node *node, *node_tmp; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; + char *msg = NULL; + struct batadv_tt_common_entry *tt_common; + struct batadv_tt_global_entry *tt_global; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - batadv_tt_global_roam_purge_list(bat_priv, head); + hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hash_entry) { + tt_global = container_of(tt_common, + struct batadv_tt_global_entry, + common); + + if (!batadv_tt_global_to_purge(tt_global, &msg)) + continue; + + batadv_dbg(BATADV_DBG_TT, bat_priv, + "Deleting global tt entry (%pM): %s\n", + tt_global->common.addr, msg); + + hlist_del_rcu(node); + + batadv_tt_global_entry_free_ref(tt_global); + } spin_unlock_bh(list_lock); } - } static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) @@ -1084,10 +1151,10 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) struct hlist_head *head; uint32_t i; - if (!bat_priv->tt_global_hash) + if (!bat_priv->tt.global_hash) return; - hash = bat_priv->tt_global_hash; + hash = bat_priv->tt.global_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1107,7 +1174,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) batadv_hash_destroy(hash); - bat_priv->tt_global_hash = NULL; + bat_priv->tt.global_hash = NULL; } static bool @@ -1186,7 +1253,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { uint16_t total = 0, total_one; - struct batadv_hashtable *hash = bat_priv->tt_global_hash; + struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; struct hlist_node *node; @@ -1209,6 +1276,12 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, */ if (tt_common->flags & BATADV_TT_CLIENT_ROAM) continue; + /* Temporary clients have not been announced yet, so + * they have to be skipped while computing the global + * crc + */ + if (tt_common->flags & BATADV_TT_CLIENT_TEMP) + continue; /* find out if this global entry is announced by this * originator @@ -1233,7 +1306,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) { uint16_t total = 0, total_one; - struct batadv_hashtable *hash = bat_priv->tt_local_hash; + struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct hlist_node *node; struct hlist_head *head; @@ -1266,14 +1339,14 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node, *safe; - spin_lock_bh(&bat_priv->tt_req_list_lock); + spin_lock_bh(&bat_priv->tt.req_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { list_del(&node->list); kfree(node); } - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); } static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv, @@ -1303,15 +1376,15 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node, *safe; - spin_lock_bh(&bat_priv->tt_req_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + spin_lock_bh(&bat_priv->tt.req_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { if (batadv_has_timed_out(node->issued_at, BATADV_TT_REQUEST_TIMEOUT)) { list_del(&node->list); kfree(node); } } - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); } /* returns the pointer to the new tt_req_node struct if no request @@ -1323,8 +1396,8 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv, { struct batadv_tt_req_node *tt_req_node_tmp, *tt_req_node = NULL; - spin_lock_bh(&bat_priv->tt_req_list_lock); - list_for_each_entry(tt_req_node_tmp, &bat_priv->tt_req_list, list) { + spin_lock_bh(&bat_priv->tt.req_list_lock); + list_for_each_entry(tt_req_node_tmp, &bat_priv->tt.req_list, list) { if (batadv_compare_eth(tt_req_node_tmp, orig_node) && !batadv_has_timed_out(tt_req_node_tmp->issued_at, BATADV_TT_REQUEST_TIMEOUT)) @@ -1338,9 +1411,9 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv, memcpy(tt_req_node->addr, orig_node->orig, ETH_ALEN); tt_req_node->issued_at = jiffies; - list_add(&tt_req_node->list, &bat_priv->tt_req_list); + list_add(&tt_req_node->list, &bat_priv->tt.req_list); unlock: - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); return tt_req_node; } @@ -1362,7 +1435,8 @@ static int batadv_tt_global_valid(const void *entry_ptr, const struct batadv_tt_global_entry *tt_global_entry; const struct batadv_orig_node *orig_node = data_ptr; - if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM) + if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM || + tt_common_entry->flags & BATADV_TT_CLIENT_TEMP) return 0; tt_global_entry = container_of(tt_common_entry, @@ -1506,9 +1580,9 @@ out: if (ret) kfree_skb(skb); if (ret && tt_req_node) { - spin_lock_bh(&bat_priv->tt_req_list_lock); + spin_lock_bh(&bat_priv->tt.req_list_lock); list_del(&tt_req_node->list); - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); kfree(tt_req_node); } return ret; @@ -1529,6 +1603,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, uint16_t tt_len, tt_tot; struct sk_buff *skb = NULL; struct batadv_tt_query_packet *tt_response; + uint8_t *packet_pos; size_t len; batadv_dbg(BATADV_DBG_TT, bat_priv, @@ -1582,8 +1657,8 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, goto unlock; skb_reserve(skb, ETH_HLEN); - tt_response = (struct batadv_tt_query_packet *)skb_put(skb, - len); + packet_pos = skb_put(skb, len); + tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; tt_response->tt_data = htons(tt_tot); @@ -1599,7 +1674,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); skb = batadv_tt_response_fill_table(tt_len, ttvn, - bat_priv->tt_global_hash, + bat_priv->tt.global_hash, primary_if, batadv_tt_global_valid, req_dst_orig_node); @@ -1662,6 +1737,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, uint16_t tt_len, tt_tot; struct sk_buff *skb = NULL; struct batadv_tt_query_packet *tt_response; + uint8_t *packet_pos; size_t len; batadv_dbg(BATADV_DBG_TT, bat_priv, @@ -1670,7 +1746,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, (tt_request->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); - my_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + my_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); req_ttvn = tt_request->ttvn; orig_node = batadv_orig_hash_find(bat_priv, tt_request->src); @@ -1689,7 +1765,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, * is too big send the whole local translation table */ if (tt_request->flags & BATADV_TT_FULL_TABLE || my_ttvn != req_ttvn || - !bat_priv->tt_buff) + !bat_priv->tt.last_changeset) full_table = true; else full_table = false; @@ -1698,8 +1774,8 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, * I'll send only one packet with as much TT entries as I can */ if (!full_table) { - spin_lock_bh(&bat_priv->tt_buff_lock); - tt_len = bat_priv->tt_buff_len; + spin_lock_bh(&bat_priv->tt.last_changeset_lock); + tt_len = bat_priv->tt.last_changeset_len; tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; @@ -1708,22 +1784,22 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, goto unlock; skb_reserve(skb, ETH_HLEN); - tt_response = (struct batadv_tt_query_packet *)skb_put(skb, - len); + packet_pos = skb_put(skb, len); + tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; tt_response->tt_data = htons(tt_tot); tt_buff = skb->data + sizeof(*tt_response); - memcpy(tt_buff, bat_priv->tt_buff, - bat_priv->tt_buff_len); - spin_unlock_bh(&bat_priv->tt_buff_lock); + memcpy(tt_buff, bat_priv->tt.last_changeset, + bat_priv->tt.last_changeset_len); + spin_unlock_bh(&bat_priv->tt.last_changeset_lock); } else { - tt_len = (uint16_t)atomic_read(&bat_priv->num_local_tt); + tt_len = (uint16_t)atomic_read(&bat_priv->tt.local_entry_num); tt_len *= sizeof(struct batadv_tt_change); - ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); skb = batadv_tt_response_fill_table(tt_len, ttvn, - bat_priv->tt_local_hash, + bat_priv->tt.local_hash, primary_if, batadv_tt_local_valid_entry, NULL); @@ -1755,7 +1831,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, goto out; unlock: - spin_unlock_bh(&bat_priv->tt_buff_lock); + spin_unlock_bh(&bat_priv->tt.last_changeset_lock); out: if (orig_node) batadv_orig_node_free_ref(orig_node); @@ -1908,14 +1984,14 @@ void batadv_handle_tt_response(struct batadv_priv *bat_priv, } /* Delete the tt_req_node from pending tt_requests list */ - spin_lock_bh(&bat_priv->tt_req_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + spin_lock_bh(&bat_priv->tt.req_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { if (!batadv_compare_eth(node->addr, tt_response->src)) continue; list_del(&node->list); kfree(node); } - spin_unlock_bh(&bat_priv->tt_req_list_lock); + spin_unlock_bh(&bat_priv->tt.req_list_lock); /* Recalculate the CRC for this orig_node and store it */ orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node); @@ -1949,22 +2025,22 @@ static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_roam_node *node, *safe; - spin_lock_bh(&bat_priv->tt_roam_list_lock); + spin_lock_bh(&bat_priv->tt.roam_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) { + list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { list_del(&node->list); kfree(node); } - spin_unlock_bh(&bat_priv->tt_roam_list_lock); + spin_unlock_bh(&bat_priv->tt.roam_list_lock); } static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) { struct batadv_tt_roam_node *node, *safe; - spin_lock_bh(&bat_priv->tt_roam_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) { + spin_lock_bh(&bat_priv->tt.roam_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { if (!batadv_has_timed_out(node->first_time, BATADV_ROAMING_MAX_TIME)) continue; @@ -1972,7 +2048,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) list_del(&node->list); kfree(node); } - spin_unlock_bh(&bat_priv->tt_roam_list_lock); + spin_unlock_bh(&bat_priv->tt.roam_list_lock); } /* This function checks whether the client already reached the @@ -1987,11 +2063,11 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, struct batadv_tt_roam_node *tt_roam_node; bool ret = false; - spin_lock_bh(&bat_priv->tt_roam_list_lock); + spin_lock_bh(&bat_priv->tt.roam_list_lock); /* The new tt_req will be issued only if I'm not waiting for a * reply from the same orig_node yet */ - list_for_each_entry(tt_roam_node, &bat_priv->tt_roam_list, list) { + list_for_each_entry(tt_roam_node, &bat_priv->tt.roam_list, list) { if (!batadv_compare_eth(tt_roam_node->addr, client)) continue; @@ -2016,12 +2092,12 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, BATADV_ROAMING_MAX_COUNT - 1); memcpy(tt_roam_node->addr, client, ETH_ALEN); - list_add(&tt_roam_node->list, &bat_priv->tt_roam_list); + list_add(&tt_roam_node->list, &bat_priv->tt.roam_list); ret = true; } unlock: - spin_unlock_bh(&bat_priv->tt_roam_list_lock); + spin_unlock_bh(&bat_priv->tt.roam_list_lock); return ret; } @@ -2085,13 +2161,15 @@ out: static void batadv_tt_purge(struct work_struct *work) { struct delayed_work *delayed_work; + struct batadv_priv_tt *priv_tt; struct batadv_priv *bat_priv; delayed_work = container_of(work, struct delayed_work, work); - bat_priv = container_of(delayed_work, struct batadv_priv, tt_work); + priv_tt = container_of(delayed_work, struct batadv_priv_tt, work); + bat_priv = container_of(priv_tt, struct batadv_priv, tt); batadv_tt_local_purge(bat_priv); - batadv_tt_global_roam_purge(bat_priv); + batadv_tt_global_purge(bat_priv); batadv_tt_req_purge(bat_priv); batadv_tt_roam_purge(bat_priv); @@ -2100,7 +2178,7 @@ static void batadv_tt_purge(struct work_struct *work) void batadv_tt_free(struct batadv_priv *bat_priv) { - cancel_delayed_work_sync(&bat_priv->tt_work); + cancel_delayed_work_sync(&bat_priv->tt.work); batadv_tt_local_table_free(bat_priv); batadv_tt_global_table_free(bat_priv); @@ -2108,7 +2186,7 @@ void batadv_tt_free(struct batadv_priv *bat_priv) batadv_tt_changes_list_free(bat_priv); batadv_tt_roam_list_free(bat_priv); - kfree(bat_priv->tt_buff); + kfree(bat_priv->tt.last_changeset); } /* This function will enable or disable the specified flags for all the entries @@ -2152,7 +2230,7 @@ out: /* Purge out all the tt local entries marked with BATADV_TT_CLIENT_PENDING */ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) { - struct batadv_hashtable *hash = bat_priv->tt_local_hash; + struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; struct hlist_node *node, *node_tmp; @@ -2177,7 +2255,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) "Deleting local tt entry (%pM): pending\n", tt_common->addr); - atomic_dec(&bat_priv->num_local_tt); + atomic_dec(&bat_priv->tt.local_entry_num); hlist_del_rcu(node); tt_local = container_of(tt_common, struct batadv_tt_local_entry, @@ -2195,26 +2273,26 @@ static int batadv_tt_commit_changes(struct batadv_priv *bat_priv, { uint16_t changed_num = 0; - if (atomic_read(&bat_priv->tt_local_changes) < 1) + if (atomic_read(&bat_priv->tt.local_changes) < 1) return -ENOENT; - changed_num = batadv_tt_set_flags(bat_priv->tt_local_hash, + changed_num = batadv_tt_set_flags(bat_priv->tt.local_hash, BATADV_TT_CLIENT_NEW, false); /* all reset entries have to be counted as local entries */ - atomic_add(changed_num, &bat_priv->num_local_tt); + atomic_add(changed_num, &bat_priv->tt.local_entry_num); batadv_tt_local_purge_pending_clients(bat_priv); - bat_priv->tt_crc = batadv_tt_local_crc(bat_priv); + bat_priv->tt.local_crc = batadv_tt_local_crc(bat_priv); /* Increment the TTVN only once per OGM interval */ - atomic_inc(&bat_priv->ttvn); + atomic_inc(&bat_priv->tt.vn); batadv_dbg(BATADV_DBG_TT, bat_priv, "Local changes committed, updating to ttvn %u\n", - (uint8_t)atomic_read(&bat_priv->ttvn)); - bat_priv->tt_poss_change = false; + (uint8_t)atomic_read(&bat_priv->tt.vn)); + bat_priv->tt.poss_change = false; /* reset the sending counter */ - atomic_set(&bat_priv->tt_ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX); + atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX); return batadv_tt_changes_fill_buff(bat_priv, packet_buff, packet_buff_len, packet_min_len); @@ -2234,7 +2312,7 @@ int batadv_tt_append_diff(struct batadv_priv *bat_priv, /* if the changes have been sent often enough */ if ((tt_num_changes < 0) && - (!batadv_atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt))) { + (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))) { batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len, packet_min_len, packet_min_len); tt_num_changes = 0; @@ -2365,3 +2443,22 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, out: return ret; } + +bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + const unsigned char *addr) +{ + bool ret = false; + + if (!batadv_tt_global_add(bat_priv, orig_node, addr, + BATADV_TT_CLIENT_TEMP, + atomic_read(&orig_node->last_ttvn))) + goto out; + + batadv_dbg(BATADV_DBG_TT, bat_priv, + "Added temporary global client (addr: %pM orig: %pM)\n", + addr, orig_node->orig); + ret = true; +out: + return ret; +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index ffa87355096b..811fffd4760c 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -59,6 +59,8 @@ int batadv_tt_append_diff(struct batadv_priv *bat_priv, int packet_min_len); bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, uint8_t *addr); - +bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + const unsigned char *addr); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 12635fd2c3d3..2ed82caacdca 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -145,6 +145,11 @@ struct batadv_bcast_duplist_entry { #endif enum batadv_counters { + BATADV_CNT_TX, + BATADV_CNT_TX_BYTES, + BATADV_CNT_TX_DROPPED, + BATADV_CNT_RX, + BATADV_CNT_RX_BYTES, BATADV_CNT_FORWARD, BATADV_CNT_FORWARD_BYTES, BATADV_CNT_MGMT_TX, @@ -160,6 +165,67 @@ enum batadv_counters { BATADV_CNT_NUM, }; +/** + * struct batadv_priv_tt - per mesh interface translation table data + * @vn: translation table version number + * @local_changes: changes registered in an originator interval + * @poss_change: Detect an ongoing roaming phase. If true, then this node + * received a roaming_adv and has to inspect every packet directed to it to + * check whether it still is the true destination or not. This flag will be + * reset to false as soon as the this node's ttvn is increased + * @changes_list: tracks tt local changes within an originator interval + * @req_list: list of pending tt_requests + * @local_crc: Checksum of the local table, recomputed before sending a new OGM + */ +struct batadv_priv_tt { + atomic_t vn; + atomic_t ogm_append_cnt; + atomic_t local_changes; + bool poss_change; + struct list_head changes_list; + struct batadv_hashtable *local_hash; + struct batadv_hashtable *global_hash; + struct list_head req_list; + struct list_head roam_list; + spinlock_t changes_list_lock; /* protects changes */ + spinlock_t req_list_lock; /* protects req_list */ + spinlock_t roam_list_lock; /* protects roam_list */ + atomic_t local_entry_num; + uint16_t local_crc; + unsigned char *last_changeset; + int16_t last_changeset_len; + spinlock_t last_changeset_lock; /* protects last_changeset */ + struct delayed_work work; +}; + +#ifdef CONFIG_BATMAN_ADV_BLA +struct batadv_priv_bla { + atomic_t num_requests; /* number of bla requests in flight */ + struct batadv_hashtable *claim_hash; + struct batadv_hashtable *backbone_hash; + struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; + int bcast_duplist_curr; + struct batadv_bla_claim_dst claim_dest; + struct delayed_work work; +}; +#endif + +struct batadv_priv_gw { + struct hlist_head list; + spinlock_t list_lock; /* protects gw_list and curr_gw */ + struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ + atomic_t reselect; +}; + +struct batadv_priv_vis { + struct list_head send_list; + struct batadv_hashtable *hash; + spinlock_t hash_lock; /* protects hash */ + spinlock_t list_lock; /* protects info::recv_list */ + struct delayed_work work; + struct batadv_vis_info *my_info; +}; + struct batadv_priv { atomic_t mesh_state; struct net_device_stats stats; @@ -179,64 +245,24 @@ struct batadv_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; - atomic_t ttvn; /* translation table version number */ - atomic_t tt_ogm_append_cnt; - atomic_t tt_local_changes; /* changes registered in a OGM interval */ - atomic_t bla_num_requests; /* number of bla requests in flight */ - /* The tt_poss_change flag is used to detect an ongoing roaming phase. - * If true, then I received a Roaming_adv and I have to inspect every - * packet directed to me to check whether I am still the true - * destination or not. This flag will be reset to false as soon as I - * increase my TTVN - */ - bool tt_poss_change; char num_ifaces; struct batadv_debug_log *debug_log; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; struct hlist_head forw_bcast_list; - struct hlist_head gw_list; - struct list_head tt_changes_list; /* tracks changes in a OGM int */ - struct list_head vis_send_list; struct batadv_hashtable *orig_hash; - struct batadv_hashtable *tt_local_hash; - struct batadv_hashtable *tt_global_hash; -#ifdef CONFIG_BATMAN_ADV_BLA - struct batadv_hashtable *claim_hash; - struct batadv_hashtable *backbone_hash; -#endif - struct list_head tt_req_list; /* list of pending tt_requests */ - struct list_head tt_roam_list; - struct batadv_hashtable *vis_hash; -#ifdef CONFIG_BATMAN_ADV_BLA - struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; - int bcast_duplist_curr; - struct batadv_bla_claim_dst claim_dest; -#endif spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects */ - spinlock_t tt_changes_list_lock; /* protects tt_changes */ - spinlock_t tt_req_list_lock; /* protects tt_req_list */ - spinlock_t tt_roam_list_lock; /* protects tt_roam_list */ - spinlock_t gw_list_lock; /* protects gw_list and curr_gw */ - spinlock_t vis_hash_lock; /* protects vis_hash */ - spinlock_t vis_list_lock; /* protects vis_info::recv_list */ - atomic_t num_local_tt; - /* Checksum of the local table, recomputed before sending a new OGM */ - uint16_t tt_crc; - unsigned char *tt_buff; - int16_t tt_buff_len; - spinlock_t tt_buff_lock; /* protects tt_buff */ - struct delayed_work tt_work; struct delayed_work orig_work; - struct delayed_work vis_work; - struct delayed_work bla_work; - struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ - atomic_t gw_reselect; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ - struct batadv_vis_info *my_vis_info; struct batadv_algo_ops *bat_algo_ops; +#ifdef CONFIG_BATMAN_ADV_BLA + struct batadv_priv_bla bla; +#endif + struct batadv_priv_gw gw; + struct batadv_priv_tt tt; + struct batadv_priv_vis vis; }; struct batadv_socket_client { @@ -258,6 +284,7 @@ struct batadv_tt_common_entry { uint8_t addr[ETH_ALEN]; struct hlist_node hash_entry; uint16_t flags; + unsigned long added_at; atomic_t refcount; struct rcu_head rcu; }; @@ -277,6 +304,7 @@ struct batadv_tt_global_entry { struct batadv_tt_orig_list_entry { struct batadv_orig_node *orig_node; uint8_t ttvn; + atomic_t refcount; struct rcu_head rcu; struct hlist_node list; }; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 00164645b3f7..f39723281ca1 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -39,6 +39,7 @@ batadv_frag_merge_packet(struct list_head *head, struct batadv_unicast_packet *unicast_packet; int hdr_len = sizeof(*unicast_packet); int uni_diff = sizeof(*up) - hdr_len; + uint8_t *packet_pos; up = (struct batadv_unicast_frag_packet *)skb->data; /* set skb to the first part and tmp_skb to the second part */ @@ -65,8 +66,8 @@ batadv_frag_merge_packet(struct list_head *head, kfree_skb(tmp_skb); memmove(skb->data + uni_diff, skb->data, hdr_len); - unicast_packet = (struct batadv_unicast_packet *)skb_pull(skb, - uni_diff); + packet_pos = skb_pull(skb, uni_diff); + unicast_packet = (struct batadv_unicast_packet *)packet_pos; unicast_packet->header.packet_type = BATADV_UNICAST; return skb; @@ -121,6 +122,7 @@ batadv_frag_search_packet(struct list_head *head, { struct batadv_frag_packet_list_entry *tfp; struct batadv_unicast_frag_packet *tmp_up = NULL; + int is_head_tmp, is_head; uint16_t search_seqno; if (up->flags & BATADV_UNI_FRAG_HEAD) @@ -128,6 +130,8 @@ batadv_frag_search_packet(struct list_head *head, else search_seqno = ntohs(up->seqno)-1; + is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD); + list_for_each_entry(tfp, head, list) { if (!tfp->skb) @@ -139,9 +143,8 @@ batadv_frag_search_packet(struct list_head *head, tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data; if (tfp->seqno == search_seqno) { - - if ((tmp_up->flags & BATADV_UNI_FRAG_HEAD) != - (up->flags & BATADV_UNI_FRAG_HEAD)) + is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD); + if (is_head_tmp != is_head) return tfp; else goto mov_tail; @@ -334,8 +337,7 @@ find_router: /* copy the destination for faster routing */ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); /* set the destination tt version number */ - unicast_packet->ttvn = - (uint8_t)atomic_read(&orig_node->last_ttvn); + unicast_packet->ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); /* inform the destination node that we are still missing a correct route * for this client. The destination will receive this packet and will diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 2a2ea0681469..5abd1454fb07 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -41,13 +41,13 @@ static void batadv_free_info(struct kref *ref) bat_priv = info->bat_priv; list_del_init(&info->send_list); - spin_lock_bh(&bat_priv->vis_list_lock); + spin_lock_bh(&bat_priv->vis.list_lock); list_for_each_entry_safe(entry, tmp, &info->recv_list, list) { list_del(&entry->list); kfree(entry); } - spin_unlock_bh(&bat_priv->vis_list_lock); + spin_unlock_bh(&bat_priv->vis.list_lock); kfree_skb(info->skb_packet); kfree(info); } @@ -94,7 +94,7 @@ static uint32_t batadv_vis_info_choose(const void *data, uint32_t size) static struct batadv_vis_info * batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) { - struct batadv_hashtable *hash = bat_priv->vis_hash; + struct batadv_hashtable *hash = bat_priv->vis.hash; struct hlist_head *head; struct hlist_node *node; struct batadv_vis_info *vis_info, *vis_info_tmp = NULL; @@ -252,7 +252,7 @@ int batadv_vis_seq_print_text(struct seq_file *seq, void *offset) struct hlist_head *head; struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->vis_hash; + struct batadv_hashtable *hash = bat_priv->vis.hash; uint32_t i; int ret = 0; int vis_server = atomic_read(&bat_priv->vis_mode); @@ -264,12 +264,12 @@ int batadv_vis_seq_print_text(struct seq_file *seq, void *offset) if (vis_server == BATADV_VIS_TYPE_CLIENT_UPDATE) goto out; - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; batadv_vis_seq_print_text_bucket(seq, head); } - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); out: if (primary_if) @@ -285,7 +285,7 @@ static void batadv_send_list_add(struct batadv_priv *bat_priv, { if (list_empty(&info->send_list)) { kref_get(&info->refcount); - list_add_tail(&info->send_list, &bat_priv->vis_send_list); + list_add_tail(&info->send_list, &bat_priv->vis.send_list); } } @@ -311,9 +311,9 @@ static void batadv_recv_list_add(struct batadv_priv *bat_priv, return; memcpy(entry->mac, mac, ETH_ALEN); - spin_lock_bh(&bat_priv->vis_list_lock); + spin_lock_bh(&bat_priv->vis.list_lock); list_add_tail(&entry->list, recv_list); - spin_unlock_bh(&bat_priv->vis_list_lock); + spin_unlock_bh(&bat_priv->vis.list_lock); } /* returns 1 if this mac is in the recv_list */ @@ -323,14 +323,14 @@ static int batadv_recv_list_is_in(struct batadv_priv *bat_priv, { const struct batadv_recvlist_node *entry; - spin_lock_bh(&bat_priv->vis_list_lock); + spin_lock_bh(&bat_priv->vis.list_lock); list_for_each_entry(entry, recv_list, list) { if (batadv_compare_eth(entry->mac, mac)) { - spin_unlock_bh(&bat_priv->vis_list_lock); + spin_unlock_bh(&bat_priv->vis.list_lock); return 1; } } - spin_unlock_bh(&bat_priv->vis_list_lock); + spin_unlock_bh(&bat_priv->vis.list_lock); return 0; } @@ -354,7 +354,7 @@ batadv_add_packet(struct batadv_priv *bat_priv, *is_new = 0; /* sanity check */ - if (!bat_priv->vis_hash) + if (!bat_priv->vis.hash) return NULL; /* see if the packet is already in vis_hash */ @@ -385,7 +385,7 @@ batadv_add_packet(struct batadv_priv *bat_priv, } } /* remove old entry */ - batadv_hash_remove(bat_priv->vis_hash, batadv_vis_info_cmp, + batadv_hash_remove(bat_priv->vis.hash, batadv_vis_info_cmp, batadv_vis_info_choose, old_info); batadv_send_list_del(old_info); kref_put(&old_info->refcount, batadv_free_info); @@ -426,7 +426,7 @@ batadv_add_packet(struct batadv_priv *bat_priv, batadv_recv_list_add(bat_priv, &info->recv_list, packet->sender_orig); /* try to add it */ - hash_added = batadv_hash_add(bat_priv->vis_hash, batadv_vis_info_cmp, + hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp, batadv_vis_info_choose, info, &info->hash_entry); if (hash_added != 0) { @@ -449,7 +449,7 @@ void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv, make_broadcast = (vis_server == BATADV_VIS_TYPE_SERVER_SYNC); - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); info = batadv_add_packet(bat_priv, vis_packet, vis_info_len, &is_new, make_broadcast); if (!info) @@ -461,7 +461,7 @@ void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv, if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && is_new) batadv_send_list_add(bat_priv, info); end: - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); } /* handle an incoming client update packet and schedule forward if needed. */ @@ -484,7 +484,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, batadv_is_my_mac(vis_packet->target_orig)) are_target = 1; - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); info = batadv_add_packet(bat_priv, vis_packet, vis_info_len, &is_new, are_target); @@ -505,7 +505,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, } end: - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); } /* Walk the originators and find the VIS server with the best tq. Set the packet @@ -574,10 +574,11 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_neigh_node *router; - struct batadv_vis_info *info = bat_priv->my_vis_info; + struct batadv_vis_info *info = bat_priv->vis.my_info; struct batadv_vis_packet *packet; struct batadv_vis_info_entry *entry; struct batadv_tt_common_entry *tt_common_entry; + uint8_t *packet_pos; int best_tq = -1; uint32_t i; @@ -618,8 +619,8 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) goto next; /* fill one entry into buffer. */ - entry = (struct batadv_vis_info_entry *) - skb_put(info->skb_packet, sizeof(*entry)); + packet_pos = skb_put(info->skb_packet, sizeof(*entry)); + entry = (struct batadv_vis_info_entry *)packet_pos; memcpy(entry->src, router->if_incoming->net_dev->dev_addr, ETH_ALEN); @@ -636,7 +637,7 @@ next: rcu_read_unlock(); } - hash = bat_priv->tt_local_hash; + hash = bat_priv->tt.local_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -644,9 +645,8 @@ next: rcu_read_lock(); hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { - entry = (struct batadv_vis_info_entry *) - skb_put(info->skb_packet, - sizeof(*entry)); + packet_pos = skb_put(info->skb_packet, sizeof(*entry)); + entry = (struct batadv_vis_info_entry *)packet_pos; memset(entry->src, 0, ETH_ALEN); memcpy(entry->dest, tt_common_entry->addr, ETH_ALEN); entry->quality = 0; /* 0 means TT */ @@ -671,7 +671,7 @@ unlock: static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) { uint32_t i; - struct batadv_hashtable *hash = bat_priv->vis_hash; + struct batadv_hashtable *hash = bat_priv->vis.hash; struct hlist_node *node, *node_tmp; struct hlist_head *head; struct batadv_vis_info *info; @@ -682,7 +682,7 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) hlist_for_each_entry_safe(info, node, node_tmp, head, hash_entry) { /* never purge own data. */ - if (info == bat_priv->my_vis_info) + if (info == bat_priv->vis.my_info) continue; if (batadv_has_timed_out(info->first_seen, @@ -814,34 +814,36 @@ out: /* called from timer; send (and maybe generate) vis packet. */ static void batadv_send_vis_packets(struct work_struct *work) { - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); + struct delayed_work *delayed_work; struct batadv_priv *bat_priv; + struct batadv_priv_vis *priv_vis; struct batadv_vis_info *info; - bat_priv = container_of(delayed_work, struct batadv_priv, vis_work); - spin_lock_bh(&bat_priv->vis_hash_lock); + delayed_work = container_of(work, struct delayed_work, work); + priv_vis = container_of(delayed_work, struct batadv_priv_vis, work); + bat_priv = container_of(priv_vis, struct batadv_priv, vis); + spin_lock_bh(&bat_priv->vis.hash_lock); batadv_purge_vis_packets(bat_priv); if (batadv_generate_vis_packet(bat_priv) == 0) { /* schedule if generation was successful */ - batadv_send_list_add(bat_priv, bat_priv->my_vis_info); + batadv_send_list_add(bat_priv, bat_priv->vis.my_info); } - while (!list_empty(&bat_priv->vis_send_list)) { - info = list_first_entry(&bat_priv->vis_send_list, + while (!list_empty(&bat_priv->vis.send_list)) { + info = list_first_entry(&bat_priv->vis.send_list, typeof(*info), send_list); kref_get(&info->refcount); - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); batadv_send_vis_packet(bat_priv, info); - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); batadv_send_list_del(info); kref_put(&info->refcount, batadv_free_info); } - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); batadv_start_vis_timer(bat_priv); } @@ -856,37 +858,37 @@ int batadv_vis_init(struct batadv_priv *bat_priv) unsigned long first_seen; struct sk_buff *tmp_skb; - if (bat_priv->vis_hash) + if (bat_priv->vis.hash) return 0; - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); - bat_priv->vis_hash = batadv_hash_new(256); - if (!bat_priv->vis_hash) { + bat_priv->vis.hash = batadv_hash_new(256); + if (!bat_priv->vis.hash) { pr_err("Can't initialize vis_hash\n"); goto err; } - bat_priv->my_vis_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC); - if (!bat_priv->my_vis_info) + bat_priv->vis.my_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC); + if (!bat_priv->vis.my_info) goto err; len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; - bat_priv->my_vis_info->skb_packet = dev_alloc_skb(len); - if (!bat_priv->my_vis_info->skb_packet) + bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); + if (!bat_priv->vis.my_info->skb_packet) goto free_info; - skb_reserve(bat_priv->my_vis_info->skb_packet, ETH_HLEN); - tmp_skb = bat_priv->my_vis_info->skb_packet; + skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); + tmp_skb = bat_priv->vis.my_info->skb_packet; packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); /* prefill the vis info */ first_seen = jiffies - msecs_to_jiffies(BATADV_VIS_INTERVAL); - bat_priv->my_vis_info->first_seen = first_seen; - INIT_LIST_HEAD(&bat_priv->my_vis_info->recv_list); - INIT_LIST_HEAD(&bat_priv->my_vis_info->send_list); - kref_init(&bat_priv->my_vis_info->refcount); - bat_priv->my_vis_info->bat_priv = bat_priv; + bat_priv->vis.my_info->first_seen = first_seen; + INIT_LIST_HEAD(&bat_priv->vis.my_info->recv_list); + INIT_LIST_HEAD(&bat_priv->vis.my_info->send_list); + kref_init(&bat_priv->vis.my_info->refcount); + bat_priv->vis.my_info->bat_priv = bat_priv; packet->header.version = BATADV_COMPAT_VERSION; packet->header.packet_type = BATADV_VIS; packet->header.ttl = BATADV_TTL; @@ -894,28 +896,28 @@ int batadv_vis_init(struct batadv_priv *bat_priv) packet->reserved = 0; packet->entries = 0; - INIT_LIST_HEAD(&bat_priv->vis_send_list); + INIT_LIST_HEAD(&bat_priv->vis.send_list); - hash_added = batadv_hash_add(bat_priv->vis_hash, batadv_vis_info_cmp, + hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp, batadv_vis_info_choose, - bat_priv->my_vis_info, - &bat_priv->my_vis_info->hash_entry); + bat_priv->vis.my_info, + &bat_priv->vis.my_info->hash_entry); if (hash_added != 0) { pr_err("Can't add own vis packet into hash\n"); /* not in hash, need to remove it manually. */ - kref_put(&bat_priv->my_vis_info->refcount, batadv_free_info); + kref_put(&bat_priv->vis.my_info->refcount, batadv_free_info); goto err; } - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); batadv_start_vis_timer(bat_priv); return 0; free_info: - kfree(bat_priv->my_vis_info); - bat_priv->my_vis_info = NULL; + kfree(bat_priv->vis.my_info); + bat_priv->vis.my_info = NULL; err: - spin_unlock_bh(&bat_priv->vis_hash_lock); + spin_unlock_bh(&bat_priv->vis.hash_lock); batadv_vis_quit(bat_priv); return -ENOMEM; } @@ -933,23 +935,23 @@ static void batadv_free_info_ref(struct hlist_node *node, void *arg) /* shutdown vis-server */ void batadv_vis_quit(struct batadv_priv *bat_priv) { - if (!bat_priv->vis_hash) + if (!bat_priv->vis.hash) return; - cancel_delayed_work_sync(&bat_priv->vis_work); + cancel_delayed_work_sync(&bat_priv->vis.work); - spin_lock_bh(&bat_priv->vis_hash_lock); + spin_lock_bh(&bat_priv->vis.hash_lock); /* properly remove, kill timers ... */ - batadv_hash_delete(bat_priv->vis_hash, batadv_free_info_ref, NULL); - bat_priv->vis_hash = NULL; - bat_priv->my_vis_info = NULL; - spin_unlock_bh(&bat_priv->vis_hash_lock); + batadv_hash_delete(bat_priv->vis.hash, batadv_free_info_ref, NULL); + bat_priv->vis.hash = NULL; + bat_priv->vis.my_info = NULL; + spin_unlock_bh(&bat_priv->vis.hash_lock); } /* schedule packets for (re)transmission */ static void batadv_start_vis_timer(struct batadv_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->vis_work, batadv_send_vis_packets); - queue_delayed_work(batadv_event_workqueue, &bat_priv->vis_work, + INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets); + queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, msecs_to_jiffies(BATADV_VIS_INTERVAL)); } diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h index 84e716ed8963..873282fa86da 100644 --- a/net/batman-adv/vis.h +++ b/net/batman-adv/vis.h @@ -20,7 +20,7 @@ #ifndef _NET_BATMAN_ADV_VIS_H_ #define _NET_BATMAN_ADV_VIS_H_ -/* timeout of vis packets in miliseconds */ +/* timeout of vis packets in milliseconds */ #define BATADV_VIS_TIMEOUT 200000 int batadv_vis_seq_print_text(struct seq_file *seq, void *offset); diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 4ff0bf3ba9a5..0760d1fed6f0 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -316,7 +316,7 @@ send_rsp: static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb, struct a2mp_cmd *hdr) { - BT_DBG("ident %d code %d", hdr->ident, hdr->code); + BT_DBG("ident %d code 0x%2.2x", hdr->ident, hdr->code); skb_pull(skb, le16_to_cpu(hdr->len)); return 0; @@ -325,17 +325,19 @@ static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb, /* Handle A2MP signalling */ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { - struct a2mp_cmd *hdr = (void *) skb->data; + struct a2mp_cmd *hdr; struct amp_mgr *mgr = chan->data; int err = 0; amp_mgr_get(mgr); while (skb->len >= sizeof(*hdr)) { - struct a2mp_cmd *hdr = (void *) skb->data; - u16 len = le16_to_cpu(hdr->len); + u16 len; - BT_DBG("code 0x%02x id %d len %d", hdr->code, hdr->ident, len); + hdr = (void *) skb->data; + len = le16_to_cpu(hdr->len); + + BT_DBG("code 0x%2.2x id %d len %u", hdr->code, hdr->ident, len); skb_pull(skb, sizeof(*hdr)); @@ -393,7 +395,9 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) if (err) { struct a2mp_cmd_rej rej; + rej.reason = __constant_cpu_to_le16(0); + hdr = (void *) skb->data; BT_DBG("Send A2MP Rej: cmd 0x%2.2x err %d", hdr->code, err); @@ -412,7 +416,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) static void a2mp_chan_close_cb(struct l2cap_chan *chan) { - l2cap_chan_destroy(chan); + l2cap_chan_put(chan); } static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f7db5792ec64..9d49ee6d7219 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -28,6 +28,7 @@ #include <asm/ioctls.h> #include <net/bluetooth/bluetooth.h> +#include <linux/proc_fs.h> #define VERSION "2.16" @@ -532,6 +533,144 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) } EXPORT_SYMBOL(bt_sock_wait_state); +#ifdef CONFIG_PROC_FS +struct bt_seq_state { + struct bt_sock_list *l; +}; + +static void *bt_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(seq->private->l->lock) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + + read_lock(&l->lock); + return seq_hlist_start_head(&l->head, *pos); +} + +static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + + return seq_hlist_next(v, &l->head, pos); +} + +static void bt_seq_stop(struct seq_file *seq, void *v) + __releases(seq->private->l->lock) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + + read_unlock(&l->lock); +} + +static int bt_seq_show(struct seq_file *seq, void *v) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + bdaddr_t src_baswapped, dst_baswapped; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq ,"sk RefCnt Rmem Wmem User Inode Src Dst Parent"); + + if (l->custom_seq_show) { + seq_putc(seq, ' '); + l->custom_seq_show(seq, v); + } + + seq_putc(seq, '\n'); + } else { + struct sock *sk = sk_entry(v); + struct bt_sock *bt = bt_sk(sk); + baswap(&src_baswapped, &bt->src); + baswap(&dst_baswapped, &bt->dst); + + seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %pM %pM %-6lu", + sk, + atomic_read(&sk->sk_refcnt), + sk_rmem_alloc_get(sk), + sk_wmem_alloc_get(sk), + sock_i_uid(sk), + sock_i_ino(sk), + &src_baswapped, + &dst_baswapped, + bt->parent? sock_i_ino(bt->parent): 0LU); + + if (l->custom_seq_show) { + seq_putc(seq, ' '); + l->custom_seq_show(seq, v); + } + + seq_putc(seq, '\n'); + } + return 0; +} + +static struct seq_operations bt_seq_ops = { + .start = bt_seq_start, + .next = bt_seq_next, + .stop = bt_seq_stop, + .show = bt_seq_show, +}; + +static int bt_seq_open(struct inode *inode, struct file *file) +{ + struct bt_sock_list *sk_list; + struct bt_seq_state *s; + + sk_list = PDE(inode)->data; + s = __seq_open_private(file, &bt_seq_ops, + sizeof(struct bt_seq_state)); + if (!s) + return -ENOMEM; + + s->l = sk_list; + return 0; +} + +int bt_procfs_init(struct module* module, struct net *net, const char *name, + struct bt_sock_list* sk_list, + int (* seq_show)(struct seq_file *, void *)) +{ + struct proc_dir_entry * pde; + + sk_list->custom_seq_show = seq_show; + + sk_list->fops.owner = module; + sk_list->fops.open = bt_seq_open; + sk_list->fops.read = seq_read; + sk_list->fops.llseek = seq_lseek; + sk_list->fops.release = seq_release_private; + + pde = proc_net_fops_create(net, name, 0, &sk_list->fops); + if (!pde) + return -ENOMEM; + + pde->data = sk_list; + + return 0; +} + +void bt_procfs_cleanup(struct net *net, const char *name) +{ + proc_net_remove(net, name); +} +#else +int bt_procfs_init(struct module* module, struct net *net, const char *name, + struct bt_sock_list* sk_list, + int (* seq_show)(struct seq_file *, void *)) +{ + return 0; +} + +void bt_procfs_cleanup(struct net *net, const char *name) +{ +} +#endif +EXPORT_SYMBOL(bt_procfs_init); +EXPORT_SYMBOL(bt_procfs_cleanup); + static struct net_proto_family bt_sock_family_ops = { .owner = THIS_MODULE, .family = PF_BLUETOOTH, diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 5e5f5b410e0b..e7154a58465f 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -29,6 +29,10 @@ #include "bnep.h" +static struct bt_sock_list bnep_sk_list = { + .lock = __RW_LOCK_UNLOCKED(bnep_sk_list.lock) +}; + static int bnep_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -38,6 +42,8 @@ static int bnep_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&bnep_sk_list, sk); + sock_orphan(sk); sock_put(sk); return 0; @@ -58,7 +64,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case BNEPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -84,7 +90,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case BNEPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; @@ -204,6 +210,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; sk->sk_state = BT_OPEN; + bt_sock_link(&bnep_sk_list, sk); return 0; } @@ -222,19 +229,30 @@ int __init bnep_sock_init(void) return err; err = bt_sock_register(BTPROTO_BNEP, &bnep_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("Can't register BNEP socket"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "bnep", &bnep_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create BNEP proc file"); + bt_sock_unregister(BTPROTO_BNEP); + goto error; + } + + BT_INFO("BNEP socket layer initialized"); return 0; error: - BT_ERR("Can't register BNEP socket"); proto_unregister(&bnep_proto); return err; } void __exit bnep_sock_cleanup(void) { + bt_procfs_cleanup(&init_net, "bnep"); if (bt_sock_unregister(BTPROTO_BNEP) < 0) BT_ERR("Can't unregister BNEP socket"); diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 311668d14571..aacb802d1ee4 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -42,6 +42,10 @@ #include "cmtp.h" +static struct bt_sock_list cmtp_sk_list = { + .lock = __RW_LOCK_UNLOCKED(cmtp_sk_list.lock) +}; + static int cmtp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -51,6 +55,8 @@ static int cmtp_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&cmtp_sk_list, sk); + sock_orphan(sk); sock_put(sk); @@ -72,7 +78,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case CMTPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -97,7 +103,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case CMTPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; @@ -214,6 +220,8 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; sk->sk_state = BT_OPEN; + bt_sock_link(&cmtp_sk_list, sk); + return 0; } @@ -232,19 +240,30 @@ int cmtp_init_sockets(void) return err; err = bt_sock_register(BTPROTO_CMTP, &cmtp_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("Can't register CMTP socket"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "cmtp", &cmtp_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create CMTP proc file"); + bt_sock_unregister(BTPROTO_HIDP); + goto error; + } + + BT_INFO("CMTP socket layer initialized"); return 0; error: - BT_ERR("Can't register CMTP socket"); proto_unregister(&cmtp_proto); return err; } void cmtp_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "cmtp"); if (bt_sock_unregister(BTPROTO_CMTP) < 0) BT_ERR("Can't unregister CMTP socket"); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 5ad7da217474..b9196a44f759 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -29,8 +29,9 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/a2mp.h> +#include <net/bluetooth/smp.h> -static void hci_le_connect(struct hci_conn *conn) +static void hci_le_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct hci_cp_le_create_conn cp; @@ -54,12 +55,12 @@ static void hci_le_connect(struct hci_conn *conn) hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp); } -static void hci_le_connect_cancel(struct hci_conn *conn) +static void hci_le_create_connection_cancel(struct hci_conn *conn) { hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); } -void hci_acl_connect(struct hci_conn *conn) +static void hci_acl_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct inquiry_entry *ie; @@ -103,7 +104,7 @@ void hci_acl_connect(struct hci_conn *conn) hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp); } -static void hci_acl_connect_cancel(struct hci_conn *conn) +static void hci_acl_create_connection_cancel(struct hci_conn *conn) { struct hci_cp_create_conn_cancel cp; @@ -129,7 +130,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason) hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp); } -void hci_add_sco(struct hci_conn *conn, __u16 handle) +static void hci_add_sco(struct hci_conn *conn, __u16 handle) { struct hci_dev *hdev = conn->hdev; struct hci_cp_add_sco cp; @@ -245,9 +246,9 @@ static void hci_conn_timeout(struct work_struct *work) case BT_CONNECT2: if (conn->out) { if (conn->type == ACL_LINK) - hci_acl_connect_cancel(conn); + hci_acl_create_connection_cancel(conn); else if (conn->type == LE_LINK) - hci_le_connect_cancel(conn); + hci_le_create_connection_cancel(conn); } break; case BT_CONFIG: @@ -470,40 +471,37 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) } EXPORT_SYMBOL(hci_get_route); -/* Create SCO, ACL or LE connection. - * Device _must_ be locked */ -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, - __u8 dst_type, __u8 sec_level, __u8 auth_type) +static struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, + u8 dst_type, u8 sec_level, u8 auth_type) { - struct hci_conn *acl; - struct hci_conn *sco; struct hci_conn *le; - BT_DBG("%s dst %s", hdev->name, batostr(dst)); + le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + if (!le) { + le = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (le) + return ERR_PTR(-EBUSY); - if (type == LE_LINK) { - le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); - if (!le) { - le = hci_conn_hash_lookup_state(hdev, LE_LINK, - BT_CONNECT); - if (le) - return ERR_PTR(-EBUSY); + le = hci_conn_add(hdev, LE_LINK, dst); + if (!le) + return ERR_PTR(-ENOMEM); - le = hci_conn_add(hdev, LE_LINK, dst); - if (!le) - return ERR_PTR(-ENOMEM); + le->dst_type = bdaddr_to_le(dst_type); + hci_le_create_connection(le); + } - le->dst_type = bdaddr_to_le(dst_type); - hci_le_connect(le); - } + le->pending_sec_level = sec_level; + le->auth_type = auth_type; - le->pending_sec_level = sec_level; - le->auth_type = auth_type; + hci_conn_hold(le); - hci_conn_hold(le); + return le; +} - return le; - } +static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, + u8 sec_level, u8 auth_type) +{ + struct hci_conn *acl; acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { @@ -518,10 +516,20 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, acl->sec_level = BT_SECURITY_LOW; acl->pending_sec_level = sec_level; acl->auth_type = auth_type; - hci_acl_connect(acl); + hci_acl_create_connection(acl); } - if (type == ACL_LINK) + return acl; +} + +static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, + bdaddr_t *dst, u8 sec_level, u8 auth_type) +{ + struct hci_conn *acl; + struct hci_conn *sco; + + acl = hci_connect_acl(hdev, dst, sec_level, auth_type); + if (IS_ERR(acl)) return acl; sco = hci_conn_hash_lookup_ba(hdev, type, dst); @@ -555,6 +563,25 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, return sco; } +/* Create SCO, ACL or LE connection. */ +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, + __u8 dst_type, __u8 sec_level, __u8 auth_type) +{ + BT_DBG("%s dst %s type 0x%x", hdev->name, batostr(dst), type); + + switch (type) { + case LE_LINK: + return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type); + case ACL_LINK: + return hci_connect_acl(hdev, dst, sec_level, auth_type); + case SCO_LINK: + case ESCO_LINK: + return hci_connect_sco(hdev, type, dst, sec_level, auth_type); + } + + return ERR_PTR(-EINVAL); +} + /* Check link security requirement */ int hci_conn_check_link_mode(struct hci_conn *conn) { @@ -619,6 +646,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("hcon %p", conn); + if (conn->type == LE_LINK) + return smp_conn_security(conn, sec_level); + /* For sdp we don't need the link key. */ if (sec_level == BT_SECURITY_SDP) return 1; @@ -771,7 +801,7 @@ void hci_conn_check_pending(struct hci_dev *hdev) conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); if (conn) - hci_acl_connect(conn); + hci_acl_create_connection(conn); hci_dev_unlock(hdev); } @@ -909,7 +939,7 @@ struct hci_chan *hci_chan_create(struct hci_conn *conn) return chan; } -int hci_chan_del(struct hci_chan *chan) +void hci_chan_del(struct hci_chan *chan) { struct hci_conn *conn = chan->conn; struct hci_dev *hdev = conn->hdev; @@ -922,8 +952,6 @@ int hci_chan_del(struct hci_chan *chan) skb_queue_purge(&chan->data_q); kfree(chan); - - return 0; } void hci_chan_list_flush(struct hci_conn *conn) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d4de5db18d5a..8a0ce706aebd 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -231,6 +231,9 @@ static void amp_init(struct hci_dev *hdev) /* Read Local AMP Info */ hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); + + /* Read Data Blk size */ + hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); } static void hci_init_req(struct hci_dev *hdev, unsigned long opt) @@ -268,7 +271,6 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) BT_ERR("Unknown device type %d", hdev->dev_type); break; } - } static void hci_le_init_req(struct hci_dev *hdev, unsigned long opt) @@ -696,7 +698,8 @@ int hci_dev_open(__u16 dev) hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - if (!test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); mgmt_powered(hdev, 1); hci_dev_unlock(hdev); @@ -734,6 +737,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_work_sync(&hdev->le_scan); + cancel_delayed_work(&hdev->power_off); + hci_req_cancel(hdev, ENODEV); hci_req_lock(hdev); @@ -797,7 +802,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) * and no tasks are scheduled. */ hdev->close(hdev); - if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) && + mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); mgmt_powered(hdev, 0); hci_dev_unlock(hdev); @@ -1650,6 +1656,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_LIST_HEAD(&hdev->link_keys); INIT_LIST_HEAD(&hdev->long_term_keys); INIT_LIST_HEAD(&hdev->remote_oob_data); + INIT_LIST_HEAD(&hdev->conn_hash.list); INIT_WORK(&hdev->rx_work, hci_rx_work); INIT_WORK(&hdev->cmd_work, hci_cmd_work); @@ -1672,7 +1679,6 @@ struct hci_dev *hci_alloc_dev(void) hci_init_sysfs(hdev); discovery_init(hdev); - hci_conn_hash_init(hdev); return hdev; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 41ff978a33f9..2022b43c7353 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -29,6 +29,7 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/mgmt.h> /* Handle HCI Event packets */ @@ -303,7 +304,7 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (status != 0) { + if (status) { mgmt_write_scan_failed(hdev, param, status); hdev->discov_timeout = 0; goto done; @@ -513,7 +514,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev) if (hdev->features[3] & LMP_RSSI_INQ) events[4] |= 0x02; /* Inquiry Result with RSSI */ - if (hdev->features[5] & LMP_SNIFF_SUBR) + if (lmp_sniffsubr_capable(hdev)) events[5] |= 0x20; /* Sniff Subrating */ if (hdev->features[5] & LMP_PAUSE_ENC) @@ -522,13 +523,13 @@ static void hci_setup_event_mask(struct hci_dev *hdev) if (hdev->features[6] & LMP_EXT_INQ) events[5] |= 0x40; /* Extended Inquiry Result */ - if (hdev->features[6] & LMP_NO_FLUSH) + if (lmp_no_flush_capable(hdev)) events[7] |= 0x01; /* Enhanced Flush Complete */ if (hdev->features[7] & LMP_LSTO) events[6] |= 0x80; /* Link Supervision Timeout Changed */ - if (hdev->features[6] & LMP_SIMPLE_PAIR) { + if (lmp_ssp_capable(hdev)) { events[6] |= 0x01; /* IO Capability Request */ events[6] |= 0x02; /* IO Capability Response */ events[6] |= 0x04; /* User Confirmation Request */ @@ -541,7 +542,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev) * Features Notification */ } - if (hdev->features[4] & LMP_LE) + if (lmp_le_capable(hdev)) events[7] |= 0x20; /* LE Meta-Event */ hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events); @@ -623,11 +624,11 @@ static void hci_setup_link_policy(struct hci_dev *hdev) struct hci_cp_write_def_link_policy cp; u16 link_policy = 0; - if (hdev->features[0] & LMP_RSWITCH) + if (lmp_rswitch_capable(hdev)) link_policy |= HCI_LP_RSWITCH; if (hdev->features[0] & LMP_HOLD) link_policy |= HCI_LP_HOLD; - if (hdev->features[0] & LMP_SNIFF) + if (lmp_sniff_capable(hdev)) link_policy |= HCI_LP_SNIFF; if (hdev->features[1] & LMP_PARK) link_policy |= HCI_LP_PARK; @@ -686,7 +687,7 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, hdev->esco_type |= (ESCO_HV3); } - if (hdev->features[3] & LMP_ESCO) + if (lmp_esco_capable(hdev)) hdev->esco_type |= (ESCO_EV3); if (hdev->features[4] & LMP_EV4) @@ -746,7 +747,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, break; } - if (test_bit(HCI_INIT, &hdev->flags) && hdev->features[4] & LMP_LE) + if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev)) hci_set_le_support(hdev); done: @@ -925,7 +926,7 @@ static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_pin_code_reply_complete(hdev, &rp->bdaddr, rp->status); - if (rp->status != 0) + if (rp->status) goto unlock; cp = hci_sent_cmd_data(hdev, HCI_OP_PIN_CODE_REPLY); @@ -1365,6 +1366,9 @@ static bool hci_resolve_next_name(struct hci_dev *hdev) return false; e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED); + if (!e) + return false; + if (hci_resolve_name(hdev, e) == 0) { e->name_state = NAME_PENDING; return true; @@ -1393,12 +1397,20 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn, return; e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING); - if (e) { + /* If the device was not found in a list of found devices names of which + * are pending. there is no need to continue resolving a next name as it + * will be done upon receiving another Remote Name Request Complete + * Event */ + if (!e) + return; + + list_del(&e->list); + if (name) { e->name_state = NAME_KNOWN; - list_del(&e->list); - if (name) - mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, - e->data.rssi, name, name_len); + mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, + e->data.rssi, name, name_len); + } else { + e->name_state = NAME_NOT_KNOWN; } if (hci_resolve_next_name(hdev)) @@ -1614,43 +1626,30 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) { - struct hci_cp_le_create_conn *cp; struct hci_conn *conn; BT_DBG("%s status 0x%2.2x", hdev->name, status); - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN); - if (!cp) - return; + if (status) { + hci_dev_lock(hdev); - hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (!conn) { + hci_dev_unlock(hdev); + return; + } - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); + BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&conn->dst), + conn); - BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr), - conn); + conn->state = BT_CLOSED; + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); + hci_proto_connect_cfm(conn, status); + hci_conn_del(conn); - if (status) { - if (conn && conn->state == BT_CONNECT) { - conn->state = BT_CLOSED; - mgmt_connect_failed(hdev, &cp->peer_addr, conn->type, - conn->dst_type, status); - hci_proto_connect_cfm(conn, status); - hci_conn_del(conn); - } - } else { - if (!conn) { - conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr); - if (conn) { - conn->dst_type = cp->peer_addr_type; - conn->out = true; - } else { - BT_ERR("No memory for new connection"); - } - } + hci_dev_unlock(hdev); } - - hci_dev_unlock(hdev); } static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) @@ -1762,7 +1761,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; hci_conn_hold(conn); - conn->disc_timeout = HCI_DISCONN_TIMEOUT; + + if (!conn->out && !hci_conn_ssp_enabled(conn) && + !hci_find_link_key(hdev, &ev->bdaddr)) + conn->disc_timeout = HCI_PAIRING_TIMEOUT; + else + conn->disc_timeout = HCI_DISCONN_TIMEOUT; } else conn->state = BT_CONNECTED; @@ -1888,6 +1892,22 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) } } +static u8 hci_to_mgmt_reason(u8 err) +{ + switch (err) { + case HCI_ERROR_CONNECTION_TIMEOUT: + return MGMT_DEV_DISCONN_TIMEOUT; + case HCI_ERROR_REMOTE_USER_TERM: + case HCI_ERROR_REMOTE_LOW_RESOURCES: + case HCI_ERROR_REMOTE_POWER_OFF: + return MGMT_DEV_DISCONN_REMOTE; + case HCI_ERROR_LOCAL_HOST_TERM: + return MGMT_DEV_DISCONN_LOCAL_HOST; + default: + return MGMT_DEV_DISCONN_UNKNOWN; + } +} + static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_disconn_complete *ev = (void *) skb->data; @@ -1906,12 +1926,15 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags) && (conn->type == ACL_LINK || conn->type == LE_LINK)) { - if (ev->status != 0) + if (ev->status) { mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); - else + } else { + u8 reason = hci_to_mgmt_reason(ev->reason); + mgmt_device_disconnected(hdev, &conn->dst, conn->type, - conn->dst_type); + conn->dst_type, reason); + } } if (ev->status == 0) { @@ -3252,12 +3275,67 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev, BT_DBG("%s", hdev->name); - hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0); +} - hci_dev_unlock(hdev); +static void hci_user_passkey_notify_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_user_passkey_notify *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (!conn) + return; + + conn->passkey_notify = __le32_to_cpu(ev->passkey); + conn->passkey_entered = 0; + + if (test_bit(HCI_MGMT, &hdev->dev_flags)) + mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, + conn->dst_type, conn->passkey_notify, + conn->passkey_entered); +} + +static void hci_keypress_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_keypress_notify *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (!conn) + return; + + switch (ev->type) { + case HCI_KEYPRESS_STARTED: + conn->passkey_entered = 0; + return; + + case HCI_KEYPRESS_ENTERED: + conn->passkey_entered++; + break; + + case HCI_KEYPRESS_ERASED: + conn->passkey_entered--; + break; + + case HCI_KEYPRESS_CLEARED: + conn->passkey_entered = 0; + break; + + case HCI_KEYPRESS_COMPLETED: + return; + } + + if (test_bit(HCI_MGMT, &hdev->dev_flags)) + mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, + conn->dst_type, conn->passkey_notify, + conn->passkey_entered); } static void hci_simple_pair_complete_evt(struct hci_dev *hdev, @@ -3279,7 +3357,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, * initiated the authentication. A traditional auth_complete * event gets always produced as initiator and is also mapped to * the mgmt_auth_failed event */ - if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && ev->status != 0) + if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && ev->status) mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); @@ -3350,11 +3428,23 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (ev->status) { - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (!conn) + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (!conn) { + conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); + if (!conn) { + BT_ERR("No memory for new connection"); goto unlock; + } + conn->dst_type = ev->bdaddr_type; + + if (ev->role == LE_CONN_ROLE_MASTER) { + conn->out = true; + conn->link_mode |= HCI_LM_MASTER; + } + } + + if (ev->status) { mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); hci_proto_connect_cfm(conn, ev->status); @@ -3363,18 +3453,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) goto unlock; } - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr); - if (!conn) { - conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); - if (!conn) { - BT_ERR("No memory for new connection"); - hci_dev_unlock(hdev); - return; - } - - conn->dst_type = ev->bdaddr_type; - } - if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) mgmt_device_connected(hdev, &ev->bdaddr, conn->type, conn->dst_type, 0, NULL, 0, NULL); @@ -3624,6 +3702,14 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_user_passkey_request_evt(hdev, skb); break; + case HCI_EV_USER_PASSKEY_NOTIFY: + hci_user_passkey_notify_evt(hdev, skb); + break; + + case HCI_EV_KEYPRESS_NOTIFY: + hci_keypress_notify_evt(hdev, skb); + break; + case HCI_EV_SIMPLE_PAIR_COMPLETE: hci_simple_pair_complete_evt(hdev, skb); break; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a7f04de03d79..07f073935811 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -490,7 +490,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return -EPERM; @@ -510,12 +510,12 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_sock_blacklist_add(hdev, (void __user *) arg); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_sock_blacklist_del(hdev, (void __user *) arg); default: @@ -546,22 +546,22 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, case HCIDEVUP: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_open(arg); case HCIDEVDOWN: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_close(arg); case HCIDEVRESET: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_reset(arg); case HCIDEVRESTAT: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_reset_stat(arg); case HCISETSCAN: @@ -573,7 +573,7 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, case HCISETACLMTU: case HCISETSCOMTU: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_cmd(cmd, argp); case HCIINQUIRY: @@ -694,6 +694,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, *addr_len = sizeof(*haddr); haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; + haddr->hci_channel= 0; release_sock(sk); return 0; @@ -1009,6 +1010,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, { struct hci_filter *f = &hci_pi(sk)->filter; + memset(&uf, 0, sizeof(uf)); uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); @@ -1100,21 +1102,30 @@ int __init hci_sock_init(void) return err; err = bt_sock_register(BTPROTO_HCI, &hci_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("HCI socket registration failed"); + goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "hci", &hci_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create HCI proc file"); + bt_sock_unregister(BTPROTO_HCI); goto error; + } BT_INFO("HCI socket layer initialized"); return 0; error: - BT_ERR("HCI socket registration failed"); proto_unregister(&hci_sk_proto); return err; } void hci_sock_cleanup(void) { + bt_procfs_cleanup(&init_net, "hci"); if (bt_sock_unregister(BTPROTO_HCI) < 0) BT_ERR("HCI socket unregistration failed"); diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 18b3f6892a36..82a829d90b0f 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -25,6 +25,10 @@ #include "hidp.h" +static struct bt_sock_list hidp_sk_list = { + .lock = __RW_LOCK_UNLOCKED(hidp_sk_list.lock) +}; + static int hidp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -34,6 +38,8 @@ static int hidp_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&hidp_sk_list, sk); + sock_orphan(sk); sock_put(sk); @@ -56,7 +62,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case HIDPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -91,7 +97,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case HIDPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; @@ -253,6 +259,8 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; sk->sk_state = BT_OPEN; + bt_sock_link(&hidp_sk_list, sk); + return 0; } @@ -271,8 +279,19 @@ int __init hidp_init_sockets(void) return err; err = bt_sock_register(BTPROTO_HIDP, &hidp_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("Can't register HIDP socket"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "hidp", &hidp_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create HIDP proc file"); + bt_sock_unregister(BTPROTO_HIDP); + goto error; + } + + BT_INFO("HIDP socket layer initialized"); return 0; @@ -284,6 +303,7 @@ error: void __exit hidp_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "hidp"); if (bt_sock_unregister(BTPROTO_HIDP) < 0) BT_ERR("Can't unregister HIDP socket"); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a8964db04bfb..a91239dcda41 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -406,7 +406,7 @@ struct l2cap_chan *l2cap_chan_create(void) chan->state = BT_OPEN; - atomic_set(&chan->refcnt, 1); + kref_init(&chan->kref); /* This flag is cleared in l2cap_chan_ready() */ set_bit(CONF_NOT_COMPLETE, &chan->conf_state); @@ -416,13 +416,31 @@ struct l2cap_chan *l2cap_chan_create(void) return chan; } -void l2cap_chan_destroy(struct l2cap_chan *chan) +static void l2cap_chan_destroy(struct kref *kref) { + struct l2cap_chan *chan = container_of(kref, struct l2cap_chan, kref); + + BT_DBG("chan %p", chan); + write_lock(&chan_list_lock); list_del(&chan->global_l); write_unlock(&chan_list_lock); - l2cap_chan_put(chan); + kfree(chan); +} + +void l2cap_chan_hold(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount)); + + kref_get(&c->kref); +} + +void l2cap_chan_put(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount)); + + kref_put(&c->kref, l2cap_chan_destroy); } void l2cap_chan_set_defaults(struct l2cap_chan *chan) @@ -1008,7 +1026,7 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *c if (!conn) return; - if (chan->mode == L2CAP_MODE_ERTM) { + if (chan->mode == L2CAP_MODE_ERTM && chan->state == BT_CONNECTED) { __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); @@ -1181,6 +1199,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) sk = chan->sk; hci_conn_hold(conn->hcon); + conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; bacpy(&bt_sk(sk)->src, conn->src); bacpy(&bt_sk(sk)->dst, conn->dst); @@ -1198,14 +1217,15 @@ clean: static void l2cap_conn_ready(struct l2cap_conn *conn) { struct l2cap_chan *chan; + struct hci_conn *hcon = conn->hcon; BT_DBG("conn %p", conn); - if (!conn->hcon->out && conn->hcon->type == LE_LINK) + if (!hcon->out && hcon->type == LE_LINK) l2cap_le_conn_ready(conn); - if (conn->hcon->out && conn->hcon->type == LE_LINK) - smp_conn_security(conn, conn->hcon->pending_sec_level); + if (hcon->out && hcon->type == LE_LINK) + smp_conn_security(hcon, hcon->pending_sec_level); mutex_lock(&conn->chan_lock); @@ -1218,8 +1238,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) continue; } - if (conn->hcon->type == LE_LINK) { - if (smp_conn_security(conn, chan->sec_level)) + if (hcon->type == LE_LINK) { + if (smp_conn_security(hcon, chan->sec_level)) l2cap_chan_ready(chan); } else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { @@ -1429,7 +1449,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, int err; BT_DBG("%s -> %s (type %u) psm 0x%2.2x", batostr(src), batostr(dst), - dst_type, __le16_to_cpu(chan->psm)); + dst_type, __le16_to_cpu(psm)); hdev = hci_get_route(dst, src); if (!hdev) @@ -5329,7 +5349,7 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) return exact ? lm1 : lm2; } -int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) +void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_conn *conn; @@ -5342,7 +5362,6 @@ int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) } else l2cap_conn_del(hcon, bt_to_errno(status)); - return 0; } int l2cap_disconn_ind(struct hci_conn *hcon) @@ -5356,12 +5375,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon) return conn->disc_reason; } -int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) +void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); - return 0; } static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) @@ -5404,6 +5422,11 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) BT_DBG("chan %p scid 0x%4.4x state %s", chan, chan->scid, state_to_string(chan->state)); + if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) { + l2cap_chan_unlock(chan); + continue; + } + if (chan->scid == L2CAP_CID_LE_DATA) { if (!status && encrypt) { chan->sec_level = hcon->sec_level; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a4bb27e8427e..083f2bf065d4 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -34,6 +34,10 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/smp.h> +static struct bt_sock_list l2cap_sk_list = { + .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) +}; + static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio); @@ -245,6 +249,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l BT_DBG("sock %p, sk %p", sock, sk); + memset(la, 0, sizeof(struct sockaddr_l2)); addr->sa_family = AF_BLUETOOTH; *len = sizeof(struct sockaddr_l2); @@ -615,7 +620,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch break; } - if (smp_conn_security(conn, sec.level)) + if (smp_conn_security(conn->hcon, sec.level)) break; sk->sk_state = BT_CONFIG; chan->state = BT_CONFIG; @@ -823,7 +828,7 @@ static void l2cap_sock_kill(struct sock *sk) /* Kill poor orphan */ - l2cap_chan_destroy(l2cap_pi(sk)->chan); + l2cap_chan_put(l2cap_pi(sk)->chan); sock_set_flag(sk, SOCK_DEAD); sock_put(sk); } @@ -886,6 +891,8 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&l2cap_sk_list, sk); + err = l2cap_sock_shutdown(sock, 2); sock_orphan(sk); @@ -1174,7 +1181,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p chan = l2cap_chan_create(); if (!chan) { - l2cap_sock_kill(sk); + sk_free(sk); return NULL; } @@ -1210,6 +1217,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, return -ENOMEM; l2cap_sock_init(sk, NULL); + bt_sock_link(&l2cap_sk_list, sk); return 0; } @@ -1248,21 +1256,30 @@ int __init l2cap_init_sockets(void) return err; err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("L2CAP socket registration failed"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "l2cap", &l2cap_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create L2CAP proc file"); + bt_sock_unregister(BTPROTO_L2CAP); + goto error; + } BT_INFO("L2CAP socket layer initialized"); return 0; error: - BT_ERR("L2CAP socket registration failed"); proto_unregister(&l2cap_proto); return err; } void l2cap_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "l2cap"); if (bt_sock_unregister(BTPROTO_L2CAP) < 0) BT_ERR("L2CAP socket unregistration failed"); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ad6613d17ca6..aa2ea0a8142c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -35,7 +35,7 @@ bool enable_hs; #define MGMT_VERSION 1 -#define MGMT_REVISION 1 +#define MGMT_REVISION 2 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -99,6 +99,7 @@ static const u16 mgmt_events[] = { MGMT_EV_DEVICE_BLOCKED, MGMT_EV_DEVICE_UNBLOCKED, MGMT_EV_DEVICE_UNPAIRED, + MGMT_EV_PASSKEY_NOTIFY, }; /* @@ -193,6 +194,11 @@ static u8 mgmt_status_table[] = { MGMT_STATUS_CONNECT_FAILED, /* MAC Connection Failed */ }; +bool mgmt_valid_hdev(struct hci_dev *hdev) +{ + return hdev->dev_type == HCI_BREDR; +} + static u8 mgmt_status(u8 hci_status) { if (hci_status < ARRAY_SIZE(mgmt_status_table)) @@ -317,7 +323,6 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_index_list *rp; - struct list_head *p; struct hci_dev *d; size_t rp_len; u16 count; @@ -328,7 +333,10 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, read_lock(&hci_dev_list_lock); count = 0; - list_for_each(p, &hci_dev_list) { + list_for_each_entry(d, &hci_dev_list, list) { + if (!mgmt_valid_hdev(d)) + continue; + count++; } @@ -346,6 +354,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, if (test_bit(HCI_SETUP, &d->dev_flags)) continue; + if (!mgmt_valid_hdev(d)) + continue; + rp->index[i++] = cpu_to_le16(d->id); BT_DBG("Added hci%u", d->id); } @@ -370,10 +381,10 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_DISCOVERABLE; settings |= MGMT_SETTING_PAIRABLE; - if (hdev->features[6] & LMP_SIMPLE_PAIR) + if (lmp_ssp_capable(hdev)) settings |= MGMT_SETTING_SSP; - if (!(hdev->features[4] & LMP_NO_BREDR)) { + if (lmp_bredr_capable(hdev)) { settings |= MGMT_SETTING_BREDR; settings |= MGMT_SETTING_LINK_SECURITY; } @@ -381,7 +392,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (enable_hs) settings |= MGMT_SETTING_HS; - if (hdev->features[4] & LMP_LE) + if (lmp_le_capable(hdev)) settings |= MGMT_SETTING_LE; return settings; @@ -403,7 +414,7 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_PAIRABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_PAIRABLE; - if (!(hdev->features[4] & LMP_NO_BREDR)) + if (lmp_bredr_capable(hdev)) settings |= MGMT_SETTING_BREDR; if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) @@ -1111,7 +1122,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); - if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) { + if (!lmp_ssp_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, MGMT_STATUS_NOT_SUPPORTED); goto failed; @@ -1195,7 +1206,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); - if (!(hdev->features[4] & LMP_LE)) { + if (!lmp_le_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_NOT_SUPPORTED); goto unlock; @@ -2191,7 +2202,7 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) { + if (!lmp_ssp_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_STATUS_NOT_SUPPORTED); goto unlock; @@ -2820,6 +2831,9 @@ static void cmd_status_rsp(struct pending_cmd *cmd, void *data) int mgmt_index_added(struct hci_dev *hdev) { + if (!mgmt_valid_hdev(hdev)) + return -ENOTSUPP; + return mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL); } @@ -2827,6 +2841,9 @@ int mgmt_index_removed(struct hci_dev *hdev) { u8 status = MGMT_STATUS_INVALID_INDEX; + if (!mgmt_valid_hdev(hdev)) + return -ENOTSUPP; + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); return mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL); @@ -2875,6 +2892,22 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) if (scan) hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + u8 ssp = 1; + + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); + } + + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + struct hci_cp_write_le_host_supported cp; + + cp.le = 1; + cp.simul = !!(hdev->features[6] & LMP_SIMUL_LE_BR); + + hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); + } + update_class(hdev); update_name(hdev, hdev->dev_name); update_eir(hdev); @@ -3061,16 +3094,17 @@ static void unpair_device_rsp(struct pending_cmd *cmd, void *data) } int mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, - u8 link_type, u8 addr_type) + u8 link_type, u8 addr_type, u8 reason) { - struct mgmt_addr_info ev; + struct mgmt_ev_device_disconnected ev; struct sock *sk = NULL; int err; mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk); - bacpy(&ev.bdaddr, bdaddr); - ev.type = link_to_bdaddr(link_type, addr_type); + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = link_to_bdaddr(link_type, addr_type); + ev.reason = reason; err = mgmt_event(MGMT_EV_DEVICE_DISCONNECTED, hdev, &ev, sizeof(ev), sk); @@ -3259,6 +3293,22 @@ int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, MGMT_OP_USER_PASSKEY_NEG_REPLY); } +int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 link_type, u8 addr_type, u32 passkey, + u8 entered) +{ + struct mgmt_ev_passkey_notify ev; + + BT_DBG("%s", hdev->name); + + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = link_to_bdaddr(link_type, addr_type); + ev.passkey = __cpu_to_le32(passkey); + ev.entered = entered; + + return mgmt_event(MGMT_EV_PASSKEY_NOTIFY, hdev, &ev, sizeof(ev), NULL); +} + int mgmt_auth_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status) { diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7e1e59645c05..b3226f3658cf 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -528,6 +528,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * BT_DBG("sock %p, sk %p", sock, sk); + memset(sa, 0, sizeof(*sa)); sa->rc_family = AF_BLUETOOTH; sa->rc_channel = rfcomm_pi(sk)->channel; if (peer) @@ -822,6 +823,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c } sec.level = rfcomm_pi(sk)->sec_level; + sec.key_size = 0; len = min_t(unsigned int, len, sizeof(sec)); if (copy_to_user(optval, (char *) &sec, len)) @@ -1033,8 +1035,17 @@ int __init rfcomm_init_sockets(void) return err; err = bt_sock_register(BTPROTO_RFCOMM, &rfcomm_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("RFCOMM socket layer registration failed"); + goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "rfcomm", &rfcomm_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create RFCOMM proc file"); + bt_sock_unregister(BTPROTO_RFCOMM); goto error; + } if (bt_debugfs) { rfcomm_sock_debugfs = debugfs_create_file("rfcomm", 0444, @@ -1048,13 +1059,14 @@ int __init rfcomm_init_sockets(void) return 0; error: - BT_ERR("RFCOMM socket layer registration failed"); proto_unregister(&rfcomm_proto); return err; } void __exit rfcomm_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "rfcomm"); + debugfs_remove(rfcomm_sock_debugfs); if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index cb960773c002..ccc248791d50 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -278,8 +278,8 @@ out: if (err < 0) goto free; - dev->tty_dev = tty_register_device(rfcomm_tty_driver, dev->id, NULL); - + dev->tty_dev = tty_port_register_device(&dev->port, rfcomm_tty_driver, + dev->id, NULL); if (IS_ERR(dev->tty_dev)) { err = PTR_ERR(dev->tty_dev); list_del(&dev->list); @@ -456,7 +456,7 @@ static int rfcomm_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*di); - dl = kmalloc(size, GFP_KERNEL); + dl = kzalloc(size, GFP_KERNEL); if (!dl) return -ENOMEM; @@ -705,9 +705,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) break; } - tty_unlock(); + tty_unlock(tty); schedule(); - tty_lock(); + tty_lock(tty); } set_current_state(TASK_RUNNING); remove_wait_queue(&dev->wait, &wait); @@ -861,7 +861,7 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned l static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) { - struct ktermios *new = tty->termios; + struct ktermios *new = &tty->termios; int old_baud_rate = tty_termios_baud_rate(old); int new_baud_rate = tty_termios_baud_rate(new); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 40bbe25dcff7..dc42b917aaaf 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -131,6 +131,15 @@ static int sco_conn_del(struct hci_conn *hcon, int err) sco_sock_clear_timer(sk); sco_chan_del(sk, err); bh_unlock_sock(sk); + + sco_conn_lock(conn); + conn->sk = NULL; + sco_pi(sk)->conn = NULL; + sco_conn_unlock(conn); + + if (conn->hcon) + hci_conn_put(conn->hcon); + sco_sock_kill(sk); } @@ -821,16 +830,6 @@ static void sco_chan_del(struct sock *sk, int err) BT_DBG("sk %p, conn %p, err %d", sk, conn, err); - if (conn) { - sco_conn_lock(conn); - conn->sk = NULL; - sco_pi(sk)->conn = NULL; - sco_conn_unlock(conn); - - if (conn->hcon) - hci_conn_put(conn->hcon); - } - sk->sk_state = BT_CLOSED; sk->sk_err = err; sk->sk_state_change(sk); @@ -913,7 +912,7 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) return lm; } -int sco_connect_cfm(struct hci_conn *hcon, __u8 status) +void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); if (!status) { @@ -924,16 +923,13 @@ int sco_connect_cfm(struct hci_conn *hcon, __u8 status) sco_conn_ready(conn); } else sco_conn_del(hcon, bt_to_errno(status)); - - return 0; } -int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) +void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); - return 0; } int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) @@ -1026,6 +1022,13 @@ int __init sco_init(void) goto error; } + err = bt_procfs_init(THIS_MODULE, &init_net, "sco", &sco_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create SCO proc file"); + bt_sock_unregister(BTPROTO_SCO); + goto error; + } + if (bt_debugfs) { sco_debugfs = debugfs_create_file("sco", 0444, bt_debugfs, NULL, &sco_debugfs_fops); @@ -1044,6 +1047,8 @@ error: void __exit sco_exit(void) { + bt_procfs_cleanup(&init_net, "sco"); + debugfs_remove(sco_debugfs); if (bt_sock_unregister(BTPROTO_SCO) < 0) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 16ef0dc85a0a..8c225ef349cd 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -267,10 +267,10 @@ static void smp_failure(struct l2cap_conn *conn, u8 reason, u8 send) mgmt_auth_failed(conn->hcon->hdev, conn->dst, hcon->type, hcon->dst_type, reason); - if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) { - cancel_delayed_work_sync(&conn->security_timer); + cancel_delayed_work_sync(&conn->security_timer); + + if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) smp_chan_destroy(conn); - } } #define JUST_WORKS 0x00 @@ -579,8 +579,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) smp = smp_chan_create(conn); + else + smp = conn->smp_chan; - smp = conn->smp_chan; + if (!smp) + return SMP_UNSPECIFIED; smp->preq[0] = SMP_CMD_PAIRING_REQ; memcpy(&smp->preq[1], req, sizeof(*req)); @@ -757,9 +760,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) return 0; } -int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level) +int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) { - struct hci_conn *hcon = conn->hcon; + struct l2cap_conn *conn = hcon->l2cap_data; struct smp_chan *smp = conn->smp_chan; __u8 authreq; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 333484537600..070e8a68cfc6 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -31,9 +31,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_mdb_entry *mdst; struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + rcu_read_lock(); #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { br_nf_pre_routing_finish_bridge_slow(skb); + rcu_read_unlock(); return NETDEV_TX_OK; } #endif @@ -48,7 +50,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - rcu_read_lock(); if (is_broadcast_ether_addr(dest)) br_flood_deliver(br, skb); else if (is_multicast_ether_addr(dest)) { @@ -206,24 +207,23 @@ static void br_poll_controller(struct net_device *br_dev) static void br_netpoll_cleanup(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *p, *n; + struct net_bridge_port *p; - list_for_each_entry_safe(p, n, &br->port_list, list) { + list_for_each_entry(p, &br->port_list, list) br_netpoll_disable(p); - } } -static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, + gfp_t gfp) { struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *p, *n; + struct net_bridge_port *p; int err = 0; - list_for_each_entry_safe(p, n, &br->port_list, list) { + list_for_each_entry(p, &br->port_list, list) { if (!p->dev) continue; - - err = br_netpoll_enable(p); + err = br_netpoll_enable(p, gfp); if (err) goto fail; } @@ -236,17 +236,17 @@ fail: goto out; } -int br_netpoll_enable(struct net_bridge_port *p) +int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) { struct netpoll *np; int err = 0; - np = kzalloc(sizeof(*p->np), GFP_KERNEL); + np = kzalloc(sizeof(*p->np), gfp); err = -ENOMEM; if (!np) goto out; - err = __netpoll_setup(np, p->dev); + err = __netpoll_setup(np, p->dev, gfp); if (err) { kfree(np); goto out; @@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); - - __netpoll_cleanup(np); - kfree(np); + __netpoll_free_rcu(np); } #endif diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d21f32383517..d9576e6de2b8 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -312,7 +312,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, fe->is_local = f->is_local; if (!f->is_static) - fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated); + fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); ++fe; ++num; } @@ -467,14 +467,14 @@ static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, - u32 pid, u32 seq, int type, unsigned int flags) + u32 portid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -555,7 +555,7 @@ int br_fdb_dump(struct sk_buff *skb, goto skip; if (fdb_fill_info(skb, br, f, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) < 0) @@ -608,8 +608,9 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, } /* Add new permanent fdb entry with RTM_NEWNEIGH */ -int br_fdb_add(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr, u16 nlh_flags) +int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 nlh_flags) { struct net_bridge_port *p; int err = 0; @@ -639,7 +640,7 @@ int br_fdb_add(struct ndmsg *ndm, struct net_device *dev, return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr) +static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) { struct net_bridge *br = p->br; struct hlist_head *head = &br->hash[br_mac_hash(addr)]; @@ -655,7 +656,7 @@ static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr) /* Remove neighbor entry with RTM_DELNEIGH */ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr) + const unsigned char *addr) { struct net_bridge_port *p; int err; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e9466d412707..02015a505d2a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { skb->dev = to->dev; - if (unlikely(netpoll_tx_running(to->dev))) { + if (unlikely(netpoll_tx_running(to->br->dev))) { if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index e1144e1617be..1c8fdc3558cd 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err2; - if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) + if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) goto err3; err = netdev_set_master(dev, br->dev); @@ -427,6 +427,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) if (!p || p->br != br) return -EINVAL; + /* Since more than one interface can be attached to a bridge, + * there still maybe an alternate path for netconsole to use; + * therefore there is no reason for a NETDEV_RELEASE event. + */ del_nbp(p); spin_lock_bh(&br->lock); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe41260fbf38..093f527276a3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -127,7 +127,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) goto skip; if (br_fill_ifinfo(skb, port, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) < 0) break; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a768b2408edf..9b278c4ebee1 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, netpoll_send_skb(np, skb); } -extern int br_netpoll_enable(struct net_bridge_port *p); +extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); extern void br_netpoll_disable(struct net_bridge_port *p); #else static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) @@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, { } -static inline int br_netpoll_enable(struct net_bridge_port *p) +static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) { return 0; } @@ -363,10 +363,10 @@ extern void br_fdb_update(struct net_bridge *br, extern int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr); -extern int br_fdb_add(struct ndmsg *nlh, + const unsigned char *addr); +extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, - unsigned char *addr, + const unsigned char *addr, u16 nlh_flags); extern int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index a6747e673426..c3530a81a33b 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -170,5 +170,5 @@ void br_stp_port_timer_init(struct net_bridge_port *p) unsigned long br_timer_value(const struct timer_list *timer) { return timer_pending(timer) - ? jiffies_to_clock_t(timer->expires - jiffies) : 0; + ? jiffies_delta_to_clock_t(timer->expires - jiffies) : 0; } diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 6229b62749e8..13b36bdc76a7 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -27,7 +27,7 @@ struct brport_attribute { }; #define BRPORT_ATTR(_name,_mode,_show,_store) \ -struct brport_attribute brport_attr_##_name = { \ +const struct brport_attribute brport_attr_##_name = { \ .attr = {.name = __stringify(_name), \ .mode = _mode }, \ .show = _show, \ @@ -164,7 +164,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, store_multicast_router); #endif -static struct brport_attribute *brport_attrs[] = { +static const struct brport_attribute *brport_attrs[] = { &brport_attr_path_cost, &brport_attr_priority, &brport_attr_port_id, @@ -241,7 +241,7 @@ const struct sysfs_ops brport_sysfs_ops = { int br_sysfs_addif(struct net_bridge_port *p) { struct net_bridge *br = p->br; - struct brport_attribute **a; + const struct brport_attribute **a; int err; err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index f88ee537fb2b..92de5e5f9db2 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -80,7 +80,7 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, unsigned int bitmask; spin_lock_bh(&ebt_log_lock); - printk("<%c>%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", + printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : "", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 19063473c71f..3476ec469740 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -298,8 +298,7 @@ static int __init ebt_ulog_init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, - THIS_MODULE, &cfg); + ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); if (!ebtulognl) ret = -ENOMEM; else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 42e6bd094574..3c2e9dced9e0 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { static int __net_init frame_filter_net_init(struct net *net) { net->xt.frame_filter = ebt_register_table(net, &frame_filter); - if (IS_ERR(net->xt.frame_filter)) - return PTR_ERR(net->xt.frame_filter); - return 0; + return PTR_RET(net->xt.frame_filter); } static void __net_exit frame_filter_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 6dc2f878ae05..10871bc77908 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { static int __net_init frame_nat_net_init(struct net *net) { net->xt.frame_nat = ebt_register_table(net, &frame_nat); - if (IS_ERR(net->xt.frame_nat)) - return PTR_ERR(net->xt.frame_nat); - return 0; + return PTR_RET(net->xt.frame_nat); } static void __net_exit frame_nat_net_exit(struct net *net) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 78f1cdad5b33..095259f83902 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -141,7 +141,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) err = sk_filter(sk, skb); if (err) return err; - if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) { set_rx_flow_off(cf_sk); net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index dd485f6128e8..ba217e90765e 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -211,9 +211,10 @@ void caif_client_register_refcnt(struct cflayer *adapt_layer, void (*put)(struct cflayer *lyr)) { struct cfsrvl *service; - service = container_of(adapt_layer->dn, struct cfsrvl, layer); - WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL); + if (WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL)) + return; + service = container_of(adapt_layer->dn, struct cfsrvl, layer); service->hold = hold; service->put = put; } diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 69771c04ba8f..e597733affb8 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -94,6 +94,10 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) /* check the version of IP */ ip_version = skb_header_pointer(skb, 0, 1, &buf); + if (!ip_version) { + kfree_skb(skb); + return -EINVAL; + } switch (*ip_version >> 4) { case 4: diff --git a/net/can/gw.c b/net/can/gw.c index b54d5e695b03..127879c55fb6 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -549,7 +549,7 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; - if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).pid, + if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) break; cont: diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ba4323bce0e9..a8020293f342 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -17,6 +17,7 @@ #include <linux/string.h> +#include <linux/ceph/ceph_features.h> #include <linux/ceph/libceph.h> #include <linux/ceph/debugfs.h> #include <linux/ceph/decode.h> @@ -83,7 +84,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) return -1; } } else { - pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); memcpy(&client->fsid, fsid, sizeof(*fsid)); } return 0; @@ -460,27 +460,23 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, client->auth_err = 0; client->extra_mon_dispatch = NULL; - client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT | + client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT | supported_features; - client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT | + client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT | required_features; /* msgr */ if (ceph_test_opt(client, MYIP)) myaddr = &client->options->my_addr; - client->msgr = ceph_messenger_create(myaddr, - client->supported_features, - client->required_features); - if (IS_ERR(client->msgr)) { - err = PTR_ERR(client->msgr); - goto fail; - } - client->msgr->nocrc = ceph_test_opt(client, NOCRC); + ceph_messenger_init(&client->msgr, myaddr, + client->supported_features, + client->required_features, + ceph_test_opt(client, NOCRC)); /* subsystems */ err = ceph_monc_init(&client->monc, client); if (err < 0) - goto fail_msgr; + goto fail; err = ceph_osdc_init(&client->osdc, client); if (err < 0) goto fail_monc; @@ -489,8 +485,6 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, fail_monc: ceph_monc_stop(&client->monc); -fail_msgr: - ceph_messenger_destroy(client->msgr); fail: kfree(client); return ERR_PTR(err); @@ -501,6 +495,8 @@ void ceph_destroy_client(struct ceph_client *client) { dout("destroy_client %p\n", client); + atomic_set(&client->msgr.stopping, 1); + /* unmount */ ceph_osdc_stop(&client->osdc); @@ -508,8 +504,6 @@ void ceph_destroy_client(struct ceph_client *client) ceph_debugfs_client_cleanup(client); - ceph_messenger_destroy(client->msgr); - ceph_destroy_options(client->options); kfree(client); diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index d7edc24333b8..35fce755ce10 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -306,7 +306,6 @@ static int crush_choose(const struct crush_map *map, int item = 0; int itemtype; int collide, reject; - const unsigned int orig_tries = 5; /* attempts before we fall back to search */ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", bucket->id, x, outpos, numrep); @@ -351,8 +350,9 @@ static int crush_choose(const struct crush_map *map, reject = 1; goto reject; } - if (flocal >= (in->size>>1) && - flocal > orig_tries) + if (map->choose_local_fallback_tries > 0 && + flocal >= (in->size>>1) && + flocal > map->choose_local_fallback_tries) item = bucket_perm_choose(in, x, r); else item = crush_bucket_choose(in, x, r); @@ -422,13 +422,14 @@ reject: ftotal++; flocal++; - if (collide && flocal < 3) + if (collide && flocal <= map->choose_local_tries) /* retry locally a few times */ retry_bucket = 1; - else if (flocal <= in->size + orig_tries) + else if (map->choose_local_fallback_tries > 0 && + flocal <= in->size + map->choose_local_fallback_tries) /* exhaustive bucket search */ retry_bucket = 1; - else if (ftotal < 20) + else if (ftotal <= map->choose_total_tries) /* then retry descent */ retry_descent = 1; else diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index b780cb7947dd..9da7fdd3cd8a 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) { struct ceph_crypto_key *ckey = key->payload.data; ceph_crypto_key_destroy(ckey); + kfree(ckey); } struct key_type key_type_ceph = { diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 1919d1550d75..3572dc518bc9 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -16,7 +16,8 @@ struct ceph_crypto_key { static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) { - kfree(key->key); + if (key) + kfree(key->key); } extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst, diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 54b531a01121..38b5dc1823d4 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client) snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, client->monc.auth->global_id); + dout("ceph_debugfs_client_init %p %s\n", client, name); + + BUG_ON(client->debugfs_dir); client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); if (!client->debugfs_dir) goto out; @@ -234,6 +237,7 @@ out: void ceph_debugfs_client_cleanup(struct ceph_client *client) { + dout("ceph_debugfs_client_cleanup %p\n", client); debugfs_remove(client->debugfs_osdmap); debugfs_remove(client->debugfs_monmap); debugfs_remove(client->osdc.debugfs_file); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 10255e81be79..159aa8bef9e7 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -29,6 +29,74 @@ * the sender. */ +/* + * We track the state of the socket on a given connection using + * values defined below. The transition to a new socket state is + * handled by a function which verifies we aren't coming from an + * unexpected state. + * + * -------- + * | NEW* | transient initial state + * -------- + * | con_sock_state_init() + * v + * ---------- + * | CLOSED | initialized, but no socket (and no + * ---------- TCP connection) + * ^ \ + * | \ con_sock_state_connecting() + * | ---------------------- + * | \ + * + con_sock_state_closed() \ + * |+--------------------------- \ + * | \ \ \ + * | ----------- \ \ + * | | CLOSING | socket event; \ \ + * | ----------- await close \ \ + * | ^ \ | + * | | \ | + * | + con_sock_state_closing() \ | + * | / \ | | + * | / --------------- | | + * | / \ v v + * | / -------------- + * | / -----------------| CONNECTING | socket created, TCP + * | | / -------------- connect initiated + * | | | con_sock_state_connected() + * | | v + * ------------- + * | CONNECTED | TCP connection established + * ------------- + * + * State values for ceph_connection->sock_state; NEW is assumed to be 0. + */ + +#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */ +#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */ +#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */ +#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */ +#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */ + +/* + * connection states + */ +#define CON_STATE_CLOSED 1 /* -> PREOPEN */ +#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */ +#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */ +#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */ +#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */ +#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */ + +/* + * ceph_connection flag bits + */ +#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop + * messages on errors */ +#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */ +#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */ +#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */ +#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */ + /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; @@ -147,72 +215,130 @@ void ceph_msgr_flush(void) } EXPORT_SYMBOL(ceph_msgr_flush); +/* Connection socket state transition functions */ + +static void con_sock_state_init(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); + if (WARN_ON(old_state != CON_SOCK_STATE_NEW)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CLOSED); +} + +static void con_sock_state_connecting(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING); + if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CONNECTING); +} + +static void con_sock_state_connected(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED); + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CONNECTED); +} + +static void con_sock_state_closing(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING); + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING && + old_state != CON_SOCK_STATE_CONNECTED && + old_state != CON_SOCK_STATE_CLOSING)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CLOSING); +} + +static void con_sock_state_closed(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED && + old_state != CON_SOCK_STATE_CLOSING && + old_state != CON_SOCK_STATE_CONNECTING && + old_state != CON_SOCK_STATE_CLOSED)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CLOSED); +} /* * socket callback functions */ /* data available on socket, or listen socket received a connect */ -static void ceph_data_ready(struct sock *sk, int count_unused) +static void ceph_sock_data_ready(struct sock *sk, int count_unused) { struct ceph_connection *con = sk->sk_user_data; + if (atomic_read(&con->msgr->stopping)) { + return; + } if (sk->sk_state != TCP_CLOSE_WAIT) { - dout("ceph_data_ready on %p state = %lu, queueing work\n", + dout("%s on %p state = %lu, queueing work\n", __func__, con, con->state); queue_con(con); } } /* socket has buffer space for writing */ -static void ceph_write_space(struct sock *sk) +static void ceph_sock_write_space(struct sock *sk) { struct ceph_connection *con = sk->sk_user_data; /* only queue to workqueue if there is data we want to write, * and there is sufficient space in the socket buffer to accept - * more data. clear SOCK_NOSPACE so that ceph_write_space() + * more data. clear SOCK_NOSPACE so that ceph_sock_write_space() * doesn't get called again until try_write() fills the socket * buffer. See net/ipv4/tcp_input.c:tcp_check_space() * and net/core/stream.c:sk_stream_write_space(). */ - if (test_bit(WRITE_PENDING, &con->state)) { + if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) { if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { - dout("ceph_write_space %p queueing write work\n", con); + dout("%s %p queueing write work\n", __func__, con); clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); queue_con(con); } } else { - dout("ceph_write_space %p nothing to write\n", con); + dout("%s %p nothing to write\n", __func__, con); } } /* socket's state has changed */ -static void ceph_state_change(struct sock *sk) +static void ceph_sock_state_change(struct sock *sk) { struct ceph_connection *con = sk->sk_user_data; - dout("ceph_state_change %p state = %lu sk_state = %u\n", + dout("%s %p state = %lu sk_state = %u\n", __func__, con, con->state, sk->sk_state); - if (test_bit(CLOSED, &con->state)) - return; - switch (sk->sk_state) { case TCP_CLOSE: - dout("ceph_state_change TCP_CLOSE\n"); + dout("%s TCP_CLOSE\n", __func__); case TCP_CLOSE_WAIT: - dout("ceph_state_change TCP_CLOSE_WAIT\n"); - if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) { - if (test_bit(CONNECTING, &con->state)) - con->error_msg = "connection failed"; - else - con->error_msg = "socket closed"; - queue_con(con); - } + dout("%s TCP_CLOSE_WAIT\n", __func__); + con_sock_state_closing(con); + set_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + queue_con(con); break; case TCP_ESTABLISHED: - dout("ceph_state_change TCP_ESTABLISHED\n"); + dout("%s TCP_ESTABLISHED\n", __func__); + con_sock_state_connected(con); queue_con(con); break; default: /* Everything else is uninteresting */ @@ -228,9 +354,9 @@ static void set_sock_callbacks(struct socket *sock, { struct sock *sk = sock->sk; sk->sk_user_data = con; - sk->sk_data_ready = ceph_data_ready; - sk->sk_write_space = ceph_write_space; - sk->sk_state_change = ceph_state_change; + sk->sk_data_ready = ceph_sock_data_ready; + sk->sk_write_space = ceph_sock_write_space; + sk->sk_state_change = ceph_sock_state_change; } @@ -262,6 +388,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); + con_sock_state_connecting(con); ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), O_NONBLOCK); if (ret == -EINPROGRESS) { @@ -277,7 +404,6 @@ static int ceph_tcp_connect(struct ceph_connection *con) return ret; } con->sock = sock; - return 0; } @@ -333,16 +459,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, */ static int con_close_socket(struct ceph_connection *con) { - int rc; + int rc = 0; dout("con_close_socket on %p sock %p\n", con, con->sock); - if (!con->sock) - return 0; - set_bit(SOCK_CLOSED, &con->state); - rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); - sock_release(con->sock); - con->sock = NULL; - clear_bit(SOCK_CLOSED, &con->state); + if (con->sock) { + rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); + sock_release(con->sock); + con->sock = NULL; + } + + /* + * Forcibly clear the SOCK_CLOSED flag. It gets set + * independent of the connection mutex, and we could have + * received a socket close event before we had the chance to + * shut the socket down. + */ + clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + + con_sock_state_closed(con); return rc; } @@ -353,6 +487,10 @@ static int con_close_socket(struct ceph_connection *con) static void ceph_msg_remove(struct ceph_msg *msg) { list_del_init(&msg->list_head); + BUG_ON(msg->con == NULL); + msg->con->ops->put(msg->con); + msg->con = NULL; + ceph_msg_put(msg); } static void ceph_msg_remove_list(struct list_head *head) @@ -372,8 +510,11 @@ static void reset_connection(struct ceph_connection *con) ceph_msg_remove_list(&con->out_sent); if (con->in_msg) { + BUG_ON(con->in_msg->con != con); + con->in_msg->con = NULL; ceph_msg_put(con->in_msg); con->in_msg = NULL; + con->ops->put(con); } con->connect_seq = 0; @@ -391,32 +532,44 @@ static void reset_connection(struct ceph_connection *con) */ void ceph_con_close(struct ceph_connection *con) { + mutex_lock(&con->mutex); dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr.in_addr)); - set_bit(CLOSED, &con->state); /* in case there's queued work */ - clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ - clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ - clear_bit(KEEPALIVE_PENDING, &con->state); - clear_bit(WRITE_PENDING, &con->state); - mutex_lock(&con->mutex); + con->state = CON_STATE_CLOSED; + + clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */ + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); + clear_bit(CON_FLAG_BACKOFF, &con->flags); + reset_connection(con); con->peer_global_seq = 0; cancel_delayed_work(&con->work); + con_close_socket(con); mutex_unlock(&con->mutex); - queue_con(con); } EXPORT_SYMBOL(ceph_con_close); /* * Reopen a closed connection, with a new peer address. */ -void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) +void ceph_con_open(struct ceph_connection *con, + __u8 entity_type, __u64 entity_num, + struct ceph_entity_addr *addr) { + mutex_lock(&con->mutex); dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); - set_bit(OPENING, &con->state); - clear_bit(CLOSED, &con->state); + + BUG_ON(con->state != CON_STATE_CLOSED); + con->state = CON_STATE_PREOPEN; + + con->peer_name.type = (__u8) entity_type; + con->peer_name.num = cpu_to_le64(entity_num); + memcpy(&con->peer_addr, addr, sizeof(*addr)); con->delay = 0; /* reset backoff memory */ + mutex_unlock(&con->mutex); queue_con(con); } EXPORT_SYMBOL(ceph_con_open); @@ -430,42 +583,26 @@ bool ceph_con_opened(struct ceph_connection *con) } /* - * generic get/put - */ -struct ceph_connection *ceph_con_get(struct ceph_connection *con) -{ - int nref = __atomic_add_unless(&con->nref, 1, 0); - - dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1); - - return nref ? con : NULL; -} - -void ceph_con_put(struct ceph_connection *con) -{ - int nref = atomic_dec_return(&con->nref); - - BUG_ON(nref < 0); - if (nref == 0) { - BUG_ON(con->sock); - kfree(con); - } - dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref); -} - -/* * initialize a new connection. */ -void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) +void ceph_con_init(struct ceph_connection *con, void *private, + const struct ceph_connection_operations *ops, + struct ceph_messenger *msgr) { dout("con_init %p\n", con); memset(con, 0, sizeof(*con)); - atomic_set(&con->nref, 1); + con->private = private; + con->ops = ops; con->msgr = msgr; + + con_sock_state_init(con); + mutex_init(&con->mutex); INIT_LIST_HEAD(&con->out_queue); INIT_LIST_HEAD(&con->out_sent); INIT_DELAYED_WORK(&con->work, con_work); + + con->state = CON_STATE_CLOSED; } EXPORT_SYMBOL(ceph_con_init); @@ -486,14 +623,14 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) return ret; } -static void ceph_con_out_kvec_reset(struct ceph_connection *con) +static void con_out_kvec_reset(struct ceph_connection *con) { con->out_kvec_left = 0; con->out_kvec_bytes = 0; con->out_kvec_cur = &con->out_kvec[0]; } -static void ceph_con_out_kvec_add(struct ceph_connection *con, +static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { int index; @@ -507,6 +644,53 @@ static void ceph_con_out_kvec_add(struct ceph_connection *con, con->out_kvec_bytes += size; } +#ifdef CONFIG_BLOCK +static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) +{ + if (!bio) { + *iter = NULL; + *seg = 0; + return; + } + *iter = bio; + *seg = bio->bi_idx; +} + +static void iter_bio_next(struct bio **bio_iter, int *seg) +{ + if (*bio_iter == NULL) + return; + + BUG_ON(*seg >= (*bio_iter)->bi_vcnt); + + (*seg)++; + if (*seg == (*bio_iter)->bi_vcnt) + init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); +} +#endif + +static void prepare_write_message_data(struct ceph_connection *con) +{ + struct ceph_msg *msg = con->out_msg; + + BUG_ON(!msg); + BUG_ON(!msg->hdr.data_len); + + /* initialize page iterator */ + con->out_msg_pos.page = 0; + if (msg->pages) + con->out_msg_pos.page_pos = msg->page_alignment; + else + con->out_msg_pos.page_pos = 0; +#ifdef CONFIG_BLOCK + if (msg->bio) + init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); +#endif + con->out_msg_pos.data_pos = 0; + con->out_msg_pos.did_page_crc = false; + con->out_more = 1; /* data + footer will follow */ +} + /* * Prepare footer for currently outgoing message, and finish things * off. Assumes out_kvec* are already valid.. we just add on to the end. @@ -516,6 +700,8 @@ static void prepare_write_message_footer(struct ceph_connection *con) struct ceph_msg *m = con->out_msg; int v = con->out_kvec_left; + m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; + dout("prepare_write_message_footer %p\n", con); con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; @@ -534,7 +720,7 @@ static void prepare_write_message(struct ceph_connection *con) struct ceph_msg *m; u32 crc; - ceph_con_out_kvec_reset(con); + con_out_kvec_reset(con); con->out_kvec_is_msg = true; con->out_msg_done = false; @@ -542,14 +728,16 @@ static void prepare_write_message(struct ceph_connection *con) * TCP packet that's a good thing. */ if (con->in_seq > con->in_seq_acked) { con->in_seq_acked = con->in_seq; - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->out_temp_ack = cpu_to_le64(con->in_seq_acked); - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), + con_out_kvec_add(con, sizeof (con->out_temp_ack), &con->out_temp_ack); } + BUG_ON(list_empty(&con->out_queue)); m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); con->out_msg = m; + BUG_ON(m->con != con); /* put message on sent list */ ceph_msg_get(m); @@ -576,18 +764,18 @@ static void prepare_write_message(struct ceph_connection *con) BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); /* tag + hdr + front + middle */ - ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); - ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); - ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); + con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); + con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); + con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) - ceph_con_out_kvec_add(con, m->middle->vec.iov_len, + con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); /* fill in crc (except data pages), footer */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); con->out_msg->hdr.crc = cpu_to_le32(crc); - con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; + con->out_msg->footer.flags = 0; crc = crc32c(0, m->front.iov_base, m->front.iov_len); con->out_msg->footer.front_crc = cpu_to_le32(crc); @@ -597,28 +785,19 @@ static void prepare_write_message(struct ceph_connection *con) con->out_msg->footer.middle_crc = cpu_to_le32(crc); } else con->out_msg->footer.middle_crc = 0; - con->out_msg->footer.data_crc = 0; - dout("prepare_write_message front_crc %u data_crc %u\n", + dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(con->out_msg->footer.middle_crc)); /* is there a data payload? */ - if (le32_to_cpu(m->hdr.data_len) > 0) { - /* initialize page iterator */ - con->out_msg_pos.page = 0; - if (m->pages) - con->out_msg_pos.page_pos = m->page_alignment; - else - con->out_msg_pos.page_pos = 0; - con->out_msg_pos.data_pos = 0; - con->out_msg_pos.did_page_crc = false; - con->out_more = 1; /* data + footer will follow */ - } else { + con->out_msg->footer.data_crc = 0; + if (m->hdr.data_len) + prepare_write_message_data(con); + else /* no, queue up footer too and be done */ prepare_write_message_footer(con); - } - set_bit(WRITE_PENDING, &con->state); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); } /* @@ -630,16 +809,16 @@ static void prepare_write_ack(struct ceph_connection *con) con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; - ceph_con_out_kvec_reset(con); + con_out_kvec_reset(con); - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->out_temp_ack = cpu_to_le64(con->in_seq_acked); - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), + con_out_kvec_add(con, sizeof (con->out_temp_ack), &con->out_temp_ack); con->out_more = 1; /* more will follow.. eventually.. */ - set_bit(WRITE_PENDING, &con->state); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); } /* @@ -648,9 +827,9 @@ static void prepare_write_ack(struct ceph_connection *con) static void prepare_write_keepalive(struct ceph_connection *con) { dout("prepare_write_keepalive %p\n", con); - ceph_con_out_kvec_reset(con); - ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); - set_bit(WRITE_PENDING, &con->state); + con_out_kvec_reset(con); + con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); } /* @@ -665,27 +844,21 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection if (!con->ops->get_authorizer) { con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; con->out_connect.authorizer_len = 0; - return NULL; } /* Can't hold the mutex while getting authorizer */ - mutex_unlock(&con->mutex); - auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); - mutex_lock(&con->mutex); if (IS_ERR(auth)) return auth; - if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state)) + if (con->state != CON_STATE_NEGOTIATING) return ERR_PTR(-EAGAIN); con->auth_reply_buf = auth->authorizer_reply_buf; con->auth_reply_buf_len = auth->authorizer_reply_buf_len; - - return auth; } @@ -694,12 +867,12 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection */ static void prepare_write_banner(struct ceph_connection *con) { - ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); - ceph_con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), + con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); + con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), &con->msgr->my_enc_addr); con->out_more = 0; - set_bit(WRITE_PENDING, &con->state); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); } static int prepare_write_connect(struct ceph_connection *con) @@ -742,14 +915,14 @@ static int prepare_write_connect(struct ceph_connection *con) con->out_connect.authorizer_len = auth ? cpu_to_le32(auth->authorizer_buf_len) : 0; - ceph_con_out_kvec_add(con, sizeof (con->out_connect), + con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); if (auth && auth->authorizer_buf_len) - ceph_con_out_kvec_add(con, auth->authorizer_buf_len, + con_out_kvec_add(con, auth->authorizer_buf_len, auth->authorizer_buf); con->out_more = 0; - set_bit(WRITE_PENDING, &con->state); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); return 0; } @@ -797,30 +970,34 @@ out: return ret; /* done! */ } -#ifdef CONFIG_BLOCK -static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) +static void out_msg_pos_next(struct ceph_connection *con, struct page *page, + size_t len, size_t sent, bool in_trail) { - if (!bio) { - *iter = NULL; - *seg = 0; - return; - } - *iter = bio; - *seg = bio->bi_idx; -} + struct ceph_msg *msg = con->out_msg; -static void iter_bio_next(struct bio **bio_iter, int *seg) -{ - if (*bio_iter == NULL) - return; + BUG_ON(!msg); + BUG_ON(!sent); - BUG_ON(*seg >= (*bio_iter)->bi_vcnt); + con->out_msg_pos.data_pos += sent; + con->out_msg_pos.page_pos += sent; + if (sent < len) + return; - (*seg)++; - if (*seg == (*bio_iter)->bi_vcnt) - init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); -} + BUG_ON(sent != len); + con->out_msg_pos.page_pos = 0; + con->out_msg_pos.page++; + con->out_msg_pos.did_page_crc = false; + if (in_trail) + list_move_tail(&page->lru, + &msg->trail->head); + else if (msg->pagelist) + list_move_tail(&page->lru, + &msg->pagelist->head); +#ifdef CONFIG_BLOCK + else if (msg->bio) + iter_bio_next(&msg->bio_iter, &msg->bio_seg); #endif +} /* * Write as much message data payload as we can. If we finish, queue @@ -837,41 +1014,36 @@ static int write_partial_msg_pages(struct ceph_connection *con) bool do_datacrc = !con->msgr->nocrc; int ret; int total_max_write; - int in_trail = 0; - size_t trail_len = (msg->trail ? msg->trail->length : 0); + bool in_trail = false; + const size_t trail_len = (msg->trail ? msg->trail->length : 0); + const size_t trail_off = data_len - trail_len; dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", - con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, + con, msg, con->out_msg_pos.page, msg->nr_pages, con->out_msg_pos.page_pos); -#ifdef CONFIG_BLOCK - if (msg->bio && !msg->bio_iter) - init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); -#endif - + /* + * Iterate through each page that contains data to be + * written, and send as much as possible for each. + * + * If we are calculating the data crc (the default), we will + * need to map the page. If we have no pages, they have + * been revoked, so use the zero page. + */ while (data_len > con->out_msg_pos.data_pos) { struct page *page = NULL; int max_write = PAGE_SIZE; int bio_offset = 0; - total_max_write = data_len - trail_len - - con->out_msg_pos.data_pos; - - /* - * if we are calculating the data crc (the default), we need - * to map the page. if our pages[] has been revoked, use the - * zero page. - */ - - /* have we reached the trail part of the data? */ - if (con->out_msg_pos.data_pos >= data_len - trail_len) { - in_trail = 1; + in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off; + if (!in_trail) + total_max_write = trail_off - con->out_msg_pos.data_pos; + if (in_trail) { total_max_write = data_len - con->out_msg_pos.data_pos; page = list_first_entry(&msg->trail->head, struct page, lru); - max_write = PAGE_SIZE; } else if (msg->pages) { page = msg->pages[con->out_msg_pos.page]; } else if (msg->pagelist) { @@ -894,52 +1066,32 @@ static int write_partial_msg_pages(struct ceph_connection *con) if (do_datacrc && !con->out_msg_pos.did_page_crc) { void *base; - u32 crc; - u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); + u32 crc = le32_to_cpu(msg->footer.data_crc); char *kaddr; kaddr = kmap(page); BUG_ON(kaddr == NULL); base = kaddr + con->out_msg_pos.page_pos + bio_offset; - crc = crc32c(tmpcrc, base, len); - con->out_msg->footer.data_crc = cpu_to_le32(crc); + crc = crc32c(crc, base, len); + kunmap(page); + msg->footer.data_crc = cpu_to_le32(crc); con->out_msg_pos.did_page_crc = true; } ret = ceph_tcp_sendpage(con->sock, page, con->out_msg_pos.page_pos + bio_offset, len, 1); - - if (do_datacrc) - kunmap(page); - if (ret <= 0) goto out; - con->out_msg_pos.data_pos += ret; - con->out_msg_pos.page_pos += ret; - if (ret == len) { - con->out_msg_pos.page_pos = 0; - con->out_msg_pos.page++; - con->out_msg_pos.did_page_crc = false; - if (in_trail) - list_move_tail(&page->lru, - &msg->trail->head); - else if (msg->pagelist) - list_move_tail(&page->lru, - &msg->pagelist->head); -#ifdef CONFIG_BLOCK - else if (msg->bio) - iter_bio_next(&msg->bio_iter, &msg->bio_seg); -#endif - } + out_msg_pos_next(con, page, len, (size_t) ret, in_trail); } dout("write_partial_msg_pages %p msg %p done\n", con, msg); /* prepare and queue up footer, too */ if (!do_datacrc) - con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; - ceph_con_out_kvec_reset(con); + msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; + con_out_kvec_reset(con); prepare_write_message_footer(con); ret = 1; out: @@ -1351,20 +1503,14 @@ static int process_banner(struct ceph_connection *con) ceph_pr_addr(&con->msgr->inst.addr.in_addr)); } - set_bit(NEGOTIATING, &con->state); - prepare_read_connect(con); return 0; } static void fail_protocol(struct ceph_connection *con) { reset_connection(con); - set_bit(CLOSED, &con->state); /* in case there's queued work */ - - mutex_unlock(&con->mutex); - if (con->ops->bad_proto) - con->ops->bad_proto(con); - mutex_lock(&con->mutex); + BUG_ON(con->state != CON_STATE_NEGOTIATING); + con->state = CON_STATE_CLOSED; } static int process_connect(struct ceph_connection *con) @@ -1407,7 +1553,7 @@ static int process_connect(struct ceph_connection *con) return -1; } con->auth_retry = 1; - ceph_con_out_kvec_reset(con); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1428,7 +1574,7 @@ static int process_connect(struct ceph_connection *con) ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr)); reset_connection(con); - ceph_con_out_kvec_reset(con); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1440,8 +1586,7 @@ static int process_connect(struct ceph_connection *con) if (con->ops->peer_reset) con->ops->peer_reset(con); mutex_lock(&con->mutex); - if (test_bit(CLOSED, &con->state) || - test_bit(OPENING, &con->state)) + if (con->state != CON_STATE_NEGOTIATING) return -EAGAIN; break; @@ -1454,7 +1599,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->out_connect.connect_seq), le32_to_cpu(con->in_reply.connect_seq)); con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); - ceph_con_out_kvec_reset(con); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1471,7 +1616,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.global_seq)); get_global_seq(con->msgr, le32_to_cpu(con->in_reply.global_seq)); - ceph_con_out_kvec_reset(con); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1489,7 +1634,10 @@ static int process_connect(struct ceph_connection *con) fail_protocol(con); return -1; } - clear_bit(CONNECTING, &con->state); + + BUG_ON(con->state != CON_STATE_NEGOTIATING); + con->state = CON_STATE_OPEN; + con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); con->connect_seq++; con->peer_features = server_feat; @@ -1501,7 +1649,9 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.connect_seq)); if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) - set_bit(LOSSYTX, &con->state); + set_bit(CON_FLAG_LOSSYTX, &con->flags); + + con->delay = 0; /* reset backoff memory */ prepare_read_tag(con); break; @@ -1587,10 +1737,7 @@ static int read_partial_message_section(struct ceph_connection *con, return 1; } -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, - struct ceph_msg_header *hdr, - int *skip); - +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); static int read_partial_message_pages(struct ceph_connection *con, struct page **pages, @@ -1633,9 +1780,6 @@ static int read_partial_message_bio(struct ceph_connection *con, void *p; int ret, left; - if (IS_ERR(bv)) - return PTR_ERR(bv); - left = min((int)(data_len - con->in_msg_pos.data_pos), (int)(bv->bv_len - con->in_msg_pos.page_pos)); @@ -1672,7 +1816,6 @@ static int read_partial_message(struct ceph_connection *con) int ret; unsigned int front_len, middle_len, data_len; bool do_datacrc = !con->msgr->nocrc; - int skip; u64 seq; u32 crc; @@ -1723,10 +1866,13 @@ static int read_partial_message(struct ceph_connection *con) /* allocate message? */ if (!con->in_msg) { + int skip = 0; + dout("got hdr type %d front %d data %d\n", con->in_hdr.type, con->in_hdr.front_len, con->in_hdr.data_len); - skip = 0; - con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); + ret = ceph_con_in_msg_alloc(con, &skip); + if (ret < 0) + return ret; if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); @@ -1737,11 +1883,9 @@ static int read_partial_message(struct ceph_connection *con) con->in_seq++; return 0; } - if (!con->in_msg) { - con->error_msg = - "error allocating memory for incoming message"; - return -ENOMEM; - } + + BUG_ON(!con->in_msg); + BUG_ON(con->in_msg->con != con); m = con->in_msg; m->front.iov_len = 0; /* haven't read it yet */ if (m->middle) @@ -1753,6 +1897,11 @@ static int read_partial_message(struct ceph_connection *con) else con->in_msg_pos.page_pos = 0; con->in_msg_pos.data_pos = 0; + +#ifdef CONFIG_BLOCK + if (m->bio) + init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); +#endif } /* front */ @@ -1769,10 +1918,6 @@ static int read_partial_message(struct ceph_connection *con) if (ret <= 0) return ret; } -#ifdef CONFIG_BLOCK - if (m->bio && !m->bio_iter) - init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); -#endif /* (page) data */ while (con->in_msg_pos.data_pos < data_len) { @@ -1783,7 +1928,7 @@ static int read_partial_message(struct ceph_connection *con) return ret; #ifdef CONFIG_BLOCK } else if (m->bio) { - + BUG_ON(!m->bio_iter); ret = read_partial_message_bio(con, &m->bio_iter, &m->bio_seg, data_len, do_datacrc); @@ -1837,8 +1982,11 @@ static void process_message(struct ceph_connection *con) { struct ceph_msg *msg; + BUG_ON(con->in_msg->con != con); + con->in_msg->con = NULL; msg = con->in_msg; con->in_msg = NULL; + con->ops->put(con); /* if first message, set peer_name */ if (con->peer_name.type == 0) @@ -1858,7 +2006,6 @@ static void process_message(struct ceph_connection *con) con->ops->dispatch(con, msg); mutex_lock(&con->mutex); - prepare_read_tag(con); } @@ -1870,22 +2017,19 @@ static int try_write(struct ceph_connection *con) { int ret = 1; - dout("try_write start %p state %lu nref %d\n", con, con->state, - atomic_read(&con->nref)); + dout("try_write start %p state %lu\n", con, con->state); more: dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); /* open the socket first? */ - if (con->sock == NULL) { - ceph_con_out_kvec_reset(con); + if (con->state == CON_STATE_PREOPEN) { + BUG_ON(con->sock); + con->state = CON_STATE_CONNECTING; + + con_out_kvec_reset(con); prepare_write_banner(con); - ret = prepare_write_connect(con); - if (ret < 0) - goto out; prepare_read_banner(con); - set_bit(CONNECTING, &con->state); - clear_bit(NEGOTIATING, &con->state); BUG_ON(con->in_msg); con->in_tag = CEPH_MSGR_TAG_READY; @@ -1932,7 +2076,7 @@ more_kvec: } do_next: - if (!test_bit(CONNECTING, &con->state)) { + if (con->state == CON_STATE_OPEN) { /* is anything else pending? */ if (!list_empty(&con->out_queue)) { prepare_write_message(con); @@ -1942,14 +2086,15 @@ do_next: prepare_write_ack(con); goto more; } - if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) { + if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING, + &con->flags)) { prepare_write_keepalive(con); goto more; } } /* Nothing to do! */ - clear_bit(WRITE_PENDING, &con->state); + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); dout("try_write nothing else to write.\n"); ret = 0; out: @@ -1966,38 +2111,46 @@ static int try_read(struct ceph_connection *con) { int ret = -1; - if (!con->sock) - return 0; - - if (test_bit(STANDBY, &con->state)) +more: + dout("try_read start on %p state %lu\n", con, con->state); + if (con->state != CON_STATE_CONNECTING && + con->state != CON_STATE_NEGOTIATING && + con->state != CON_STATE_OPEN) return 0; - dout("try_read start on %p\n", con); + BUG_ON(!con->sock); -more: dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, con->in_base_pos); - /* - * process_connect and process_message drop and re-take - * con->mutex. make sure we handle a racing close or reopen. - */ - if (test_bit(CLOSED, &con->state) || - test_bit(OPENING, &con->state)) { - ret = -EAGAIN; + if (con->state == CON_STATE_CONNECTING) { + dout("try_read connecting\n"); + ret = read_partial_banner(con); + if (ret <= 0) + goto out; + ret = process_banner(con); + if (ret < 0) + goto out; + + BUG_ON(con->state != CON_STATE_CONNECTING); + con->state = CON_STATE_NEGOTIATING; + + /* + * Received banner is good, exchange connection info. + * Do not reset out_kvec, as sending our banner raced + * with receiving peer banner after connect completed. + */ + ret = prepare_write_connect(con); + if (ret < 0) + goto out; + prepare_read_connect(con); + + /* Send connection info before awaiting response */ goto out; } - if (test_bit(CONNECTING, &con->state)) { - if (!test_bit(NEGOTIATING, &con->state)) { - dout("try_read connecting\n"); - ret = read_partial_banner(con); - if (ret <= 0) - goto out; - ret = process_banner(con); - if (ret < 0) - goto out; - } + if (con->state == CON_STATE_NEGOTIATING) { + dout("try_read negotiating\n"); ret = read_partial_connect(con); if (ret <= 0) goto out; @@ -2007,6 +2160,8 @@ more: goto more; } + BUG_ON(con->state != CON_STATE_OPEN); + if (con->in_base_pos < 0) { /* * skipping + discarding content. @@ -2040,7 +2195,8 @@ more: prepare_read_ack(con); break; case CEPH_MSGR_TAG_CLOSE: - set_bit(CLOSED, &con->state); /* fixme */ + con_close_socket(con); + con->state = CON_STATE_CLOSED; goto out; default: goto bad_tag; @@ -2063,6 +2219,8 @@ more: if (con->in_tag == CEPH_MSGR_TAG_READY) goto more; process_message(con); + if (con->state == CON_STATE_OPEN) + prepare_read_tag(con); goto more; } if (con->in_tag == CEPH_MSGR_TAG_ACK) { @@ -2091,12 +2249,6 @@ bad_tag: */ static void queue_con(struct ceph_connection *con) { - if (test_bit(DEAD, &con->state)) { - dout("queue_con %p ignoring: DEAD\n", - con); - return; - } - if (!con->ops->get(con)) { dout("queue_con %p ref count 0\n", con); return; @@ -2121,7 +2273,26 @@ static void con_work(struct work_struct *work) mutex_lock(&con->mutex); restart: - if (test_and_clear_bit(BACKOFF, &con->state)) { + if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) { + switch (con->state) { + case CON_STATE_CONNECTING: + con->error_msg = "connection failed"; + break; + case CON_STATE_NEGOTIATING: + con->error_msg = "negotiation failed"; + break; + case CON_STATE_OPEN: + con->error_msg = "socket closed"; + break; + default: + dout("unrecognized con state %d\n", (int)con->state); + con->error_msg = "unrecognized con state"; + BUG(); + } + goto fault; + } + + if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { dout("con_work %p backing off\n", con); if (queue_delayed_work(ceph_msgr_wq, &con->work, round_jiffies_relative(con->delay))) { @@ -2135,35 +2306,35 @@ restart: } } - if (test_bit(STANDBY, &con->state)) { + if (con->state == CON_STATE_STANDBY) { dout("con_work %p STANDBY\n", con); goto done; } - if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ - dout("con_work CLOSED\n"); - con_close_socket(con); + if (con->state == CON_STATE_CLOSED) { + dout("con_work %p CLOSED\n", con); + BUG_ON(con->sock); goto done; } - if (test_and_clear_bit(OPENING, &con->state)) { - /* reopen w/ new peer */ + if (con->state == CON_STATE_PREOPEN) { dout("con_work OPENING\n"); - con_close_socket(con); + BUG_ON(con->sock); } - if (test_and_clear_bit(SOCK_CLOSED, &con->state)) - goto fault; - ret = try_read(con); if (ret == -EAGAIN) goto restart; - if (ret < 0) + if (ret < 0) { + con->error_msg = "socket error on read"; goto fault; + } ret = try_write(con); if (ret == -EAGAIN) goto restart; - if (ret < 0) + if (ret < 0) { + con->error_msg = "socket error on write"; goto fault; + } done: mutex_unlock(&con->mutex); @@ -2172,7 +2343,6 @@ done_unlocked: return; fault: - mutex_unlock(&con->mutex); ceph_fault(con); /* error/fault path */ goto done_unlocked; } @@ -2183,26 +2353,31 @@ fault: * exponential backoff */ static void ceph_fault(struct ceph_connection *con) + __releases(con->mutex) { pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); - if (test_bit(LOSSYTX, &con->state)) { - dout("fault on LOSSYTX channel\n"); - goto out; - } - - mutex_lock(&con->mutex); - if (test_bit(CLOSED, &con->state)) - goto out_unlock; + BUG_ON(con->state != CON_STATE_CONNECTING && + con->state != CON_STATE_NEGOTIATING && + con->state != CON_STATE_OPEN); con_close_socket(con); + if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) { + dout("fault on LOSSYTX channel, marking CLOSED\n"); + con->state = CON_STATE_CLOSED; + goto out_unlock; + } + if (con->in_msg) { + BUG_ON(con->in_msg->con != con); + con->in_msg->con = NULL; ceph_msg_put(con->in_msg); con->in_msg = NULL; + con->ops->put(con); } /* Requeue anything that hasn't been acked */ @@ -2211,12 +2386,13 @@ static void ceph_fault(struct ceph_connection *con) /* If there are no messages queued or keepalive pending, place * the connection in a STANDBY state */ if (list_empty(&con->out_queue) && - !test_bit(KEEPALIVE_PENDING, &con->state)) { + !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) { dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); - clear_bit(WRITE_PENDING, &con->state); - set_bit(STANDBY, &con->state); + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con->state = CON_STATE_STANDBY; } else { /* retry after a delay. */ + con->state = CON_STATE_PREOPEN; if (con->delay == 0) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) @@ -2237,13 +2413,12 @@ static void ceph_fault(struct ceph_connection *con) * that when con_work restarts we schedule the * delay then. */ - set_bit(BACKOFF, &con->state); + set_bit(CON_FLAG_BACKOFF, &con->flags); } } out_unlock: mutex_unlock(&con->mutex); -out: /* * in case we faulted due to authentication, invalidate our * current tickets so that we can get new ones. @@ -2260,18 +2435,14 @@ out: /* - * create a new messenger instance + * initialize a new messenger instance */ -struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, - u32 supported_features, - u32 required_features) +void ceph_messenger_init(struct ceph_messenger *msgr, + struct ceph_entity_addr *myaddr, + u32 supported_features, + u32 required_features, + bool nocrc) { - struct ceph_messenger *msgr; - - msgr = kzalloc(sizeof(*msgr), GFP_KERNEL); - if (msgr == NULL) - return ERR_PTR(-ENOMEM); - msgr->supported_features = supported_features; msgr->required_features = required_features; @@ -2284,30 +2455,23 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, msgr->inst.addr.type = 0; get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); encode_my_addr(msgr); + msgr->nocrc = nocrc; - dout("messenger_create %p\n", msgr); - return msgr; -} -EXPORT_SYMBOL(ceph_messenger_create); + atomic_set(&msgr->stopping, 0); -void ceph_messenger_destroy(struct ceph_messenger *msgr) -{ - dout("destroy %p\n", msgr); - kfree(msgr); - dout("destroyed messenger %p\n", msgr); + dout("%s %p\n", __func__, msgr); } -EXPORT_SYMBOL(ceph_messenger_destroy); +EXPORT_SYMBOL(ceph_messenger_init); static void clear_standby(struct ceph_connection *con) { /* come back from STANDBY? */ - if (test_and_clear_bit(STANDBY, &con->state)) { - mutex_lock(&con->mutex); + if (con->state == CON_STATE_STANDBY) { dout("clear_standby %p and ++connect_seq\n", con); + con->state = CON_STATE_PREOPEN; con->connect_seq++; - WARN_ON(test_bit(WRITE_PENDING, &con->state)); - WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); - mutex_unlock(&con->mutex); + WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags)); + WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)); } } @@ -2316,21 +2480,24 @@ static void clear_standby(struct ceph_connection *con) */ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) { - if (test_bit(CLOSED, &con->state)) { - dout("con_send %p closed, dropping %p\n", con, msg); - ceph_msg_put(msg); - return; - } - /* set src+dst */ msg->hdr.src = con->msgr->inst.name; - BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); - msg->needs_out_seq = true; - /* queue */ mutex_lock(&con->mutex); + + if (con->state == CON_STATE_CLOSED) { + dout("con_send %p closed, dropping %p\n", con, msg); + ceph_msg_put(msg); + mutex_unlock(&con->mutex); + return; + } + + BUG_ON(msg->con != NULL); + msg->con = con->ops->get(con); + BUG_ON(msg->con == NULL); + BUG_ON(!list_empty(&msg->list_head)); list_add_tail(&msg->list_head, &con->out_queue); dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg, @@ -2339,12 +2506,13 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) le32_to_cpu(msg->hdr.front_len), le32_to_cpu(msg->hdr.middle_len), le32_to_cpu(msg->hdr.data_len)); + + clear_standby(con); mutex_unlock(&con->mutex); /* if there wasn't anything waiting to send before, queue * new work */ - clear_standby(con); - if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) + if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_send); @@ -2352,24 +2520,34 @@ EXPORT_SYMBOL(ceph_con_send); /* * Revoke a message that was previously queued for send */ -void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) +void ceph_msg_revoke(struct ceph_msg *msg) { + struct ceph_connection *con = msg->con; + + if (!con) + return; /* Message not in our possession */ + mutex_lock(&con->mutex); if (!list_empty(&msg->list_head)) { - dout("con_revoke %p msg %p - was on queue\n", con, msg); + dout("%s %p msg %p - was on queue\n", __func__, con, msg); list_del_init(&msg->list_head); - ceph_msg_put(msg); + BUG_ON(msg->con == NULL); + msg->con->ops->put(msg->con); + msg->con = NULL; msg->hdr.seq = 0; + + ceph_msg_put(msg); } if (con->out_msg == msg) { - dout("con_revoke %p msg %p - was sending\n", con, msg); + dout("%s %p msg %p - was sending\n", __func__, con, msg); con->out_msg = NULL; if (con->out_kvec_is_msg) { con->out_skip = con->out_kvec_bytes; con->out_kvec_is_msg = false; } - ceph_msg_put(msg); msg->hdr.seq = 0; + + ceph_msg_put(msg); } mutex_unlock(&con->mutex); } @@ -2377,17 +2555,27 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) /* * Revoke a message that we may be reading data into */ -void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) +void ceph_msg_revoke_incoming(struct ceph_msg *msg) { + struct ceph_connection *con; + + BUG_ON(msg == NULL); + if (!msg->con) { + dout("%s msg %p null con\n", __func__, msg); + + return; /* Message not in our possession */ + } + + con = msg->con; mutex_lock(&con->mutex); - if (con->in_msg && con->in_msg == msg) { + if (con->in_msg == msg) { unsigned int front_len = le32_to_cpu(con->in_hdr.front_len); unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len); unsigned int data_len = le32_to_cpu(con->in_hdr.data_len); /* skip rest of message */ - dout("con_revoke_pages %p msg %p revoked\n", con, msg); - con->in_base_pos = con->in_base_pos - + dout("%s %p msg %p revoked\n", __func__, con, msg); + con->in_base_pos = con->in_base_pos - sizeof(struct ceph_msg_header) - front_len - middle_len - @@ -2398,8 +2586,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) con->in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; } else { - dout("con_revoke_pages %p msg %p pages %p no-op\n", - con, con->in_msg, msg); + dout("%s %p in_msg %p msg %p no-op\n", + __func__, con, con->in_msg, msg); } mutex_unlock(&con->mutex); } @@ -2410,9 +2598,11 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) void ceph_con_keepalive(struct ceph_connection *con) { dout("con_keepalive %p\n", con); + mutex_lock(&con->mutex); clear_standby(con); - if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && - test_and_set_bit(WRITE_PENDING, &con->state) == 0) + mutex_unlock(&con->mutex); + if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 && + test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); @@ -2431,6 +2621,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, if (m == NULL) goto out; kref_init(&m->kref); + + m->con = NULL; INIT_LIST_HEAD(&m->list_head); m->hdr.tid = 0; @@ -2526,46 +2718,77 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) } /* - * Generic message allocator, for incoming messages. + * Allocate a message for receiving an incoming message on a + * connection, and save the result in con->in_msg. Uses the + * connection's private alloc_msg op if available. + * + * Returns 0 on success, or a negative error code. + * + * On success, if we set *skip = 1: + * - the next message should be skipped and ignored. + * - con->in_msg == NULL + * or if we set *skip = 0: + * - con->in_msg is non-null. + * On error (ENOMEM, EAGAIN, ...), + * - con->in_msg == NULL */ -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, - struct ceph_msg_header *hdr, - int *skip) +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) { + struct ceph_msg_header *hdr = &con->in_hdr; int type = le16_to_cpu(hdr->type); int front_len = le32_to_cpu(hdr->front_len); int middle_len = le32_to_cpu(hdr->middle_len); - struct ceph_msg *msg = NULL; - int ret; + int ret = 0; + + BUG_ON(con->in_msg != NULL); if (con->ops->alloc_msg) { + struct ceph_msg *msg; + mutex_unlock(&con->mutex); msg = con->ops->alloc_msg(con, hdr, skip); mutex_lock(&con->mutex); - if (!msg || *skip) - return NULL; + if (con->state != CON_STATE_OPEN) { + ceph_msg_put(msg); + return -EAGAIN; + } + con->in_msg = msg; + if (con->in_msg) { + con->in_msg->con = con->ops->get(con); + BUG_ON(con->in_msg->con == NULL); + } + if (*skip) { + con->in_msg = NULL; + return 0; + } + if (!con->in_msg) { + con->error_msg = + "error allocating memory for incoming message"; + return -ENOMEM; + } } - if (!msg) { - *skip = 0; - msg = ceph_msg_new(type, front_len, GFP_NOFS, false); - if (!msg) { + if (!con->in_msg) { + con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false); + if (!con->in_msg) { pr_err("unable to allocate msg type %d len %d\n", type, front_len); - return NULL; + return -ENOMEM; } - msg->page_alignment = le16_to_cpu(hdr->data_off); + con->in_msg->con = con->ops->get(con); + BUG_ON(con->in_msg->con == NULL); + con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); } - memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); + memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); - if (middle_len && !msg->middle) { - ret = ceph_alloc_middle(con, msg); + if (middle_len && !con->in_msg->middle) { + ret = ceph_alloc_middle(con, con->in_msg); if (ret < 0) { - ceph_msg_put(msg); - return NULL; + ceph_msg_put(con->in_msg); + con->in_msg = NULL; } } - return msg; + return ret; } diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index d0649a9655be..900ea0f043fc 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -106,9 +106,9 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) monc->pending_auth = 1; monc->m_auth->front.iov_len = len; monc->m_auth->hdr.front_len = cpu_to_le32(len); - ceph_con_revoke(monc->con, monc->m_auth); + ceph_msg_revoke(monc->m_auth); ceph_msg_get(monc->m_auth); /* keep our ref */ - ceph_con_send(monc->con, monc->m_auth); + ceph_con_send(&monc->con, monc->m_auth); } /* @@ -117,8 +117,11 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) static void __close_session(struct ceph_mon_client *monc) { dout("__close_session closing mon%d\n", monc->cur_mon); - ceph_con_revoke(monc->con, monc->m_auth); - ceph_con_close(monc->con); + ceph_msg_revoke(monc->m_auth); + ceph_msg_revoke_incoming(monc->m_auth_reply); + ceph_msg_revoke(monc->m_subscribe); + ceph_msg_revoke_incoming(monc->m_subscribe_ack); + ceph_con_close(&monc->con); monc->cur_mon = -1; monc->pending_auth = 0; ceph_auth_reset(monc->auth); @@ -142,9 +145,8 @@ static int __open_session(struct ceph_mon_client *monc) monc->want_next_osdmap = !!monc->want_next_osdmap; dout("open_session mon%d opening\n", monc->cur_mon); - monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON; - monc->con->peer_name.num = cpu_to_le64(monc->cur_mon); - ceph_con_open(monc->con, + ceph_con_open(&monc->con, + CEPH_ENTITY_TYPE_MON, monc->cur_mon, &monc->monmap->mon_inst[monc->cur_mon].addr); /* initiatiate authentication handshake */ @@ -226,8 +228,8 @@ static void __send_subscribe(struct ceph_mon_client *monc) msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - ceph_con_revoke(monc->con, msg); - ceph_con_send(monc->con, ceph_msg_get(msg)); + ceph_msg_revoke(msg); + ceph_con_send(&monc->con, ceph_msg_get(msg)); monc->sub_sent = jiffies | 1; /* never 0 */ } @@ -247,7 +249,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, if (monc->hunting) { pr_info("mon%d %s session established\n", monc->cur_mon, - ceph_pr_addr(&monc->con->peer_addr.in_addr)); + ceph_pr_addr(&monc->con.peer_addr.in_addr)); monc->hunting = false; } dout("handle_subscribe_ack after %d seconds\n", seconds); @@ -309,6 +311,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) EXPORT_SYMBOL(ceph_monc_open_session); /* + * We require the fsid and global_id in order to initialize our + * debugfs dir. + */ +static bool have_debugfs_info(struct ceph_mon_client *monc) +{ + dout("have_debugfs_info fsid %d globalid %lld\n", + (int)monc->client->have_fsid, monc->auth->global_id); + return monc->client->have_fsid && monc->auth->global_id > 0; +} + +/* * The monitor responds with mount ack indicate mount success. The * included client ticket allows the client to talk to MDSs and OSDs. */ @@ -318,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, struct ceph_client *client = monc->client; struct ceph_monmap *monmap = NULL, *old = monc->monmap; void *p, *end; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); + dout("handle_monmap\n"); p = msg->front.iov_base; end = p + msg->front.iov_len; @@ -342,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, if (!client->have_fsid) { client->have_fsid = true; + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); - /* - * do debugfs initialization without mutex to avoid - * creating a locking dependency - */ - ceph_debugfs_client_init(client); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } + goto out_unlocked; } out: @@ -439,6 +465,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, m = NULL; } else { dout("get_generic_reply %lld got %p\n", tid, req->reply); + *skip = 0; m = ceph_msg_get(req->reply); /* * we don't need to track the connection reading into @@ -461,7 +488,7 @@ static int do_generic_request(struct ceph_mon_client *monc, req->request->hdr.tid = cpu_to_le64(req->tid); __insert_generic_request(monc, req); monc->num_generic_requests++; - ceph_con_send(monc->con, ceph_msg_get(req->request)); + ceph_con_send(&monc->con, ceph_msg_get(req->request)); mutex_unlock(&monc->mutex); err = wait_for_completion_interruptible(&req->completion); @@ -684,8 +711,9 @@ static void __resend_generic_request(struct ceph_mon_client *monc) for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { req = rb_entry(p, struct ceph_mon_generic_request, node); - ceph_con_revoke(monc->con, req->request); - ceph_con_send(monc->con, ceph_msg_get(req->request)); + ceph_msg_revoke(req->request); + ceph_msg_revoke_incoming(req->reply); + ceph_con_send(&monc->con, ceph_msg_get(req->request)); } } @@ -705,7 +733,7 @@ static void delayed_work(struct work_struct *work) __close_session(monc); __open_session(monc); /* continue hunting */ } else { - ceph_con_keepalive(monc->con); + ceph_con_keepalive(&monc->con); __validate_auth(monc); @@ -760,19 +788,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) goto out; /* connection */ - monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); - if (!monc->con) - goto out_monmap; - ceph_con_init(monc->client->msgr, monc->con); - monc->con->private = monc; - monc->con->ops = &mon_con_ops; - /* authentication */ monc->auth = ceph_auth_init(cl->options->name, cl->options->key); if (IS_ERR(monc->auth)) { err = PTR_ERR(monc->auth); - goto out_con; + goto out_monmap; } monc->auth->want_keys = CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | @@ -801,6 +822,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) if (!monc->m_auth) goto out_auth_reply; + ceph_con_init(&monc->con, monc, &mon_con_ops, + &monc->client->msgr); + monc->cur_mon = -1; monc->hunting = true; monc->sub_renew_after = jiffies; @@ -824,8 +848,6 @@ out_subscribe_ack: ceph_msg_put(monc->m_subscribe_ack); out_auth: ceph_auth_destroy(monc->auth); -out_con: - monc->con->ops->put(monc->con); out_monmap: kfree(monc->monmap); out: @@ -841,10 +863,6 @@ void ceph_monc_stop(struct ceph_mon_client *monc) mutex_lock(&monc->mutex); __close_session(monc); - monc->con->private = NULL; - monc->con->ops->put(monc->con); - monc->con = NULL; - mutex_unlock(&monc->mutex); /* @@ -871,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc, { int ret; int was_auth = 0; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); if (monc->auth->ops) was_auth = monc->auth->ops->is_authenticated(monc->auth); monc->pending_auth = 0; @@ -888,14 +908,29 @@ static void handle_auth_reply(struct ceph_mon_client *monc, } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); - monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; - monc->client->msgr->inst.name.num = + monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; + monc->client->msgr.inst.name.num = cpu_to_le64(monc->auth->global_id); __send_subscribe(monc); __resend_generic_request(monc); } + + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } } static int __validate_auth(struct ceph_mon_client *monc) @@ -1000,6 +1035,8 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: m = ceph_msg_new(type, front_len, GFP_NOFS, false); + if (!m) + return NULL; /* ENOMEM--return skip == 0 */ break; } @@ -1029,7 +1066,7 @@ static void mon_fault(struct ceph_connection *con) if (!monc->hunting) pr_info("mon%d %s session lost, " "hunting for new mon\n", monc->cur_mon, - ceph_pr_addr(&monc->con->peer_addr.in_addr)); + ceph_pr_addr(&monc->con.peer_addr.in_addr)); __close_session(monc); if (!monc->hunting) { @@ -1044,9 +1081,23 @@ out: mutex_unlock(&monc->mutex); } +/* + * We can ignore refcounting on the connection struct, as all references + * will come from the messenger workqueue, which is drained prior to + * mon_client destruction. + */ +static struct ceph_connection *con_get(struct ceph_connection *con) +{ + return con; +} + +static void con_put(struct ceph_connection *con) +{ +} + static const struct ceph_connection_operations mon_con_ops = { - .get = ceph_con_get, - .put = ceph_con_put, + .get = con_get, + .put = con_put, .dispatch = dispatch, .fault = mon_fault, .alloc_msg = mon_alloc_msg, diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index 11d5f4196a73..ddec1c10ac80 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg) struct ceph_msgpool *pool = arg; struct ceph_msg *msg; - msg = ceph_msg_new(0, pool->front_len, gfp_mask, true); + msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true); if (!msg) { dout("msgpool_alloc %s failed\n", pool->name); } else { @@ -32,10 +32,11 @@ static void msgpool_free(void *element, void *arg) ceph_msg_put(msg); } -int ceph_msgpool_init(struct ceph_msgpool *pool, +int ceph_msgpool_init(struct ceph_msgpool *pool, int type, int front_len, int size, bool blocking, const char *name) { dout("msgpool %s init\n", name); + pool->type = type; pool->front_len = front_len; pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); if (!pool->pool) @@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, WARN_ON(1); /* try to alloc a fresh message */ - return ceph_msg_new(0, front_len, GFP_NOFS, false); + return ceph_msg_new(pool->type, front_len, GFP_NOFS, false); } msg = mempool_alloc(pool->pool, GFP_NOFS); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ca59e66c9787..42119c05e82c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -140,10 +140,9 @@ void ceph_osdc_release_request(struct kref *kref) if (req->r_request) ceph_msg_put(req->r_request); if (req->r_con_filling_msg) { - dout("release_request revoking pages %p from con %p\n", + dout("%s revoking pages %p from con %p\n", __func__, req->r_pages, req->r_con_filling_msg); - ceph_con_revoke_message(req->r_con_filling_msg, - req->r_reply); + ceph_msg_revoke_incoming(req->r_reply); req->r_con_filling_msg->ops->put(req->r_con_filling_msg); } if (req->r_reply) @@ -214,10 +213,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); + rb_init_node(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); INIT_LIST_HEAD(&req->r_req_lru_item); + INIT_LIST_HEAD(&req->r_osd_item); + req->r_flags = flags; WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); @@ -243,6 +245,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } ceph_pagelist_init(req->r_trail); } + /* create request message; allow space for oid */ msg_size += MAX_OBJ_NAME_SIZE; if (snapc) @@ -256,7 +259,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, return NULL; } - msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); memset(msg->front.iov_base, 0, msg->front.iov_len); req->r_request = msg; @@ -624,7 +626,7 @@ static void osd_reset(struct ceph_connection *con) /* * Track open sessions with osds. */ -static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) +static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) { struct ceph_osd *osd; @@ -634,15 +636,13 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) atomic_set(&osd->o_ref, 1); osd->o_osdc = osdc; + osd->o_osd = onum; INIT_LIST_HEAD(&osd->o_requests); INIT_LIST_HEAD(&osd->o_linger_requests); INIT_LIST_HEAD(&osd->o_osd_lru); osd->o_incarnation = 1; - ceph_con_init(osdc->client->msgr, &osd->o_con); - osd->o_con.private = osd; - osd->o_con.ops = &osd_con_ops; - osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD; + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); INIT_LIST_HEAD(&osd->o_keepalive_item); return osd; @@ -688,7 +688,7 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) static void remove_all_osds(struct ceph_osd_client *osdc) { - dout("__remove_old_osds %p\n", osdc); + dout("%s %p\n", __func__, osdc); mutex_lock(&osdc->request_mutex); while (!RB_EMPTY_ROOT(&osdc->osds)) { struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), @@ -752,7 +752,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) ret = -EAGAIN; } else { ceph_con_close(&osd->o_con); - ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, + &osdc->osdmap->osd_addr[osd->o_osd]); osd->o_incarnation++; } return ret; @@ -853,7 +854,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, if (req->r_osd) { /* make sure the original request isn't in flight. */ - ceph_con_revoke(&req->r_osd->o_con, req->r_request); + ceph_msg_revoke(req->r_request); list_del_init(&req->r_osd_item); if (list_empty(&req->r_osd->o_requests) && @@ -880,7 +881,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, static void __cancel_request(struct ceph_osd_request *req) { if (req->r_sent && req->r_osd) { - ceph_con_revoke(&req->r_osd->o_con, req->r_request); + ceph_msg_revoke(req->r_request); req->r_sent = 0; } } @@ -890,7 +891,9 @@ static void __register_linger_request(struct ceph_osd_client *osdc, { dout("__register_linger_request %p\n", req); list_add_tail(&req->r_linger_item, &osdc->req_linger); - list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests); + if (req->r_osd) + list_add_tail(&req->r_linger_osd, + &req->r_osd->o_linger_requests); } static void __unregister_linger_request(struct ceph_osd_client *osdc, @@ -998,18 +1001,18 @@ static int __map_request(struct ceph_osd_client *osdc, req->r_osd = __lookup_osd(osdc, o); if (!req->r_osd && o >= 0) { err = -ENOMEM; - req->r_osd = create_osd(osdc); + req->r_osd = create_osd(osdc, o); if (!req->r_osd) { list_move(&req->r_req_lru_item, &osdc->req_notarget); goto out; } dout("map_request osd %p is osd%d\n", req->r_osd, o); - req->r_osd->o_osd = o; - req->r_osd->o_con.peer_name.num = cpu_to_le64(o); __insert_osd(osdc, req->r_osd); - ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]); + ceph_con_open(&req->r_osd->o_con, + CEPH_ENTITY_TYPE_OSD, o, + &osdc->osdmap->osd_addr[o]); } if (req->r_osd) { @@ -1304,8 +1307,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("kick_requests %s\n", force_resend ? " (force resend)" : ""); mutex_lock(&osdc->request_mutex); - for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { + for (p = rb_first(&osdc->requests); p; ) { req = rb_entry(p, struct ceph_osd_request, r_node); + p = rb_next(p); err = __map_request(osdc, req, force_resend); if (err < 0) continue; /* error */ @@ -1313,10 +1317,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("%p tid %llu maps to no osd\n", req, req->r_tid); needmap++; /* request a newer map */ } else if (err > 0) { - dout("%p tid %llu requeued on osd%d\n", req, req->r_tid, - req->r_osd ? req->r_osd->o_osd : -1); - if (!req->r_linger) + if (!req->r_linger) { + dout("%p tid %llu requeued on osd%d\n", req, + req->r_tid, + req->r_osd ? req->r_osd->o_osd : -1); req->r_flags |= CEPH_OSD_FLAG_RETRY; + } + } + if (req->r_linger && list_empty(&req->r_linger_item)) { + /* + * register as a linger so that we will + * re-submit below and get a new tid + */ + dout("%p tid %llu restart on osd%d\n", + req, req->r_tid, + req->r_osd ? req->r_osd->o_osd : -1); + __register_linger_request(osdc, req); + __unregister_request(osdc, req); } } @@ -1391,7 +1408,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) epoch, maplen); newmap = osdmap_apply_incremental(&p, next, osdc->osdmap, - osdc->client->msgr); + &osdc->client->msgr); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad; @@ -1839,11 +1856,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) if (!osdc->req_mempool) goto out; - err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true, + err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP, + OSD_OP_FRONT_LEN, 10, true, "osd_op"); if (err < 0) goto out_mempool; - err = ceph_msgpool_init(&osdc->msgpool_op_reply, + err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY, OSD_OPREPLY_FRONT_LEN, 10, true, "osd_op_reply"); if (err < 0) @@ -2019,15 +2037,15 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (!req) { *skip = 1; m = NULL; - pr_info("get_reply unknown tid %llu from osd%d\n", tid, - osd->o_osd); + dout("get_reply unknown tid %llu from osd%d\n", tid, + osd->o_osd); goto out; } if (req->r_con_filling_msg) { - dout("get_reply revoking msg %p from old con %p\n", + dout("%s revoking msg %p from old con %p\n", __func__, req->r_reply, req->r_con_filling_msg); - ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); + ceph_msg_revoke_incoming(req->r_reply); req->r_con_filling_msg->ops->put(req->r_con_filling_msg); req->r_con_filling_msg = NULL; } @@ -2080,6 +2098,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, int type = le16_to_cpu(hdr->type); int front = le32_to_cpu(hdr->front_len); + *skip = 0; switch (type) { case CEPH_MSG_OSD_MAP: case CEPH_MSG_WATCH_NOTIFY: diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 81e3b84a77ef..3124b71a8883 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -135,6 +135,21 @@ bad: return -EINVAL; } +static int skip_name_map(void **p, void *end) +{ + int len; + ceph_decode_32_safe(p, end, len ,bad); + while (len--) { + int strlen; + *p += sizeof(u32); + ceph_decode_32_safe(p, end, strlen, bad); + *p += strlen; +} + return 0; +bad: + return -EINVAL; +} + static struct crush_map *crush_decode(void *pbyval, void *end) { struct crush_map *c; @@ -143,6 +158,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) void **p = &pbyval; void *start = pbyval; u32 magic; + u32 num_name_maps; dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p)); @@ -150,6 +166,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) if (c == NULL) return ERR_PTR(-ENOMEM); + /* set tunables to default values */ + c->choose_local_tries = 2; + c->choose_local_fallback_tries = 5; + c->choose_total_tries = 19; + ceph_decode_need(p, end, 4*sizeof(u32), bad); magic = ceph_decode_32(p); if (magic != CRUSH_MAGIC) { @@ -297,7 +318,25 @@ static struct crush_map *crush_decode(void *pbyval, void *end) } /* ignore trailing name maps. */ + for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) { + err = skip_name_map(p, end); + if (err < 0) + goto done; + } + + /* tunables */ + ceph_decode_need(p, end, 3*sizeof(u32), done); + c->choose_local_tries = ceph_decode_32(p); + c->choose_local_fallback_tries = ceph_decode_32(p); + c->choose_total_tries = ceph_decode_32(p); + dout("crush decode tunable choose_local_tries = %d", + c->choose_local_tries); + dout("crush decode tunable choose_local_fallback_tries = %d", + c->choose_local_fallback_tries); + dout("crush decode tunable choose_total_tries = %d", + c->choose_total_tries); +done: dout("crush_decode success\n"); return c; @@ -488,15 +527,16 @@ static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) ceph_decode_32_safe(p, end, pool, bad); ceph_decode_32_safe(p, end, len, bad); dout(" pool %d len %d\n", pool, len); + ceph_decode_need(p, end, len, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) { + char *name = kstrndup(*p, len, GFP_NOFS); + + if (!name) + return -ENOMEM; kfree(pi->name); - pi->name = kmalloc(len + 1, GFP_NOFS); - if (pi->name) { - memcpy(pi->name, *p, len); - pi->name[len] = '\0'; - dout(" name is %s\n", pi->name); - } + pi->name = name; + dout(" name is %s\n", pi->name); } *p += len; } @@ -666,6 +706,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); ceph_decode_copy(p, &pgid, sizeof(pgid)); n = ceph_decode_32(p); + err = -EINVAL; + if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) + goto bad; ceph_decode_need(p, end, n * sizeof(u32), bad); err = -ENOMEM; pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); @@ -889,6 +932,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, (void) __remove_pg_mapping(&map->pg_temp, pgid); /* insert */ + if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) { + err = -EINVAL; + goto bad; + } pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); if (!pg) { err = -ENOMEM; diff --git a/net/core/dev.c b/net/core/dev.c index 0ebaea16632f..1e0a1847c3bb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -959,18 +959,30 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -static int dev_get_valid_name(struct net_device *dev, const char *name) +static int dev_alloc_name_ns(struct net *net, + struct net_device *dev, + const char *name) { - struct net *net; + char buf[IFNAMSIZ]; + int ret; - BUG_ON(!dev_net(dev)); - net = dev_net(dev); + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; +} + +static int dev_get_valid_name(struct net *net, + struct net_device *dev, + const char *name) +{ + BUG_ON(!net); if (!dev_valid_name(name)) return -EINVAL; if (strchr(name, '%')) - return dev_alloc_name(dev, name); + return dev_alloc_name_ns(net, dev, name); else if (__dev_get_by_name(net, name)) return -EEXIST; else if (dev->name != name) @@ -1006,7 +1018,7 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); - err = dev_get_valid_name(dev, newname); + err = dev_get_valid_name(net, dev, newname); if (err < 0) return err; @@ -1055,6 +1067,8 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { + char *new_ifalias; + ASSERT_RTNL(); if (len >= IFALIASZ) @@ -1068,9 +1082,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return 0; } - dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!dev->ifalias) + new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); + if (!new_ifalias) return -ENOMEM; + dev->ifalias = new_ifalias; strlcpy(dev->ifalias, alias, len+1); return len; @@ -1106,11 +1121,23 @@ void netdev_state_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_state_change); -int netdev_bonding_change(struct net_device *dev, unsigned long event) +/** + * netdev_notify_peers - notify network peers about existence of @dev + * @dev: network device + * + * Generate traffic such that interested network peers are aware of + * @dev, such as by generating a gratuitous ARP. This may be used when + * a device wants to inform the rest of the network about some sort of + * reconfiguration such as a failover event or virtual machine + * migration. + */ +void netdev_notify_peers(struct net_device *dev) { - return call_netdevice_notifiers(event, dev); + rtnl_lock(); + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + rtnl_unlock(); } -EXPORT_SYMBOL(netdev_bonding_change); +EXPORT_SYMBOL(netdev_notify_peers); /** * dev_load - load a network module @@ -1172,6 +1199,7 @@ static int __dev_open(struct net_device *dev) net_dmaengine_get(); dev_set_rx_mode(dev); dev_activate(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); } return ret; @@ -1390,7 +1418,6 @@ rollback: nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); - nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } @@ -1432,7 +1459,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb) nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); - nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } unlock: @@ -1638,6 +1664,19 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } +static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) +{ + if (ptype->af_packet_priv == NULL) + return false; + + if (ptype->id_match) + return ptype->id_match(ptype, skb->sk); + else if ((struct sock *)ptype->af_packet_priv == skb->sk) + return true; + + return false; +} + /* * Support routine. Sends outgoing frames to any network * taps currently in use. @@ -1655,8 +1694,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) * they originated from - MvS (miquels@drinkel.ow.org) */ if ((ptype->dev == dev || !ptype->dev) && - (ptype->af_packet_priv == NULL || - (struct sock *)ptype->af_packet_priv != skb->sk)) { + (!skb_loop_sk(ptype, skb))) { if (pt_prev) { deliver_skb(skb2, pt_prev, skb->dev); pt_prev = ptype; @@ -2118,7 +2156,8 @@ static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { - if (!can_checksum_protocol(features, protocol)) { + if (skb->ip_summed != CHECKSUM_NONE && + !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { @@ -2133,6 +2172,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) __be16 protocol = skb->protocol; netdev_features_t features = skb->dev->features; + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) + features &= ~NETIF_F_GSO_MASK; + if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; @@ -2155,9 +2197,7 @@ EXPORT_SYMBOL(netif_skb_features); /* * Returns true if either: * 1. skb has frag_list and the device doesn't support FRAGLIST, or - * 2. skb is fragmented and the device does not support SG, or if - * at least one of fragments is in highmem and device does not - * support DMA from it. + * 2. skb is fragmented and the device does not support SG. */ static inline int skb_needs_linearize(struct sk_buff *skb, int features) @@ -2186,9 +2226,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(skb); - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(skb, dev); - features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && @@ -2223,6 +2260,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } } + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(skb, dev); + skb_len = skb->len; rc = ops->ndo_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); @@ -2245,6 +2285,9 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(nskb, dev); + skb_len = nskb->len; rc = ops->ndo_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); @@ -2354,8 +2397,8 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) #endif } -static struct netdev_queue *dev_pick_tx(struct net_device *dev, - struct sk_buff *skb) +struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb) { int queue_index; const struct net_device_ops *ops = dev->netdev_ops; @@ -2529,7 +2572,7 @@ int dev_queue_xmit(struct sk_buff *skb) skb_update_prio(skb); - txq = dev_pick_tx(dev, skb); + txq = netdev_pick_tx(dev, skb); q = rcu_dereference_bh(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT @@ -2602,6 +2645,8 @@ EXPORT_SYMBOL(dev_queue_xmit); =======================================================================*/ int netdev_max_backlog __read_mostly = 1000; +EXPORT_SYMBOL(netdev_max_backlog); + int netdev_tstamp_prequeue __read_mostly = 1; int netdev_budget __read_mostly = 300; int weight_p __read_mostly = 64; /* old backlog weight */ @@ -2628,15 +2673,16 @@ void __skb_get_rxhash(struct sk_buff *skb) if (!skb_flow_dissect(skb, &keys)) return; - if (keys.ports) { - if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]) - swap(keys.port16[0], keys.port16[1]); + if (keys.ports) skb->l4_rxhash = 1; - } /* get a consistent hash (same value on both flow directions) */ - if ((__force u32)keys.dst < (__force u32)keys.src) + if (((__force u32)keys.dst < (__force u32)keys.src) || + (((__force u32)keys.dst == (__force u32)keys.src) && + ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { swap(keys.dst, keys.src); + swap(keys.port16[0], keys.port16[1]); + } hash = jhash_3words((__force u32)keys.dst, (__force u32)keys.src, @@ -3155,6 +3201,23 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); +/* + * Limit the use of PFMEMALLOC reserves to those protocols that implement + * the special handling of PFMEMALLOC skbs. + */ +static bool skb_pfmemalloc_protocol(struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_ARP): + case __constant_htons(ETH_P_IP): + case __constant_htons(ETH_P_IPV6): + case __constant_htons(ETH_P_8021Q): + return true; + default: + return false; + } +} + static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -3164,14 +3227,27 @@ static int __netif_receive_skb(struct sk_buff *skb) bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; + unsigned long pflags = current->flags; net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); + /* + * PFMEMALLOC skbs are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as this saves us from propagating the allocation + * context down to all allocation sites. + */ + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + current->flags |= PF_MEMALLOC; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + goto out; orig_dev = skb->dev; @@ -3191,7 +3267,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { skb = vlan_untag(skb); if (unlikely(!skb)) - goto out; + goto unlock; } #ifdef CONFIG_NET_CLS_ACT @@ -3201,6 +3277,9 @@ another_round: } #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + goto skip_taps; + list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) @@ -3209,13 +3288,18 @@ another_round: } } +skip_taps: #ifdef CONFIG_NET_CLS_ACT skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto out; + goto unlock; ncls: #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb) + && !skb_pfmemalloc_protocol(skb)) + goto drop; + rx_handler = rcu_dereference(skb->dev->rx_handler); if (vlan_tx_tag_present(skb)) { if (pt_prev) { @@ -3225,7 +3309,7 @@ ncls: if (vlan_do_receive(&skb, !rx_handler)) goto another_round; else if (unlikely(!skb)) - goto out; + goto unlock; } if (rx_handler) { @@ -3235,7 +3319,7 @@ ncls: } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: - goto out; + goto unlock; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3264,10 +3348,11 @@ ncls: if (pt_prev) { if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) - ret = -ENOMEM; + goto drop; else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { +drop: atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); /* Jamal, now you will not able to escape explaining @@ -3276,8 +3361,10 @@ ncls: ret = NET_RX_DROP; } -out: +unlock: rcu_read_unlock(); +out: + tsk_restore_flags(current, pflags, PF_MEMALLOC); return ret; } @@ -4450,8 +4537,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; ASSERT_RTNL(); @@ -4482,8 +4569,9 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), - audit_get_loginuid(current), - uid, gid, + from_kuid(&init_user_ns, audit_get_loginuid(current)), + from_kuid(&init_user_ns, uid), + from_kgid(&init_user_ns, gid), audit_get_sessionid(current)); } @@ -4801,6 +4889,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + add_device_randomness(dev->dev_addr, dev->addr_len); return err; } EXPORT_SYMBOL(dev_set_mac_address); @@ -5175,12 +5264,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) */ static int dev_new_index(struct net *net) { - static int ifindex; + int ifindex = net->ifindex; for (;;) { if (++ifindex <= 0) ifindex = 1; if (!__dev_get_by_index(net, ifindex)) - return ifindex; + return net->ifindex = ifindex; } } @@ -5258,10 +5347,6 @@ static void rollback_registered_many(struct list_head *head) netdev_unregister_kobject(dev); } - /* Process any work delayed until the end of the batch */ - dev = list_first_entry(head, struct net_device, unreg_list); - call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - synchronize_net(); list_for_each_entry(dev, head, unreg_list) @@ -5519,7 +5604,7 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; - ret = dev_get_valid_name(dev, dev->name); + ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) goto out; @@ -5533,7 +5618,12 @@ int register_netdevice(struct net_device *dev) } } - dev->ifindex = dev_new_index(net); + ret = -EBUSY; + if (!dev->ifindex) + dev->ifindex = dev_new_index(net); + else if (__dev_get_by_index(net, dev->ifindex)) + goto err_uninit; + if (dev->iflink == -1) dev->iflink = dev->ifindex; @@ -5576,9 +5666,12 @@ int register_netdevice(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); + linkwatch_init_dev(dev); + dev_init_scheduler(dev); dev_hold(dev); list_netdevice(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); /* Notify protocols, that a new device appeared. */ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); @@ -5682,6 +5775,7 @@ EXPORT_SYMBOL(netdev_refcnt_read); /** * netdev_wait_allrefs - wait until all references are gone. + * @dev: target net_device * * This is called when unregistering network devices. * @@ -5707,9 +5801,12 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users - * should have already handle it the first time */ + __rtnl_unlock(); + rcu_barrier(); + rtnl_lock(); + + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { /* We must not have linkwatch events @@ -5771,9 +5868,8 @@ void netdev_run_todo(void) __rtnl_unlock(); - /* Wait for rcu callbacks to finish before attempting to drain - * the device list. This usually avoids a 250ms wait. - */ + + /* Wait for rcu callbacks to finish before next phase */ if (!list_empty(&list)) rcu_barrier(); @@ -5782,6 +5878,10 @@ void netdev_run_todo(void) = list_first_entry(&list, struct net_device, todo_list); list_del(&dev->todo_list); + rtnl_lock(); + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); + __rtnl_unlock(); + if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { pr_err("network todo '%s' but state %d\n", dev->name, dev->reg_state); @@ -5877,6 +5977,8 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) return queue; } +static const struct ethtool_ops default_ethtool_ops; + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -5942,6 +6044,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); dev->gso_max_size = GSO_MAX_SIZE; + dev->gso_max_segs = GSO_MAX_SEGS; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); @@ -5963,6 +6066,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, strcpy(dev->name, name); dev->group = INIT_NETDEV_GROUP; + if (!dev->ethtool_ops) + dev->ethtool_ops = &default_ethtool_ops; return dev; free_all: @@ -6147,7 +6252,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(dev, pat) < 0) + if (dev_get_valid_name(net, dev, pat) < 0) goto out; } @@ -6175,7 +6280,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char the device is just moving and can keep their slaves up. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); + rcu_barrier(); + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* @@ -6358,22 +6464,26 @@ const char *netdev_drivername(const struct net_device *dev) return empty; } -int __netdev_printk(const char *level, const struct net_device *dev, +static int __netdev_printk(const char *level, const struct net_device *dev, struct va_format *vaf) { int r; - if (dev && dev->dev.parent) - r = dev_printk(level, dev->dev.parent, "%s: %pV", - netdev_name(dev), vaf); - else if (dev) + if (dev && dev->dev.parent) { + r = dev_printk_emit(level[1] - '0', + dev->dev.parent, + "%s %s %s: %pV", + dev_driver_string(dev->dev.parent), + dev_name(dev->dev.parent), + netdev_name(dev), vaf); + } else if (dev) { r = printk("%s%s: %pV", level, netdev_name(dev), vaf); - else + } else { r = printk("%s(NULL net_device): %pV", level, vaf); + } return r; } -EXPORT_SYMBOL(__netdev_printk); int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...) @@ -6388,6 +6498,7 @@ int netdev_printk(const char *level, const struct net_device *dev, vaf.va = &args; r = __netdev_printk(level, dev, &vaf); + va_end(args); return r; @@ -6407,6 +6518,7 @@ int func(const struct net_device *dev, const char *fmt, ...) \ vaf.va = &args; \ \ r = __netdev_printk(level, dev, &vaf); \ + \ va_end(args); \ \ return r; \ diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index c4cc2bc49f06..87cc17db2d56 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -22,7 +22,7 @@ */ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, - unsigned char *addr, int addr_len, + const unsigned char *addr, int addr_len, unsigned char addr_type, bool global) { struct netdev_hw_addr *ha; @@ -46,7 +46,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, } static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, - unsigned char *addr, int addr_len, + const unsigned char *addr, int addr_len, unsigned char addr_type, bool global) { struct netdev_hw_addr *ha; @@ -72,14 +72,15 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); } -static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, - int addr_len, unsigned char addr_type) +static int __hw_addr_add(struct netdev_hw_addr_list *list, + const unsigned char *addr, int addr_len, + unsigned char addr_type) { return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, - unsigned char *addr, int addr_len, + const unsigned char *addr, int addr_len, unsigned char addr_type, bool global) { struct netdev_hw_addr *ha; @@ -104,8 +105,9 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, return -ENOENT; } -static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, - int addr_len, unsigned char addr_type) +static int __hw_addr_del(struct netdev_hw_addr_list *list, + const unsigned char *addr, int addr_len, + unsigned char addr_type) { return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); } @@ -278,7 +280,7 @@ EXPORT_SYMBOL(dev_addr_init); * * The caller must hold the rtnl_mutex. */ -int dev_addr_add(struct net_device *dev, unsigned char *addr, +int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; @@ -303,7 +305,7 @@ EXPORT_SYMBOL(dev_addr_add); * * The caller must hold the rtnl_mutex. */ -int dev_addr_del(struct net_device *dev, unsigned char *addr, +int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; @@ -390,7 +392,7 @@ EXPORT_SYMBOL(dev_addr_del_multiple); * @dev: device * @addr: address to add */ -int dev_uc_add_excl(struct net_device *dev, unsigned char *addr) +int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) { struct netdev_hw_addr *ha; int err; @@ -421,7 +423,7 @@ EXPORT_SYMBOL(dev_uc_add_excl); * Add a secondary unicast address to the device or increase * the reference count if it already exists. */ -int dev_uc_add(struct net_device *dev, unsigned char *addr) +int dev_uc_add(struct net_device *dev, const unsigned char *addr) { int err; @@ -443,7 +445,7 @@ EXPORT_SYMBOL(dev_uc_add); * Release reference to a secondary unicast address and remove it * from the device if the reference count drops to zero. */ -int dev_uc_del(struct net_device *dev, unsigned char *addr) +int dev_uc_del(struct net_device *dev, const unsigned char *addr) { int err; @@ -543,7 +545,7 @@ EXPORT_SYMBOL(dev_uc_init); * @dev: device * @addr: address to add */ -int dev_mc_add_excl(struct net_device *dev, unsigned char *addr) +int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) { struct netdev_hw_addr *ha; int err; @@ -566,7 +568,7 @@ out: } EXPORT_SYMBOL(dev_mc_add_excl); -static int __dev_mc_add(struct net_device *dev, unsigned char *addr, +static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, bool global) { int err; @@ -587,7 +589,7 @@ static int __dev_mc_add(struct net_device *dev, unsigned char *addr, * Add a multicast address to the device or increase * the reference count if it already exists. */ -int dev_mc_add(struct net_device *dev, unsigned char *addr) +int dev_mc_add(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, false); } @@ -600,13 +602,13 @@ EXPORT_SYMBOL(dev_mc_add); * * Add a global multicast address to the device. */ -int dev_mc_add_global(struct net_device *dev, unsigned char *addr) +int dev_mc_add_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, true); } EXPORT_SYMBOL(dev_mc_add_global); -static int __dev_mc_del(struct net_device *dev, unsigned char *addr, +static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, bool global) { int err; @@ -628,7 +630,7 @@ static int __dev_mc_del(struct net_device *dev, unsigned char *addr, * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ -int dev_mc_del(struct net_device *dev, unsigned char *addr) +int dev_mc_del(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, false); } @@ -642,7 +644,7 @@ EXPORT_SYMBOL(dev_mc_del); * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ -int dev_mc_del_global(struct net_device *dev, unsigned char *addr) +int dev_mc_del_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, true); } diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d29414..ee6153e2cf43 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -const u32 dst_default_metrics[RTAX_MAX]; +const u32 dst_default_metrics[RTAX_MAX + 1] = { + /* This initializer is needed to force linker to place this variable + * into const section. Otherwise it might end into bss section. + * We really want to avoid false sharing on this variable, and catch + * any writes on it. + */ + [RTAX_MAX] = 0xdeadbeef, +}; + void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags) @@ -214,8 +222,8 @@ void __dst_free(struct dst_entry *dst) if (dst_garbage.timer_inc > DST_GC_INC) { dst_garbage.timer_inc = DST_GC_INC; dst_garbage.timer_expires = DST_GC_MIN; - cancel_delayed_work(&dst_gc_work); - schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires); + mod_delayed_work(system_wq, &dst_gc_work, + dst_garbage.timer_expires); } spin_unlock_bh(&dst_garbage.lock); } @@ -366,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, struct dst_entry *dst, *last = NULL; switch (event) { - case NETDEV_UNREGISTER: + case NETDEV_UNREGISTER_FINAL: case NETDEV_DOWN: mutex_lock(&dst_gc_mutex); for (dst = dst_busy_list; dst; dst = dst->next) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index cbf033dcaf1f..4d64cc2e3fa9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1426,18 +1426,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) return -EFAULT; - if (!dev->ethtool_ops) { - /* A few commands do not require any driver support, - * are unprivileged, and do not change anything, so we - * can take a shortcut to them. */ - if (ethcmd == ETHTOOL_GDRVINFO) - return ethtool_get_drvinfo(dev, useraddr); - else if (ethcmd == ETHTOOL_GET_TS_INFO) - return ethtool_get_ts_info(dev, useraddr); - else - return -EOPNOTSUPP; - } - /* Allow some commands to be done by anyone */ switch (ethcmd) { case ETHTOOL_GSET: diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ab7db83236c9..58a4ba27dfe3 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -402,7 +402,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (unresolved) ops->unresolved_rules++; - notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); return 0; @@ -500,7 +500,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); if (ops->delete) ops->delete(rule); fib_rule_put(rule); @@ -601,7 +601,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, if (idx < cb->args[1]) goto skip; - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWRULE, NLM_F_MULTI, ops) < 0) break; diff --git a/net/core/filter.c b/net/core/filter.c index d4ce2dc712e3..3d92ebb7fbcf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) int err; struct sk_filter *filter; + /* + * If the skb was allocated from pfmemalloc reserves, only + * allow SOCK_MEMALLOC sockets to use it as this socket is + * helping free memory + */ + if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) + return -ENOMEM; + err = security_sock_rcv_skb(sk, skb); if (err) return err; @@ -159,6 +167,14 @@ unsigned int sk_run_filter(const struct sk_buff *skb, case BPF_S_ALU_DIV_K: A = reciprocal_divide(A, K); continue; + case BPF_S_ALU_MOD_X: + if (X == 0) + return 0; + A %= X; + continue; + case BPF_S_ALU_MOD_K: + A %= K; + continue; case BPF_S_ALU_AND_X: A &= X; continue; @@ -171,6 +187,13 @@ unsigned int sk_run_filter(const struct sk_buff *skb, case BPF_S_ALU_OR_K: A |= K; continue; + case BPF_S_ANC_ALU_XOR_X: + case BPF_S_ALU_XOR_X: + A ^= X; + continue; + case BPF_S_ALU_XOR_K: + A ^= K; + continue; case BPF_S_ALU_LSH_X: A <<= X; continue; @@ -318,9 +341,6 @@ load_b: case BPF_S_ANC_CPU: A = raw_smp_processor_id(); continue; - case BPF_S_ANC_ALU_XOR_X: - A ^= X; - continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -461,10 +481,14 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, + [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K, + [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X, [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, + [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K, + [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X, [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, @@ -523,6 +547,11 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) return -EINVAL; ftest->k = reciprocal_value(ftest->k); break; + case BPF_S_ALU_MOD_K: + /* check for division by zero */ + if (ftest->k == 0) + return -EINVAL; + break; case BPF_S_LD_MEM: case BPF_S_LDX_MEM: case BPF_S_ST: diff --git a/net/core/link_watch.c b/net/core/link_watch.c index c3519c6d1b16..8f82a5cc3851 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -76,6 +76,14 @@ static void rfc2863_policy(struct net_device *dev) } +void linkwatch_init_dev(struct net_device *dev) +{ + /* Handle pre-registration link state changes */ + if (!netif_carrier_ok(dev) || netif_dormant(dev)) + rfc2863_policy(dev); +} + + static bool linkwatch_urgent_event(struct net_device *dev) { if (!netif_running(dev)) @@ -120,22 +128,13 @@ static void linkwatch_schedule_work(int urgent) delay = 0; /* - * This is true if we've scheduled it immeditately or if we don't - * need an immediate execution and it's already pending. + * If urgent, schedule immediate execution; otherwise, don't + * override the existing timer. */ - if (schedule_delayed_work(&linkwatch_work, delay) == !delay) - return; - - /* Don't bother if there is nothing urgent. */ - if (!test_bit(LW_URGENT, &linkwatch_flags)) - return; - - /* It's already running which is good enough. */ - if (!__cancel_delayed_work(&linkwatch_work)) - return; - - /* Otherwise we reschedule it again for immediate execution. */ - schedule_delayed_work(&linkwatch_work, 0); + if (test_bit(LW_URGENT, &linkwatch_flags)) + mod_delayed_work(system_wq, &linkwatch_work, 0); + else + schedule_delayed_work(&linkwatch_work, delay); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 117afaf51268..baca771caae2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1545,7 +1545,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot allocate neighbour cache hashes"); rwlock_init(&tbl->lock); - INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work); + INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); skb_queue_head_init_class(&tbl->proxy_queue, @@ -2102,7 +2102,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (tidx < tbl_skip || (family && tbl->family != family)) continue; - if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, + if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) break; @@ -2115,7 +2115,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (neightbl_fill_param_info(skb, tbl, p, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) @@ -2244,7 +2244,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) <= 0) { @@ -2281,7 +2281,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, tbl) <= 0) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 72607174ea5a..bcf02f608cbf 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -166,9 +166,21 @@ static ssize_t show_duplex(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!__ethtool_get_settings(netdev, &cmd)) - ret = sprintf(buf, "%s\n", - cmd.duplex ? "full" : "half"); + if (!__ethtool_get_settings(netdev, &cmd)) { + const char *duplex; + switch (cmd.duplex) { + case DUPLEX_HALF: + duplex = "half"; + break; + case DUPLEX_FULL: + duplex = "full"; + break; + default: + duplex = "unknown"; + break; + } + ret = sprintf(buf, "%s\n", duplex); + } } rtnl_unlock(); return ret; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b4c90e42b443..77a0388fc3be 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -26,6 +26,7 @@ #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/if_vlan.h> #include <net/tcp.h> #include <net/udp.h> #include <asm/unaligned.h> @@ -54,7 +55,7 @@ static atomic_t trapped; MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void arp_reply(struct sk_buff *skb); +static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -170,7 +171,8 @@ static void poll_napi(struct net_device *dev) list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(dev->npinfo, napi, budget); + budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), + napi, budget); spin_unlock(&napi->poll_lock); if (!budget) @@ -185,13 +187,14 @@ static void service_arp_queue(struct netpoll_info *npi) struct sk_buff *skb; while ((skb = skb_dequeue(&npi->arp_tx))) - arp_reply(skb); + netpoll_arp_reply(skb, npi); } } static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; + struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); if (!dev || !netif_running(dev)) return; @@ -206,17 +209,18 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); if (dev->flags & IFF_SLAVE) { - if (dev->npinfo) { + if (ni) { struct net_device *bond_dev = dev->master; struct sk_buff *skb; - while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { + struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); + while ((skb = skb_dequeue(&ni->arp_tx))) { skb->dev = bond_dev; - skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); + skb_queue_tail(&bond_ni->arp_tx, skb); } } } - service_arp_queue(dev->npinfo); + service_arp_queue(ni); zap_completion_queue(); } @@ -302,6 +306,7 @@ static int netpoll_owner_active(struct net_device *dev) return 0; } +/* call with IRQ disabled */ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, struct net_device *dev) { @@ -309,8 +314,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, unsigned long tries; const struct net_device_ops *ops = dev->netdev_ops; /* It is up to the caller to keep npinfo alive. */ - struct netpoll_info *npinfo = np->dev->npinfo; + struct netpoll_info *npinfo; + + WARN_ON_ONCE(!irqs_disabled()); + npinfo = rcu_dereference_bh(np->dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { __kfree_skb(skb); return; @@ -319,16 +327,22 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; - unsigned long flags; - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = netdev_pick_tx(dev, skb); - local_irq_save(flags); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_xmit_stopped(txq)) { + if (vlan_tx_tag_present(skb) && + !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { + skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + if (unlikely(!skb)) + break; + skb->vlan_tci = 0; + } + status = ops->ndo_start_xmit(skb, dev); if (status == NETDEV_TX_OK) txq_trans_update(txq); @@ -347,10 +361,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } WARN_ONCE(!irqs_disabled(), - "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n", + "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n", dev->name, ops->ndo_start_xmit); - local_irq_restore(flags); } if (status != NETDEV_TX_OK) { @@ -367,6 +380,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) struct udphdr *udph; struct iphdr *iph; struct ethhdr *eth; + static atomic_t ip_ident; udp_len = len + sizeof(*udph); ip_len = udp_len + sizeof(*iph); @@ -402,7 +416,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) put_unaligned(0x45, (unsigned char *)iph); iph->tos = 0; put_unaligned(htons(ip_len), &(iph->tot_len)); - iph->id = 0; + iph->id = htons(atomic_inc_return(&ip_ident)); iph->frag_off = 0; iph->ttl = 64; iph->protocol = IPPROTO_UDP; @@ -423,9 +437,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void arp_reply(struct sk_buff *skb) +static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { - struct netpoll_info *npinfo = skb->dev->npinfo; struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; @@ -543,13 +556,12 @@ static void arp_reply(struct sk_buff *skb) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -int __netpoll_rx(struct sk_buff *skb) +int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) { int proto, len, ulen; int hits = 0; const struct iphdr *iph; struct udphdr *uh; - struct netpoll_info *npinfo = skb->dev->npinfo; struct netpoll *np, *tmp; if (list_empty(&npinfo->rx_np)) @@ -565,6 +577,12 @@ int __netpoll_rx(struct sk_buff *skb) return 1; } + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + goto out; + } + proto = ntohs(eth_hdr(skb)->h_proto); if (proto != ETH_P_IP) goto out; @@ -715,7 +733,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } EXPORT_SYMBOL(netpoll_parse_options); -int __netpoll_setup(struct netpoll *np, struct net_device *ndev) +int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) { struct netpoll_info *npinfo; const struct net_device_ops *ops; @@ -734,7 +752,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) } if (!ndev->npinfo) { - npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); + npinfo = kmalloc(sizeof(*npinfo), gfp); if (!npinfo) { err = -ENOMEM; goto out; @@ -752,7 +770,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo); + err = ops->ndo_netpoll_setup(ndev, npinfo, gfp); if (err) goto free_npinfo; } @@ -857,7 +875,7 @@ int netpoll_setup(struct netpoll *np) refill_skbs(); rtnl_lock(); - err = __netpoll_setup(np, ndev); + err = __netpoll_setup(np, ndev, GFP_KERNEL); rtnl_unlock(); if (err) @@ -878,6 +896,24 @@ static int __init netpoll_init(void) } core_initcall(netpoll_init); +static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) +{ + struct netpoll_info *npinfo = + container_of(rcu_head, struct netpoll_info, rcu); + + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + + /* we can't call cancel_delayed_work_sync here, as we are in softirq */ + cancel_delayed_work(&npinfo->tx_work); + + /* clean after last, unfinished work */ + __skb_queue_purge(&npinfo->txq); + /* now cancel it again */ + cancel_delayed_work(&npinfo->tx_work); + kfree(npinfo); +} + void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; @@ -903,20 +939,24 @@ void __netpoll_cleanup(struct netpoll *np) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); + call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); + } +} +EXPORT_SYMBOL_GPL(__netpoll_cleanup); - /* avoid racing with NAPI reading npinfo */ - synchronize_rcu_bh(); +static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +{ + struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); - skb_queue_purge(&npinfo->arp_tx); - skb_queue_purge(&npinfo->txq); - cancel_delayed_work_sync(&npinfo->tx_work); + __netpoll_cleanup(np); + kfree(np); +} - /* clean after last, unfinished work */ - __skb_queue_purge(&npinfo->txq); - kfree(npinfo); - } +void __netpoll_free_rcu(struct netpoll *np) +{ + call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); } -EXPORT_SYMBOL_GPL(__netpoll_cleanup); +EXPORT_SYMBOL_GPL(__netpoll_free_rcu); void netpoll_cleanup(struct netpoll *np) { diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 63d15e8f80e9..4a83fb3c8e87 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -73,7 +73,6 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len) ((sizeof(u32) * new_len)); struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); struct netprio_map *old_priomap; - int i; old_priomap = rtnl_dereference(dev->priomap); @@ -82,10 +81,10 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len) return -ENOMEM; } - for (i = 0; - old_priomap && (i < old_priomap->priomap_len); - i++) - new_priomap->priomap[i] = old_priomap->priomap[i]; + if (old_priomap) + memcpy(new_priomap->priomap, old_priomap->priomap, + old_priomap->priomap_len * + sizeof(old_priomap->priomap[0])); new_priomap->priomap_len = new_len; @@ -101,39 +100,11 @@ static int write_update_netdev_table(struct net_device *dev) u32 max_len; struct netprio_map *map; - rtnl_lock(); max_len = atomic_read(&max_prioidx) + 1; map = rtnl_dereference(dev->priomap); if (!map || map->priomap_len < max_len) ret = extend_netdev_table(dev, max_len); - rtnl_unlock(); - - return ret; -} - -static int update_netdev_tables(void) -{ - int ret = 0; - struct net_device *dev; - u32 max_len; - struct netprio_map *map; - rtnl_lock(); - max_len = atomic_read(&max_prioidx) + 1; - for_each_netdev(&init_net, dev) { - map = rtnl_dereference(dev->priomap); - /* - * don't allocate priomap if we didn't - * change net_prio.ifpriomap (map == NULL), - * this will speed up skb_update_prio. - */ - if (map && map->priomap_len < max_len) { - ret = extend_netdev_table(dev, max_len); - if (ret < 0) - break; - } - } - rtnl_unlock(); return ret; } @@ -155,12 +126,6 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) goto out; } - ret = update_netdev_tables(); - if (ret < 0) { - put_prioidx(cs->prioidx); - goto out; - } - return &cs->css; out: kfree(cs); @@ -234,7 +199,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, /* *Separate the devname from the associated priority - *and advance the priostr poitner to the priority value + *and advance the priostr pointer to the priority value */ *priostr = '\0'; priostr++; @@ -256,17 +221,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, if (!dev) goto out_free_devname; + rtnl_lock(); ret = write_update_netdev_table(dev); if (ret < 0) goto out_put_dev; - rcu_read_lock(); - map = rcu_dereference(dev->priomap); + map = rtnl_dereference(dev->priomap); if (map) map->priomap[prioidx] = priority; - rcu_read_unlock(); out_put_dev: + rtnl_unlock(); dev_put(dev); out_free_devname: @@ -277,12 +242,6 @@ out_free_devname: void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; - char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); - - if (!tmp) { - pr_warn("Unable to attach cgrp due to alloc failure!\n"); - return; - } cgroup_taskset_for_each(p, cgrp, tset) { unsigned int fd; @@ -296,32 +255,24 @@ void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) continue; } - rcu_read_lock(); + spin_lock(&files->file_lock); fdt = files_fdtable(files); for (fd = 0; fd < fdt->max_fds; fd++) { - char *path; struct file *file; struct socket *sock; - unsigned long s; - int rv, err = 0; + int err; file = fcheck_files(files, fd); if (!file) continue; - path = d_path(&file->f_path, tmp, PAGE_SIZE); - rv = sscanf(path, "socket:[%lu]", &s); - if (rv <= 0) - continue; - sock = sock_from_file(file, &err); - if (!err) + if (sock) sock_update_netprioidx(sock->sk, p); } - rcu_read_unlock(); + spin_unlock(&files->file_lock); task_unlock(p); } - kfree(tmp); } static struct cftype ss_files[] = { @@ -342,11 +293,19 @@ struct cgroup_subsys net_prio_subsys = { .create = cgrp_create, .destroy = cgrp_destroy, .attach = net_prio_attach, -#ifdef CONFIG_NETPRIO_CGROUP .subsys_id = net_prio_subsys_id, -#endif .base_cftypes = ss_files, - .module = THIS_MODULE + .module = THIS_MODULE, + + /* + * net_prio has artificial limit on the number of cgroups and + * disallows nesting making it impossible to co-mount it with other + * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ + * limit and properly nest configuration such that children follow + * their parents' configurations by default and are allowed to + * override and remove the following. + */ + .broken_hierarchy = true, }; static int netprio_device_event(struct notifier_block *unused, @@ -382,10 +341,6 @@ static int __init init_cgroup_netprio(void) ret = cgroup_load_subsys(&net_prio_subsys); if (ret) goto out; -#ifndef CONFIG_NETPRIO_CGROUP - smp_wmb(); - net_prio_subsys_id = net_prio_subsys.subsys_id; -#endif register_netdevice_notifier(&netprio_device_notifier); @@ -402,11 +357,6 @@ static void __exit exit_cgroup_netprio(void) cgroup_unload_subsys(&net_prio_subsys); -#ifndef CONFIG_NETPRIO_CGROUP - net_prio_subsys_id = -1; - synchronize_rcu(); -#endif - rtnl_lock(); for_each_netdev(&init_net, dev) { old = rtnl_dereference(dev->priomap); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index cce9e53528b1..148e73d2c451 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2721,7 +2721,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - pkt_dev->pkt_overhead; - if (datalen < sizeof(struct pktgen_hdr)) + if (datalen < 0 || datalen < sizeof(struct pktgen_hdr)) datalen = sizeof(struct pktgen_hdr); udph->source = htons(pkt_dev->cur_udp_src); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 9b570a6a33c5..c31d9e8668c3 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -15,6 +15,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/tcp.h> #include <linux/vmalloc.h> #include <net/request_sock.h> @@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) kfree(lopt); } +/* + * This function is called to set a Fast Open socket's "fastopen_rsk" field + * to NULL when a TFO socket no longer needs to access the request_sock. + * This happens only after 3WHS has been either completed or aborted (e.g., + * RST is received). + * + * Before TFO, a child socket is created only after 3WHS is completed, + * hence it never needs to access the request_sock. things get a lot more + * complex with TFO. A child socket, accepted or not, has to access its + * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts, + * until 3WHS is either completed or aborted. Afterwards the req will stay + * until either the child socket is accepted, or in the rare case when the + * listener is closed before the child is accepted. + * + * In short, a request socket is only freed after BOTH 3WHS has completed + * (or aborted) and the child socket has been accepted (or listener closed). + * When a child socket is accepted, its corresponding req->sk is set to + * NULL since it's no longer needed. More importantly, "req->sk == NULL" + * will be used by the code below to determine if a child socket has been + * accepted or not, and the check is protected by the fastopenq->lock + * described below. + * + * Note that fastopen_rsk is only accessed from the child socket's context + * with its socket lock held. But a request_sock (req) can be accessed by + * both its child socket through fastopen_rsk, and a listener socket through + * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin + * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created. + * only in the rare case when both the listener and the child locks are held, + * e.g., in inet_csk_listen_stop() do we not need to acquire the lock. + * The lock also protects other fields such as fastopenq->qlen, which is + * decremented by this function when fastopen_rsk is no longer needed. + * + * Note that another solution was to simply use the existing socket lock + * from the listener. But first socket lock is difficult to use. It is not + * a simple spin lock - one must consider sock_owned_by_user() and arrange + * to use sk_add_backlog() stuff. But what really makes it infeasible is the + * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to + * acquire a child's lock while holding listener's socket lock. A corner + * case might also exist in tcp_v4_hnd_req() that will trigger this locking + * order. + * + * When a TFO req is created, it needs to sock_hold its listener to prevent + * the latter data structure from going away. + * + * This function also sets "treq->listener" to NULL and unreference listener + * socket. treq->listener is used by the listener so it is protected by the + * fastopenq->lock in this function. + */ +void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, + bool reset) +{ + struct sock *lsk = tcp_rsk(req)->listener; + struct fastopen_queue *fastopenq = + inet_csk(lsk)->icsk_accept_queue.fastopenq; + + BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk)); + + tcp_sk(sk)->fastopen_rsk = NULL; + spin_lock_bh(&fastopenq->lock); + fastopenq->qlen--; + tcp_rsk(req)->listener = NULL; + if (req->sk) /* the child socket hasn't been accepted yet */ + goto out; + + if (!reset || lsk->sk_state != TCP_LISTEN) { + /* If the listener has been closed don't bother with the + * special RST handling below. + */ + spin_unlock_bh(&fastopenq->lock); + sock_put(lsk); + reqsk_free(req); + return; + } + /* Wait for 60secs before removing a req that has triggered RST. + * This is a simple defense against TFO spoofing attack - by + * counting the req against fastopen.max_qlen, and disabling + * TFO when the qlen exceeds max_qlen. + * + * For more details see CoNext'11 "TCP Fast Open" paper. + */ + req->expires = jiffies + 60*HZ; + if (fastopenq->rskq_rst_head == NULL) + fastopenq->rskq_rst_head = req; + else + fastopenq->rskq_rst_tail->dl_next = req; + + req->dl_next = NULL; + fastopenq->rskq_rst_tail = req; + fastopenq->qlen++; +out: + spin_unlock_bh(&fastopenq->lock); + sock_put(lsk); + return; +} diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 334b930e0de3..76d4c2c3c89b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -618,16 +618,20 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error) { struct rta_cacheinfo ci = { - .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), + .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse), .rta_used = dst->__use, .rta_clntref = atomic_read(&(dst->__refcnt)), .rta_error = error, .rta_id = id, }; - if (expires) - ci.rta_expires = jiffies_to_clock_t(expires); + if (expires) { + unsigned long clock; + clock = jiffies_to_clock_t(abs(expires)); + clock = min_t(unsigned long, clock, INT_MAX); + ci.rta_expires = (expires > 0) ? clock : -clock; + } return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); @@ -659,6 +663,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } } +static unsigned int rtnl_dev_get_flags(const struct net_device *dev) +{ + return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) | + (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI)); +} + static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, const struct ifinfomsg *ifm) { @@ -667,7 +677,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ if (ifm->ifi_change) flags = (flags & ifm->ifi_change) | - (dev->flags & ~ifm->ifi_change); + (rtnl_dev_get_flags(dev) & ~ifm->ifi_change); return flags; } @@ -1071,7 +1081,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, NLM_F_MULTI, ext_filter_mask) <= 0) @@ -1371,6 +1381,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; send_addr_notify = 1; modified = 1; + add_device_randomness(dev->dev_addr, dev->addr_len); } if (tb[IFLA_MTU]) { @@ -1801,8 +1812,6 @@ replay: return -ENODEV; } - if (ifm->ifi_index) - return -EOPNOTSUPP; if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) return -EOPNOTSUPP; @@ -1828,10 +1837,14 @@ replay: return PTR_ERR(dest_net); dev = rtnl_create_link(net, dest_net, ifname, ops, tb); - - if (IS_ERR(dev)) + if (IS_ERR(dev)) { err = PTR_ERR(dev); - else if (ops->newlink) + goto out; + } + + dev->ifindex = ifm->ifi_index; + + if (ops->newlink) err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); @@ -1886,14 +1899,14 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (nskb == NULL) return -ENOBUFS; - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, + err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else - err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); return err; } @@ -2077,7 +2090,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && (dev->priv_flags & IFF_BRIDGE_PORT)) { master = dev->master; - err = master->netdev_ops->ndo_fdb_add(ndm, dev, addr, + err = master->netdev_ops->ndo_fdb_add(ndm, tb, + dev, addr, nlh->nlmsg_flags); if (err) goto out; @@ -2087,7 +2101,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { - err = dev->netdev_ops->ndo_fdb_add(ndm, dev, addr, + err = dev->netdev_ops->ndo_fdb_add(ndm, tb, + dev, addr, nlh->nlmsg_flags); if (!err) { @@ -2167,9 +2182,9 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, { struct netdev_hw_addr *ha; int err; - u32 pid, seq; + u32 portid, seq; - pid = NETLINK_CB(cb->skb).pid; + portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { @@ -2177,7 +2192,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, - pid, seq, 0, NTF_SELF); + portid, seq, 0, NTF_SELF); if (err < 0) return err; skip: @@ -2345,7 +2360,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_PRE_TYPE_CHANGE: case NETDEV_GOING_DOWN: case NETDEV_UNREGISTER: - case NETDEV_UNREGISTER_BATCH: + case NETDEV_UNREGISTER_FINAL: case NETDEV_RELEASE: case NETDEV_JOIN: break; @@ -2368,9 +2383,10 @@ static int __net_init rtnetlink_net_init(struct net *net) .groups = RTNLGRP_MAX, .input = rtnetlink_rcv, .cb_mutex = &rtnl_mutex, + .flags = NL_CFG_F_NONROOT_RECV, }; - sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg); + sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg); if (!sk) return -ENOMEM; net->rtnl = sk; @@ -2403,7 +2419,6 @@ void __init rtnetlink_init(void) if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); - netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); register_netdevice_notifier(&rtnetlink_dev_notifier); rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, diff --git a/net/core/scm.c b/net/core/scm.c index 8f6ccfd68ef4..9c1c63da3ca8 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -45,12 +45,17 @@ static __inline__ int scm_check_creds(struct ucred *creds) { const struct cred *cred = current_cred(); + kuid_t uid = make_kuid(cred->user_ns, creds->uid); + kgid_t gid = make_kgid(cred->user_ns, creds->gid); + + if (!uid_valid(uid) || !gid_valid(gid)) + return -EINVAL; if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == cred->uid || creds->uid == cred->euid || - creds->uid == cred->suid) || capable(CAP_SETUID)) && - ((creds->gid == cred->gid || creds->gid == cred->egid || - creds->gid == cred->sgid) || capable(CAP_SETGID))) { + ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || + uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) && + ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || + gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) { return 0; } return -EPERM; @@ -149,39 +154,54 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) goto error; break; case SCM_CREDENTIALS: + { + struct ucred creds; + kuid_t uid; + kgid_t gid; if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) goto error; - memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred)); - err = scm_check_creds(&p->creds); + memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred)); + err = scm_check_creds(&creds); if (err) goto error; - if (!p->pid || pid_vnr(p->pid) != p->creds.pid) { + p->creds.pid = creds.pid; + if (!p->pid || pid_vnr(p->pid) != creds.pid) { struct pid *pid; err = -ESRCH; - pid = find_get_pid(p->creds.pid); + pid = find_get_pid(creds.pid); if (!pid) goto error; put_pid(p->pid); p->pid = pid; } + err = -EINVAL; + uid = make_kuid(current_user_ns(), creds.uid); + gid = make_kgid(current_user_ns(), creds.gid); + if (!uid_valid(uid) || !gid_valid(gid)) + goto error; + + p->creds.uid = uid; + p->creds.gid = gid; + if (!p->cred || - (p->cred->euid != p->creds.uid) || - (p->cred->egid != p->creds.gid)) { + !uid_eq(p->cred->euid, uid) || + !gid_eq(p->cred->egid, gid)) { struct cred *cred; err = -ENOMEM; cred = prepare_creds(); if (!cred) goto error; - cred->uid = cred->euid = p->creds.uid; - cred->gid = cred->egid = p->creds.gid; + cred->uid = cred->euid = uid; + cred->gid = cred->egid = gid; if (p->cred) put_cred(p->cred); p->cred = cred; } break; + } default: goto error; } @@ -265,6 +285,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) { + struct socket *sock; int new_fd; err = security_file_receive(fp[i]); if (err) @@ -281,6 +302,9 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) } /* Bump the usage count and install the file. */ get_file(fp[i]); + sock = sock_from_file(fp[i], &err); + if (sock) + sock_update_netprioidx(sock->sk, current); fd_install(new_fd, fp[i]); } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 99b2596531bb..e61a8bb7fce7 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -76,6 +76,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, return hash[0]; } +EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 368f65c15e4f..cdc28598f4ef 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) BUG(); } + +/* + * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells + * the caller if emergency pfmemalloc reserves are being used. If it is and + * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves + * may be used. Otherwise, the packet data may be discarded until enough + * memory is free + */ +#define kmalloc_reserve(size, gfp, node, pfmemalloc) \ + __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) +void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, + bool *pfmemalloc) +{ + void *obj; + bool ret_pfmemalloc = false; + + /* + * Try a regular allocation, when that fails and we're not entitled + * to the reserves, fail. + */ + obj = kmalloc_node_track_caller(size, + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, + node); + if (obj || !(gfp_pfmemalloc_allowed(flags))) + goto out; + + /* Try again but now we are using pfmemalloc reserves */ + ret_pfmemalloc = true; + obj = kmalloc_node_track_caller(size, flags, node); + +out: + if (pfmemalloc) + *pfmemalloc = ret_pfmemalloc; + + return obj; +} + /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. @@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask - * @fclone: allocate from fclone cache instead of head cache - * and allocate a cloned (child) skb + * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache + * instead of head cache and allocate a cloned (child) skb. + * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for + * allocations in case the data is required for writeback * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a @@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone, int node) + int flags, int node) { struct kmem_cache *cache; struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; + bool pfmemalloc; + + cache = (flags & SKB_ALLOC_FCLONE) + ? skbuff_fclone_cache : skbuff_head_cache; - cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; + if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) + gfp_mask |= __GFP_MEMALLOC; /* Get the HEAD */ skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); @@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, */ size = SKB_DATA_ALIGN(size); size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - data = kmalloc_node_track_caller(size, gfp_mask, node); + data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); if (!data) goto nodata; /* kmalloc(size) might give us more room than requested. @@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, memset(skb, 0, offsetof(struct sk_buff, tail)); /* Account for allocated memory : skb + skb->head */ skb->truesize = SKB_TRUESIZE(size); + skb->pfmemalloc = pfmemalloc; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, atomic_set(&shinfo->dataref, 1); kmemcheck_annotate_variable(shinfo->destructor_arg); - if (fclone) { + if (flags & SKB_ALLOC_FCLONE) { struct sk_buff *child = skb + 1; atomic_t *fclone_ref = (atomic_t *) (child + 1); @@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, atomic_set(fclone_ref, 1); child->fclone = SKB_FCLONE_UNAVAILABLE; + child->pfmemalloc = pfmemalloc; } out: return skb; @@ -294,55 +340,74 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { - struct page *page; - unsigned int offset; - unsigned int pagecnt_bias; + struct page_frag frag; + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_count every time we allocate a fragment. + */ + unsigned int pagecnt_bias; }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) +#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) +#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) +#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE -/** - * netdev_alloc_frag - allocate a page fragment - * @fragsz: fragment size - * - * Allocates a frag from a page for receive buffer. - * Uses GFP_ATOMIC allocations. - */ -void *netdev_alloc_frag(unsigned int fragsz) +static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; void *data = NULL; + int order; unsigned long flags; local_irq_save(flags); nc = &__get_cpu_var(netdev_alloc_cache); - if (unlikely(!nc->page)) { + if (unlikely(!nc->frag.page)) { refill: - nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD); - if (unlikely(!nc->page)) - goto end; + for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) { + gfp_t gfp = gfp_mask; + + if (order) + gfp |= __GFP_COMP | __GFP_NOWARN; + nc->frag.page = alloc_pages(gfp, order); + if (likely(nc->frag.page)) + break; + if (--order < 0) + goto end; + } + nc->frag.size = PAGE_SIZE << order; recycle: - atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS); - nc->pagecnt_bias = NETDEV_PAGECNT_BIAS; - nc->offset = 0; + atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS); + nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; + nc->frag.offset = 0; } - if (nc->offset + fragsz > PAGE_SIZE) { + if (nc->frag.offset + fragsz > nc->frag.size) { /* avoid unnecessary locked operations if possible */ - if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) || - atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count)) + if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) || + atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count)) goto recycle; goto refill; } - data = page_address(nc->page) + nc->offset; - nc->offset += fragsz; + data = page_address(nc->frag.page) + nc->frag.offset; + nc->frag.offset += fragsz; nc->pagecnt_bias--; end: local_irq_restore(flags); return data; } + +/** + * netdev_alloc_frag - allocate a page fragment + * @fragsz: fragment size + * + * Allocates a frag from a page for receive buffer. + * Uses GFP_ATOMIC allocations. + */ +void *netdev_alloc_frag(unsigned int fragsz) +{ + return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); +} EXPORT_SYMBOL(netdev_alloc_frag); /** @@ -366,7 +431,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { - void *data = netdev_alloc_frag(fragsz); + void *data; + + if (sk_memalloc_socks()) + gfp_mask |= __GFP_MEMALLOC; + + data = __netdev_alloc_frag(fragsz, gfp_mask); if (likely(data)) { skb = build_skb(data, fragsz); @@ -374,7 +444,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, put_page(virt_to_head_page(data)); } } else { - skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, + SKB_ALLOC_RX, NUMA_NO_NODE); } if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); @@ -656,6 +727,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #if IS_ENABLED(CONFIG_IP_VS) new->ipvs_property = old->ipvs_property; #endif + new->pfmemalloc = old->pfmemalloc; new->protocol = old->protocol; new->mark = old->mark; new->skb_iif = old->skb_iif; @@ -814,6 +886,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->fclone = SKB_FCLONE_CLONE; atomic_inc(fclone_ref); } else { + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); if (!n) return NULL; @@ -850,6 +925,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } +static inline int skb_alloc_rx_flag(const struct sk_buff *skb) +{ + if (skb_pfmemalloc(skb)) + return SKB_ALLOC_RX; + return 0; +} + /** * skb_copy - create private copy of an sk_buff * @skb: buffer to copy @@ -871,7 +953,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb_headroom(skb); unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = alloc_skb(size, gfp_mask); + struct sk_buff *n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -906,7 +989,8 @@ EXPORT_SYMBOL(skb_copy); struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) { unsigned int size = skb_headlen(skb) + headroom; - struct sk_buff *n = alloc_skb(size, gfp_mask); + struct sk_buff *n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) goto out; @@ -979,8 +1063,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, size = SKB_DATA_ALIGN(size); - data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask); + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); if (!data) goto nodata; size = SKB_WITH_OVERHEAD(ksize(data)); @@ -1092,8 +1178,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); + struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; int off; @@ -1582,38 +1669,19 @@ static struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, struct sk_buff *skb, struct sock *sk) { - struct page *p = sk->sk_sndmsg_page; - unsigned int off; + struct page_frag *pfrag = sk_page_frag(sk); - if (!p) { -new_page: - p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0); - if (!p) - return NULL; - - off = sk->sk_sndmsg_off = 0; - /* hold one ref to this page until it's full */ - } else { - unsigned int mlen; - - /* If we are the only user of the page, we can reset offset */ - if (page_count(p) == 1) - sk->sk_sndmsg_off = 0; - off = sk->sk_sndmsg_off; - mlen = PAGE_SIZE - off; - if (mlen < 64 && mlen < *len) { - put_page(p); - goto new_page; - } + if (!sk_page_frag_refill(sk, pfrag)) + return NULL; - *len = min_t(unsigned int, *len, mlen); - } + *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset); - memcpy(page_address(p) + off, page_address(page) + *offset, *len); - sk->sk_sndmsg_off += *len; - *offset = off; + memcpy(page_address(pfrag->page) + pfrag->offset, + page_address(page) + *offset, *len); + *offset = pfrag->offset; + pfrag->offset += *len; - return p; + return pfrag->page; } static bool spd_can_coalesce(const struct splice_pipe_desc *spd, @@ -2775,8 +2843,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { - nskb = alloc_skb(hsize + doffset + headroom, - GFP_ATOMIC); + nskb = __alloc_skb(hsize + doffset + headroom, + GFP_ATOMIC, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (unlikely(!nskb)) goto err; @@ -3414,8 +3483,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) return false; - delta = from->truesize - - SKB_TRUESIZE(skb_end_pointer(from) - from->head); + delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); } WARN_ON_ONCE(delta < len); @@ -3428,7 +3496,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (!skb_cloned(from)) skb_shinfo(from)->nr_frags = 0; - /* if the skb is cloned this does nothing since we set nr_frags to 0 */ + /* if the skb is not cloned this does nothing + * since we set nr_frags to 0. + */ for (i = 0; i < skb_shinfo(from)->nr_frags; i++) skb_frag_ref(from, i); diff --git a/net/core/sock.c b/net/core/sock.c index 2676a88f533e..8a146cfcc366 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -142,7 +142,7 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { struct proto *proto; @@ -271,16 +271,60 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); -#if defined(CONFIG_CGROUPS) -#if !defined(CONFIG_NET_CLS_CGROUP) -int net_cls_subsys_id = -1; -EXPORT_SYMBOL_GPL(net_cls_subsys_id); -#endif -#if !defined(CONFIG_NETPRIO_CGROUP) -int net_prio_subsys_id = -1; -EXPORT_SYMBOL_GPL(net_prio_subsys_id); -#endif -#endif +struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL_GPL(memalloc_socks); + +/** + * sk_set_memalloc - sets %SOCK_MEMALLOC + * @sk: socket to set it on + * + * Set %SOCK_MEMALLOC on a socket for access to emergency reserves. + * It's the responsibility of the admin to adjust min_free_kbytes + * to meet the requirements + */ +void sk_set_memalloc(struct sock *sk) +{ + sock_set_flag(sk, SOCK_MEMALLOC); + sk->sk_allocation |= __GFP_MEMALLOC; + static_key_slow_inc(&memalloc_socks); +} +EXPORT_SYMBOL_GPL(sk_set_memalloc); + +void sk_clear_memalloc(struct sock *sk) +{ + sock_reset_flag(sk, SOCK_MEMALLOC); + sk->sk_allocation &= ~__GFP_MEMALLOC; + static_key_slow_dec(&memalloc_socks); + + /* + * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward + * progress of swapping. However, if SOCK_MEMALLOC is cleared while + * it has rmem allocations there is a risk that the user of the + * socket cannot make forward progress due to exceeding the rmem + * limits. By rights, sk_clear_memalloc() should only be called + * on sockets being torn down but warn and reset the accounting if + * that assumption breaks. + */ + if (WARN_ON(sk->sk_forward_alloc)) + sk_mem_reclaim(sk); +} +EXPORT_SYMBOL_GPL(sk_clear_memalloc); + +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + unsigned long pflags = current->flags; + + /* these should have been dropped before queueing */ + BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); + + current->flags |= PF_MEMALLOC; + ret = sk->sk_backlog_rcv(sk, skb); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + + return ret; +} +EXPORT_SYMBOL(__sk_backlog_rcv); static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { @@ -353,7 +397,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) return err; - if (!sk_rmem_schedule(sk, skb->truesize)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize)) { atomic_inc(&sk->sk_drops); return -ENOBUFS; } @@ -636,7 +680,8 @@ set_rcvbuf: case SO_KEEPALIVE: #ifdef CONFIG_INET - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) tcp_set_keepalive(sk, valbool); #endif sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); @@ -813,8 +858,8 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred, if (cred) { struct user_namespace *current_ns = current_user_ns(); - ucred->uid = from_kuid(current_ns, cred->euid); - ucred->gid = from_kgid(current_ns, cred->egid); + ucred->uid = from_kuid_munged(current_ns, cred->euid); + ucred->gid = from_kgid_munged(current_ns, cred->egid); } } EXPORT_SYMBOL_GPL(cred_to_ucred); @@ -1168,6 +1213,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) } #ifdef CONFIG_CGROUPS +#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) void sock_update_classid(struct sock *sk) { u32 classid; @@ -1175,11 +1221,13 @@ void sock_update_classid(struct sock *sk) rcu_read_lock(); /* doing current task, which cannot vanish. */ classid = task_cls_classid(current); rcu_read_unlock(); - if (classid && classid != sk->sk_classid) + if (classid != sk->sk_classid) sk->sk_classid = classid; } EXPORT_SYMBOL(sock_update_classid); +#endif +#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) void sock_update_netprioidx(struct sock *sk, struct task_struct *task) { if (in_interrupt()) @@ -1189,6 +1237,7 @@ void sock_update_netprioidx(struct sock *sk, struct task_struct *task) } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif +#endif /** * sk_alloc - All socket objects are allocated here @@ -1403,24 +1452,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; sk->sk_gso_max_size = dst->dev->gso_max_size; + sk->sk_gso_max_segs = dst->dev->gso_max_segs; } } } EXPORT_SYMBOL_GPL(sk_setup_caps); -void __init sk_init(void) -{ - if (totalram_pages <= 4096) { - sysctl_wmem_max = 32767; - sysctl_rmem_max = 32767; - sysctl_wmem_default = 32767; - sysctl_rmem_default = 32767; - } else if (totalram_pages >= 131072) { - sysctl_wmem_max = 131071; - sysctl_rmem_max = 131071; - } -} - /* * Simple resource managers for sockets. */ @@ -1467,16 +1504,23 @@ EXPORT_SYMBOL(sock_rfree); void sock_edemux(struct sk_buff *skb) { - sock_put(skb->sk); + struct sock *sk = skb->sk; + +#ifdef CONFIG_INET + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put(inet_twsk(sk)); + else +#endif + sock_put(sk); } EXPORT_SYMBOL(sock_edemux); -int sock_i_uid(struct sock *sk) +kuid_t sock_i_uid(struct sock *sk) { - int uid; + kuid_t uid; read_lock_bh(&sk->sk_callback_lock); - uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; + uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID; read_unlock_bh(&sk->sk_callback_lock); return uid; } @@ -1681,6 +1725,45 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, } EXPORT_SYMBOL(sock_alloc_send_skb); +/* On 32bit arches, an skb frag is limited to 2^15 */ +#define SKB_FRAG_PAGE_ORDER get_order(32768) + +bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) +{ + int order; + + if (pfrag->page) { + if (atomic_read(&pfrag->page->_count) == 1) { + pfrag->offset = 0; + return true; + } + if (pfrag->offset < pfrag->size) + return true; + put_page(pfrag->page); + } + + /* We restrict high order allocations to users that can afford to wait */ + order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0; + + do { + gfp_t gfp = sk->sk_allocation; + + if (order) + gfp |= __GFP_COMP | __GFP_NOWARN; + pfrag->page = alloc_pages(gfp, order); + if (likely(pfrag->page)) { + pfrag->offset = 0; + pfrag->size = PAGE_SIZE << order; + return true; + } + } while (--order >= 0); + + sk_enter_memory_pressure(sk); + sk_stream_moderate_sndbuf(sk); + return false; +} +EXPORT_SYMBOL(sk_page_frag_refill); + static void __lock_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) @@ -2110,8 +2193,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_error_report = sock_def_error_report; sk->sk_destruct = sock_def_destruct; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; sk->sk_peek_off = -1; sk->sk_peer_pid = NULL; @@ -2354,6 +2437,12 @@ void sk_common_release(struct sock *sk) xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); + + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + } + sock_put(sk); } EXPORT_SYMBOL(sk_common_release); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 9d8755e4a7a5..602cd637182e 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -172,8 +172,7 @@ static int __net_init diag_net_init(struct net *net) .input = sock_diag_rcv, }; - net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, - THIS_MODULE, &cfg); + net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg); return net->diag_nlsk == NULL ? -ENOMEM : 0; } diff --git a/net/core/utils.c b/net/core/utils.c index 39895a65e54a..f5613d569c23 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -294,6 +294,26 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, } EXPORT_SYMBOL(inet_proto_csum_replace4); +void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, + const __be32 *from, const __be32 *to, + int pseudohdr) +{ + __be32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_partial(diff, sizeof(diff), + ~csum_unfold(*sum))); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_partial(diff, sizeof(diff), + ~skb->csum); + } else if (pseudohdr) + *sum = ~csum_fold(csum_partial(diff, sizeof(diff), + csum_unfold(*sum))); +} +EXPORT_SYMBOL(inet_proto_csum_replace16); + int mac_pton(const char *s, u8 *mac) { int i; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 81f2bb62dea3..70989e672304 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1319,7 +1319,7 @@ nla_put_failure: } static int dcbnl_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid, int dcbx_ver) + u32 seq, u32 portid, int dcbx_ver) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -1330,7 +1330,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, if (!ops) return -EOPNOTSUPP; - skb = dcbnl_newmsg(event, cmd, pid, seq, 0, &nlh); + skb = dcbnl_newmsg(event, cmd, portid, seq, 0, &nlh); if (!skb) return -ENOBUFS; @@ -1353,16 +1353,16 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, } int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_IEEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_IEEE); } EXPORT_SYMBOL(dcbnl_ieee_notify); int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_CEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_CEE); } EXPORT_SYMBOL(dcbnl_cee_notify); @@ -1656,7 +1656,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *netdev; struct dcbmsg *dcb = nlmsg_data(nlh); struct nlattr *tb[DCB_ATTR_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = -EINVAL; struct sk_buff *reply_skb; struct nlmsghdr *reply_nlh = NULL; @@ -1690,7 +1690,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto out; } - reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, pid, nlh->nlmsg_seq, + reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq, nlh->nlmsg_flags, &reply_nlh); if (!reply_skb) { ret = -ENOBUFS; @@ -1705,7 +1705,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) nlmsg_end(reply_skb, reply_nlh); - ret = rtnl_unicast(reply_skb, &init_net, pid); + ret = rtnl_unicast(reply_skb, &init_net, portid); out: dev_put(netdev); return ret; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 75c3582a7678..fb85d371a8de 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen); return rc; @@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen); return rc; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d65e98798eca..119c04317d48 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -535,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(tfrc)) return -EINVAL; + memset(&tfrc, 0, sizeof(tfrc)); tfrc.tfrctx_x = hc->tx_x; tfrc.tfrctx_x_recv = hc->tx_x_recv; tfrc.tfrctx_x_calc = hc->tx_x_calc; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2ba1a2814c24..307c322d53bb 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1313,10 +1313,10 @@ static int dn_shutdown(struct socket *sock, int how) if (scp->state == DN_O) goto out; - if (how != SHUTDOWN_MASK) + if (how != SHUT_RDWR) goto out; - sk->sk_shutdown = how; + sk->sk_shutdown = SHUTDOWN_MASK; dn_destroy_sock(sk); err = 0; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index f3924ab1e019..7b7e561412d3 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -667,12 +667,12 @@ static inline size_t dn_ifaddr_nlmsg_size(void) } static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -753,7 +753,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (dn_idx < skip_naddr) continue; - if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) < 0) goto done; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 85a3604c87c8..b57419cc41a4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -961,7 +961,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o .saddr = oldflp->saddr, .flowidn_scope = RT_SCOPE_UNIVERSE, .flowidn_mark = oldflp->flowidn_mark, - .flowidn_iif = init_net.loopback_dev->ifindex, + .flowidn_iif = LOOPBACK_IFINDEX, .flowidn_oif = oldflp->flowidn_oif, }; struct dn_route *rt = NULL; @@ -979,7 +979,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr), le16_to_cpu(oldflp->saddr), - oldflp->flowidn_mark, init_net.loopback_dev->ifindex, + oldflp->flowidn_mark, LOOPBACK_IFINDEX, oldflp->flowidn_oif); /* If we have an output interface, verify its a DECnet device */ @@ -1042,7 +1042,7 @@ source_ok: if (!fld.daddr) goto out; } - fld.flowidn_oif = init_net.loopback_dev->ifindex; + fld.flowidn_oif = LOOPBACK_IFINDEX; res.type = RTN_LOCAL; goto make_route; } @@ -1543,7 +1543,7 @@ static int dn_route_input(struct sk_buff *skb) return dn_route_input_slow(skb); } -static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct dn_route *rt = (struct dn_route *)skb_dst(skb); @@ -1551,7 +1551,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, struct nlmsghdr *nlh; long expires; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (!nlh) return -EMSGSIZE; @@ -1685,7 +1685,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); + err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err == 0) goto out_free; @@ -1694,7 +1694,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void goto out_free; } - return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid); out_free: kfree_skb(skb); @@ -1737,7 +1737,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) continue; skb_dst_set(skb, dst_clone(&rt->dst)); - if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { skb_dst_drop(skb); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 16c986ab1228..f968c1b58f47 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -291,14 +291,14 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) return payload; } -static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -374,14 +374,14 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; - u32 pid = req ? req->pid : 0; + u32 portid = req ? req->portid : 0; int err = -ENOBUFS; skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL); if (skb == NULL) goto errout; - err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, + err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id, f->fn_type, f->fn_scope, &f->fn_key, z, DN_FIB_INFO(f), 0); if (err < 0) { @@ -390,7 +390,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, kfree_skb(skb); goto errout; } - rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); return; errout: if (err < 0) @@ -411,7 +411,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, continue; if (f->fn_state & DN_S_ZOMBIE) continue; - if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->n, diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 11db0ecf342f..dfe42012a044 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -130,8 +130,7 @@ static int __init dn_rtmsg_init(void) .input = dnrmg_receive_user_skb, }; - dnrmg = netlink_kernel_create(&init_net, - NETLINK_DNRTMSG, THIS_MODULE, &cfg); + dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index d9507dd05818..9807945a56d9 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -259,7 +259,8 @@ static int __init init_dns_resolver(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred, + keyring = key_alloc(&key_type_keyring, ".dns_resolver", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_NOT_IN_QUOTA); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 88e7c2f3fa0d..45295ca09571 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -370,7 +370,7 @@ static int dsa_remove(struct platform_device *pdev) if (dst->link_poll_needed) del_timer_sync(&dst->link_poll_timer); - flush_work_sync(&dst->link_poll_work); + flush_work(&dst->link_poll_work); for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 6a095225148e..6d42c17af96b 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1063,12 +1063,6 @@ out: return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); } -static void lowpan_dev_free(struct net_device *dev) -{ - dev_put(lowpan_dev_info(dev)->real_dev); - free_netdev(dev); -} - static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; @@ -1118,7 +1112,7 @@ static void lowpan_setup(struct net_device *dev) dev->netdev_ops = &lowpan_netdev_ops; dev->header_ops = &lowpan_header_ops; dev->ml_priv = &lowpan_mlme; - dev->destructor = lowpan_dev_free; + dev->destructor = free_netdev; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1133,6 +1127,8 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { + struct sk_buff *local_skb; + if (!netif_running(dev)) goto drop; @@ -1144,7 +1140,12 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - lowpan_process_data(skb); + local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + goto drop; + lowpan_process_data(local_skb); + + kfree_skb(skb); break; default: break; @@ -1237,6 +1238,34 @@ static inline void __init lowpan_netlink_fini(void) rtnl_link_unregister(&lowpan_link_ops); } +static int lowpan_device_event(struct notifier_block *unused, + unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + LIST_HEAD(del_list); + struct lowpan_dev_record *entry, *tmp; + + if (dev->type != ARPHRD_IEEE802154) + goto out; + + if (event == NETDEV_UNREGISTER) { + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (lowpan_dev_info(entry->ldev)->real_dev == dev) + lowpan_dellink(entry->ldev, &del_list); + } + + unregister_netdevice_many(&del_list); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block lowpan_dev_notifier = { + .notifier_call = lowpan_device_event, +}; + static struct packet_type lowpan_packet_type = { .type = __constant_htons(ETH_P_IEEE802154), .func = lowpan_rcv, @@ -1251,6 +1280,12 @@ static int __init lowpan_init_module(void) goto out; dev_add_pack(&lowpan_packet_type); + + err = register_netdevice_notifier(&lowpan_dev_notifier); + if (err < 0) { + dev_remove_pack(&lowpan_packet_type); + lowpan_netlink_fini(); + } out: return err; } @@ -1263,6 +1298,8 @@ static void __exit lowpan_cleanup_module(void) dev_remove_pack(&lowpan_packet_type); + unregister_netdevice_notifier(&lowpan_dev_notifier); + /* Now 6lowpan packet_type is removed, so no new fragments are * expected on RX, therefore that's the time to clean incomplete * fragments. diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 1e9917124e75..96bb08abece2 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -246,7 +246,7 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_start_confirm); -static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev) { void *hdr; @@ -534,7 +534,7 @@ static int ieee802154_list_iface(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq, 0, dev); if (rc < 0) goto out_free; @@ -565,7 +565,7 @@ static int ieee802154_dump_iface(struct sk_buff *skb, if (idx < s_idx || (dev->type != ARPHRD_IEEE802154)) goto cont; - if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid, + if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) break; cont: diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index d54be34cca94..22b1a7058fd3 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -35,7 +35,7 @@ #include "ieee802154.h" -static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct wpan_phy *phy) { void *hdr; @@ -105,7 +105,7 @@ static int ieee802154_list_phy(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq, 0, phy); if (rc < 0) goto out_free; @@ -138,7 +138,7 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) return 0; rc = ieee802154_nl_fill_phy(data->skb, - NETLINK_CB(data->cb->skb).pid, + NETLINK_CB(data->cb->skb).portid, data->cb->nlh->nlmsg_seq, NLM_F_MULTI, phy); diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ae2ccf2890e4..15ca63ec604e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o +obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fe4582ca969a..766c59658563 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -212,6 +212,26 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { + /* Check special setups for testing purpose to enable TFO w/o + * requiring TCP_FASTOPEN sockopt. + * Note that only TCP sockets (SOCK_STREAM) will reach here. + * Also fastopenq may already been allocated because this + * socket was in TCP_LISTEN state previously but was + * shutdown() (rather than close()). + */ + if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && + inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) { + if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) + err = fastopen_init_queue(sk, backlog); + else if ((sysctl_tcp_fastopen & + TFO_SERVER_WO_SOCKOPT2) != 0) + err = fastopen_init_queue(sk, + ((uint)sysctl_tcp_fastopen) >> 16); + else + err = 0; + if (err) + goto out; + } err = inet_csk_listen_start(sk, backlog); if (err) goto out; @@ -701,7 +721,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); + (TCPF_ESTABLISHED | TCPF_SYN_RECV | + TCPF_CLOSE_WAIT | TCPF_CLOSE))); sock_graft(sk2, newsock); @@ -1364,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (*(u8 *)iph != 0x45) goto out_unlock; - if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + if (unlikely(ip_fast_csum((u8 *)iph, 5))) goto out_unlock; id = ntohl(*(__be32 *)&iph->id); @@ -1380,7 +1401,6 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, iph2 = ip_hdr(p); if ((iph->protocol ^ iph2->protocol) | - (iph->tos ^ iph2->tos) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { NAPI_GRO_CB(p)->same_flow = 0; @@ -1390,6 +1410,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, /* All fields must match except length and checksum. */ NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | + (iph->tos ^ iph2->tos) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a0124eb7dbea..47800459e4cb 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -827,7 +827,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input(skb, tip, sip, 0, dev) == 0) { + ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; @@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; default: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 44bf82e3aef7..2a6abc163ed2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -94,25 +94,22 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; -/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE - * value. So if you change this define, make appropriate changes to - * inet_addr_hash as well. - */ -#define IN4_ADDR_HSIZE 256 +#define IN4_ADDR_HSIZE_SHIFT 8 +#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) + static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; static DEFINE_SPINLOCK(inet_addr_hash_lock); -static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) +static u32 inet_addr_hash(struct net *net, __be32 addr) { - u32 val = (__force u32) addr ^ hash_ptr(net, 8); + u32 val = (__force u32) addr ^ net_hash_mix(net); - return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & - (IN4_ADDR_HSIZE - 1)); + return hash_32(val, IN4_ADDR_HSIZE_SHIFT); } static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) { - unsigned int hash = inet_addr_hash(net, ifa->ifa_local); + u32 hash = inet_addr_hash(net, ifa->ifa_local); spin_lock(&inet_addr_hash_lock); hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); @@ -136,18 +133,18 @@ static void inet_hash_remove(struct in_ifaddr *ifa) */ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { - unsigned int hash = inet_addr_hash(net, addr); + u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; struct hlist_node *node; rcu_read_lock(); hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { - struct net_device *dev = ifa->ifa_dev->dev; - - if (!net_eq(dev_net(dev), net)) - continue; if (ifa->ifa_local == addr) { + struct net_device *dev = ifa->ifa_dev->dev; + + if (!net_eq(dev_net(dev), net)) + continue; result = dev; break; } @@ -182,10 +179,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, static void devinet_sysctl_register(struct in_device *idev); static void devinet_sysctl_unregister(struct in_device *idev); #else -static inline void devinet_sysctl_register(struct in_device *idev) +static void devinet_sysctl_register(struct in_device *idev) { } -static inline void devinet_sysctl_unregister(struct in_device *idev) +static void devinet_sysctl_unregister(struct in_device *idev) { } #endif @@ -205,7 +202,7 @@ static void inet_rcu_free_ifa(struct rcu_head *head) kfree(ifa); } -static inline void inet_free_ifa(struct in_ifaddr *ifa) +static void inet_free_ifa(struct in_ifaddr *ifa) { call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); } @@ -314,7 +311,7 @@ int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) } static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy, struct nlmsghdr *nlh, u32 pid) + int destroy, struct nlmsghdr *nlh, u32 portid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1 = *ifap; @@ -348,7 +345,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); @@ -385,7 +382,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ - rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { @@ -398,7 +395,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } promote->ifa_flags &= ~IFA_F_SECONDARY; - rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { @@ -420,7 +417,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -467,7 +464,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ - rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; @@ -566,7 +563,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) continue; - __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -652,14 +649,14 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); } /* * Determine a default network mask, based on the IP address. */ -static inline int inet_abc_len(__be32 addr) +static int inet_abc_len(__be32 addr) { int rc = -1; /* Something else, probably a multicast. */ @@ -725,7 +722,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; case SIOCSIFFLAGS: - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; break; @@ -733,7 +730,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; ret = -EINVAL; @@ -1124,7 +1121,7 @@ skip: } } -static inline bool inetdev_valid_mtu(unsigned int mtu) +static bool inetdev_valid_mtu(unsigned int mtu) { return mtu >= 68; } @@ -1239,7 +1236,7 @@ static struct notifier_block ip_netdev_notifier = { .notifier_call = inetdev_event, }; -static inline size_t inet_nlmsg_size(void) +static size_t inet_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(4) /* IFA_ADDRESS */ @@ -1249,12 +1246,12 @@ static inline size_t inet_nlmsg_size(void) } static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -1316,7 +1313,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) { rcu_read_unlock(); @@ -1338,7 +1335,7 @@ done: } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct sk_buff *skb; u32 seq = nlh ? nlh->nlmsg_seq : 0; @@ -1350,14 +1347,14 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, if (skb == NULL) goto errout; - err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); + err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); return; errout: if (err < 0) @@ -1503,7 +1500,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) - rt_cache_flush(net, 0); + rt_cache_flush(net); } return ret; @@ -1537,7 +1534,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, dev_disable_lro(idev->dev); } rtnl_unlock(); - rt_cache_flush(net, 0); + rt_cache_flush(net); } } @@ -1554,7 +1551,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write, struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(net, 0); + rt_cache_flush(net); return ret; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8732cc7920ed..68c93d1bb03a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -148,7 +148,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(net, -1); + rt_cache_flush(net); } /* @@ -218,7 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { fl4.flowi4_oif = 0; - fl4.flowi4_iif = net->loopback_dev->ifindex; + fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.daddr = ip_hdr(skb)->saddr; fl4.saddr = 0; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); @@ -557,7 +557,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_flags = rtm->rtm_flags; cfg->fc_nlflags = nlh->nlmsg_flags; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = net; @@ -955,7 +955,7 @@ static void nl_fib_input(struct sk_buff *skb) struct fib_result_nl *frn; struct nlmsghdr *nlh; struct fib_table *tb; - u32 pid; + u32 portid; net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); @@ -973,10 +973,10 @@ static void nl_fib_input(struct sk_buff *skb) nl_fib_lookup(frn, tb); - pid = NETLINK_CB(skb).pid; /* pid of sending process */ - NETLINK_CB(skb).pid = 0; /* from kernel */ + portid = NETLINK_CB(skb).portid; /* pid of sending process */ + NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); + netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); } static int __net_init nl_fib_lookup_init(struct net *net) @@ -986,7 +986,7 @@ static int __net_init nl_fib_lookup_init(struct net *net) .input = nl_fib_input, }; - sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, THIS_MODULE, &cfg); + sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg); if (sk == NULL) return -EAFNOSUPPORT; net->ipv4.fibnl = sk; @@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force, int delay) +static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(dev_net(dev), delay); + rt_cache_flush(dev_net(dev)); arp_ifdown(dev); } @@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); @@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. */ - fib_disable_ip(dev, 1, 0); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); } break; } @@ -1041,16 +1041,16 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev; struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2, -1); + fib_disable_ip(dev, 2); + rt_flush_dev(dev); return NOTIFY_DONE; } - if (!in_dev) - return NOTIFY_DONE; + in_dev = __in_dev_get_rtnl(dev); switch (event) { case NETDEV_UP: @@ -1061,16 +1061,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(net); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0, 0); + fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(dev_net(dev), 0); - break; - case NETDEV_UNREGISTER_BATCH: + rt_cache_flush(net); break; } return NOTIFY_DONE; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index a83d74e498d2..274309d3aded 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(ops->fro_net, -1); + rt_cache_flush(ops->fro_net); } static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e55171f184f9..267753060ffc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }, }; +static void rt_fibinfo_free(struct rtable __rcu **rtp) +{ + struct rtable *rt = rcu_dereference_protected(*rtp, 1); + + if (!rt) + return; + + /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); + * because we waited an RCU grace period before calling + * free_fib_info_rcu() + */ + + dst_free(&rt->dst); +} + static void free_nh_exceptions(struct fib_nh *nh) { struct fnhe_hash_bucket *hash = nh->nh_exceptions; @@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh) struct fib_nh_exception *next; next = rcu_dereference_protected(fnhe->fnhe_next, 1); + + rt_fibinfo_free(&fnhe->fnhe_rth); + kfree(fnhe); fnhe = next; @@ -161,6 +179,23 @@ static void free_nh_exceptions(struct fib_nh *nh) kfree(hash); } +static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) +{ + int cpu; + + if (!rtp) + return; + + for_each_possible_cpu(cpu) { + struct rtable *rt; + + rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); + if (rt) + dst_free(&rt->dst); + } + free_percpu(rtp); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { @@ -171,10 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); - if (nexthop_nh->nh_rth_output) - dst_release(&nexthop_nh->nh_rth_output->dst); - if (nexthop_nh->nh_rth_input) - dst_release(&nexthop_nh->nh_rth_input->dst); + rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); + rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); release_net(fi->fib_net); @@ -281,6 +314,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) nfi->fib_scope == fi->fib_scope && nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && + nfi->fib_type == fi->fib_type && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && @@ -358,7 +392,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, if (skb == NULL) goto errout; - err = fib_dump_info(skb, info->pid, seq, event, tb_id, + err = fib_dump_info(skb, info->portid, seq, event, tb_id, fa->fa_type, key, dst_len, fa->fa_tos, fa->fa_info, nlm_flags); if (err < 0) { @@ -367,7 +401,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, kfree_skb(skb); goto errout; } - rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, + rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); return; errout: @@ -800,10 +834,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; fi->fib_prefsrc = cfg->fc_prefsrc; + fi->fib_type = cfg->fc_type; fi->fib_nhs = nhs; change_nexthops(fi) { nexthop_nh->nh_parent = fi; + nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); } endfor_nexthops(fi) if (cfg->fc_mx) { @@ -955,14 +991,14 @@ failure: return ERR_PTR(err); } -int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 18cbc15b20d5..31d771ca9a70 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -159,7 +159,6 @@ struct trie { #endif }; -static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull); static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); @@ -368,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head) static inline void free_leaf(struct leaf *l) { - call_rcu_bh(&l->rcu, __leaf_free_rcu); + call_rcu(&l->rcu, __leaf_free_rcu); } static inline void free_leaf_info(struct leaf_info *leaf) @@ -473,7 +472,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), - sizeof(struct rt_trie_node) << bits); + sizeof(struct rt_trie_node *) << bits); return tn; } @@ -490,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node * return ((struct tnode *) n)->pos == tn->pos + tn->bits; } -static inline void put_child(struct trie *t, struct tnode *tn, int i, +static inline void put_child(struct tnode *tn, int i, struct rt_trie_node *n) { tnode_put_child_reorg(tn, i, n, -1); @@ -754,8 +753,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) goto nomem; } - put_child(t, tn, 2*i, (struct rt_trie_node *) left); - put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); + put_child(tn, 2*i, (struct rt_trie_node *) left); + put_child(tn, 2*i+1, (struct rt_trie_node *) right); } } @@ -776,9 +775,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits, 1) == 0) - put_child(t, tn, 2*i, node); + put_child(tn, 2*i, node); else - put_child(t, tn, 2*i+1, node); + put_child(tn, 2*i+1, node); continue; } @@ -786,8 +785,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) inode = (struct tnode *) node; if (inode->bits == 1) { - put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); - put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); + put_child(tn, 2*i, rtnl_dereference(inode->child[0])); + put_child(tn, 2*i+1, rtnl_dereference(inode->child[1])); tnode_free_safe(inode); continue; @@ -817,22 +816,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) */ left = (struct tnode *) tnode_get_child(tn, 2*i); - put_child(t, tn, 2*i, NULL); + put_child(tn, 2*i, NULL); BUG_ON(!left); right = (struct tnode *) tnode_get_child(tn, 2*i+1); - put_child(t, tn, 2*i+1, NULL); + put_child(tn, 2*i+1, NULL); BUG_ON(!right); size = tnode_child_length(left); for (j = 0; j < size; j++) { - put_child(t, left, j, rtnl_dereference(inode->child[j])); - put_child(t, right, j, rtnl_dereference(inode->child[j + size])); + put_child(left, j, rtnl_dereference(inode->child[j])); + put_child(right, j, rtnl_dereference(inode->child[j + size])); } - put_child(t, tn, 2*i, resize(t, left)); - put_child(t, tn, 2*i+1, resize(t, right)); + put_child(tn, 2*i, resize(t, left)); + put_child(tn, 2*i+1, resize(t, right)); tnode_free_safe(inode); } @@ -877,7 +876,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) if (!newn) goto nomem; - put_child(t, tn, i/2, (struct rt_trie_node *)newn); + put_child(tn, i/2, (struct rt_trie_node *)newn); } } @@ -892,21 +891,21 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) if (left == NULL) { if (right == NULL) /* Both are empty */ continue; - put_child(t, tn, i/2, right); + put_child(tn, i/2, right); continue; } if (right == NULL) { - put_child(t, tn, i/2, left); + put_child(tn, i/2, left); continue; } /* Two nonempty children */ newBinNode = (struct tnode *) tnode_get_child(tn, i/2); - put_child(t, tn, i/2, NULL); - put_child(t, newBinNode, 0, left); - put_child(t, newBinNode, 1, right); - put_child(t, tn, i/2, resize(t, newBinNode)); + put_child(tn, i/2, NULL); + put_child(newBinNode, 0, left); + put_child(newBinNode, 1, right); + put_child(tn, i/2, resize(t, newBinNode)); } tnode_free_safe(oldtnode); return tn; @@ -1125,7 +1124,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) node_set_parent((struct rt_trie_node *)l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, tp, cindex, (struct rt_trie_node *)l); + put_child(tp, cindex, (struct rt_trie_node *)l); } else { /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ /* @@ -1155,12 +1154,12 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) node_set_parent((struct rt_trie_node *)tn, tp); missbit = tkey_extract_bits(key, newpos, 1); - put_child(t, tn, missbit, (struct rt_trie_node *)l); - put_child(t, tn, 1-missbit, n); + put_child(tn, missbit, (struct rt_trie_node *)l); + put_child(tn, 1-missbit, n); if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, tp, cindex, (struct rt_trie_node *)tn); + put_child(tp, cindex, (struct rt_trie_node *)tn); } else { rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tp = tn; @@ -1287,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1334,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1551,7 +1550,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, * state.directly. */ if (pref_mismatch) { - int mp = KEYLENGTH - fls(pref_mismatch); + /* fls(x) = __fls(x) + 1 */ + int mp = KEYLENGTH - __fls(pref_mismatch) - 1; if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) goto backtrace; @@ -1619,7 +1619,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) if (tp) { t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); - put_child(t, tp, cindex, NULL); + put_child(tp, cindex, NULL); trie_rebalance(t, tp); } else RCU_INIT_POINTER(t->trie, NULL); @@ -1656,7 +1656,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!l) return -ESRCH; - fa_head = get_fa_head(l, plen); + li = find_leaf_info(l, plen); + + if (!li) + return -ESRCH; + + fa_head = &li->falh; fa = fib_find_alias(fa_head, tos, 0); if (!fa) @@ -1692,9 +1697,6 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); - l = fib_find_node(t, key); - li = find_leaf_info(l, plen); - list_del_rcu(&fa->fa_list); if (!plen) @@ -1709,7 +1711,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); @@ -1871,7 +1873,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, continue; } - if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->tb_id, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6699f23e6f55..736ab70fd179 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -815,14 +815,15 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) return 1; } -static void igmp_heard_report(struct in_device *in_dev, __be32 group) +/* return true if packet was dropped */ +static bool igmp_heard_report(struct in_device *in_dev, __be32 group) { struct ip_mc_list *im; /* Timers are only set for non-local groups */ if (group == IGMP_ALL_HOSTS) - return; + return false; rcu_read_lock(); for_each_pmc_rcu(in_dev, im) { @@ -832,9 +833,11 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group) } } rcu_read_unlock(); + return false; } -static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, +/* return true if packet was dropped */ +static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int len) { struct igmphdr *ih = igmp_hdr(skb); @@ -866,7 +869,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, /* clear deleted report items */ igmpv3_clear_delrec(in_dev); } else if (len < 12) { - return; /* ignore bogus packet; freed by caller */ + return true; /* ignore bogus packet; freed by caller */ } else if (IGMP_V1_SEEN(in_dev)) { /* This is a v3 query with v1 queriers present */ max_delay = IGMP_Query_Response_Interval; @@ -883,13 +886,13 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) - return; + return true; ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) { if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) + ntohs(ih3->nsrcs)*sizeof(__be32))) - return; + return true; ih3 = igmpv3_query_hdr(skb); } @@ -901,9 +904,9 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, in_dev->mr_qrv = ih3->qrv; if (!group) { /* general query */ if (ih3->nsrcs) - return; /* no sources allowed */ + return false; /* no sources allowed */ igmp_gq_start_timer(in_dev); - return; + return false; } /* mark sources to include, if group & source-specific */ mark = ih3->nsrcs != 0; @@ -939,6 +942,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, igmp_mod_timer(im, max_delay); } rcu_read_unlock(); + return false; } /* called in rcu_read_lock() section */ @@ -948,6 +952,7 @@ int igmp_rcv(struct sk_buff *skb) struct igmphdr *ih; struct in_device *in_dev = __in_dev_get_rcu(skb->dev); int len = skb->len; + bool dropped = true; if (in_dev == NULL) goto drop; @@ -969,7 +974,7 @@ int igmp_rcv(struct sk_buff *skb) ih = igmp_hdr(skb); switch (ih->type) { case IGMP_HOST_MEMBERSHIP_QUERY: - igmp_heard_query(in_dev, skb, len); + dropped = igmp_heard_query(in_dev, skb, len); break; case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: @@ -979,7 +984,7 @@ int igmp_rcv(struct sk_buff *skb) /* don't rely on MC router hearing unicast reports */ if (skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST) - igmp_heard_report(in_dev, ih->group); + dropped = igmp_heard_report(in_dev, ih->group); break; case IGMP_PIM: #ifdef CONFIG_IP_PIMSM_V1 @@ -997,7 +1002,10 @@ int igmp_rcv(struct sk_buff *skb) } drop: - kfree_skb(skb); + if (dropped) + kfree_skb(skb); + else + consume_skb(skb); return 0; } @@ -1896,6 +1904,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_unlock(); return ret; } +EXPORT_SYMBOL(ip_mc_leave_group); int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mreq_source *mreqs, int ifindex) @@ -2435,6 +2444,8 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) struct ip_mc_list *im = (struct ip_mc_list *)v; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); char *querier; + long delta; + #ifdef CONFIG_IP_MULTICAST querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : IGMP_V2_SEEN(state->in_dev) ? "V2" : @@ -2448,11 +2459,12 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } + delta = im->timer.expires - jiffies; seq_printf(seq, "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n", im->multiaddr, im->users, - im->tm_running, im->tm_running ? - jiffies_to_clock_t(im->timer.expires-jiffies) : 0, + im->tm_running, + im->tm_running ? jiffies_delta_to_clock_t(delta) : 0, im->reporter); } return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index db0cf17c00f7..f0c5b9c1a957 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) { struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct sock *newsk; + struct request_sock *req; int error; lock_sock(sk); @@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) goto out_err; /* Find already established connection */ - if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { + if (reqsk_queue_empty(queue)) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ @@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) if (error) goto out_err; } - - newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); - WARN_ON(newsk->sk_state == TCP_SYN_RECV); + req = reqsk_queue_remove(queue); + newsk = req->sk; + + sk_acceptq_removed(sk); + if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) { + spin_lock_bh(&queue->fastopenq->lock); + if (tcp_rsk(req)->listener) { + /* We are still waiting for the final ACK from 3WHS + * so can't free req now. Instead, we set req->sk to + * NULL to signify that the child socket is taken + * so reqsk_fastopen_remove() will free the req + * when 3WHS finishes (or is aborted). + */ + req->sk = NULL; + req = NULL; + } + spin_unlock_bh(&queue->fastopenq->lock); + } out: release_sock(sk); + if (req) + __reqsk_free(req); return newsk; out_err: newsk = NULL; + req = NULL; *err = error; goto out; } @@ -404,12 +424,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *newinet = inet_sk(newsk); - struct ip_options_rcu *opt = ireq->opt; + struct ip_options_rcu *opt; struct net *net = sock_net(sk); struct flowi4 *fl4; struct rtable *rt; fl4 = &newinet->cork.fl.u.ip4; + + rcu_read_lock(); + opt = rcu_dereference(newinet->inet_opt); flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -421,11 +444,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } @@ -715,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start); void inet_csk_listen_stop(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct request_sock *acc_req; struct request_sock *req; inet_csk_delete_keepalive_timer(sk); /* make all the listen_opt local to us */ - acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); + acc_req = reqsk_queue_yank_acceptq(queue); /* Following specs, it would be better either to send FIN * (and enter FIN-WAIT-1, it is normal close) @@ -731,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk) * To be honest, we are not able to make either * of the variants now. --ANK */ - reqsk_queue_destroy(&icsk->icsk_accept_queue); + reqsk_queue_destroy(queue); while ((req = acc_req) != NULL) { struct sock *child = req->sk; @@ -749,6 +775,19 @@ void inet_csk_listen_stop(struct sock *sk) percpu_counter_inc(sk->sk_prot->orphan_count); + if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) { + BUG_ON(tcp_sk(child)->fastopen_rsk != req); + BUG_ON(sk != tcp_rsk(req)->listener); + + /* Paranoid, to prevent race condition if + * an inbound pkt destined for child is + * blocked by sock lock in tcp_v4_rcv(). + * Also to satisfy an assertion in + * tcp_v4_destroy_sock(). + */ + tcp_sk(child)->fastopen_rsk = NULL; + sock_put(sk); + } inet_csk_destroy_sock(child); bh_unlock_sock(child); @@ -758,6 +797,17 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); __reqsk_free(req); } + if (queue->fastopenq != NULL) { + /* Free all the reqs queued in rskq_rst_head. */ + spin_lock_bh(&queue->fastopenq->lock); + acc_req = queue->fastopenq->rskq_rst_head; + queue->fastopenq->rskq_rst_head = NULL; + spin_unlock_bh(&queue->fastopenq->lock); + while ((req = acc_req) != NULL) { + acc_req = req->dl_next; + __reqsk_free(req); + } + } WARN_ON(sk->sk_ack_backlog); } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 570e61f9611f..535584c00f91 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -69,7 +69,8 @@ static inline void inet_diag_unlock_handler( int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, - u32 pid, u32 seq, u16 nlmsg_flags, + struct user_namespace *user_ns, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); @@ -83,7 +84,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(handler == NULL); - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -124,7 +125,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, } #endif - r->idiag_uid = sock_i_uid(sk); + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { @@ -199,23 +200,24 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *req, - u32 pid, u32 seq, u16 nlmsg_flags, + struct user_namespace *user_ns, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, pid, seq, nlmsg_flags, unlh); + skb, req, user_ns, portid, seq, nlmsg_flags, unlh); } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, struct sk_buff *skb, struct inet_diag_req_v2 *req, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { long tmo; struct inet_diag_msg *r; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -256,14 +258,16 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags, + struct inet_diag_req_v2 *r, + struct user_namespace *user_ns, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, r, pid, seq, nlmsg_flags, + skb, r, portid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -311,14 +315,15 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s } err = sk_diag_fill(sk, rep, req, - NETLINK_CB(in_skb).pid, + sk_user_ns(NETLINK_CB(in_skb).ssk), + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -551,7 +556,8 @@ static int inet_csk_diag_dump(struct sock *sk, return 0; return inet_csk_diag_fill(sk, skb, r, - NETLINK_CB(cb->skb).pid, + sk_user_ns(NETLINK_CB(cb->skb).ssk), + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -586,12 +592,14 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, } return inet_twsk_diag_fill(tw, skb, r, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, u32 pid, u32 seq, + struct request_sock *req, + struct user_namespace *user_ns, + u32 portid, u32 seq, const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -600,7 +608,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; @@ -625,7 +633,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; - r->idiag_uid = sock_i_uid(sk); + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = 0; #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { @@ -702,7 +710,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, } err = inet_diag_fill_req(skb, sk, req, - NETLINK_CB(cb->skb).pid, + sk_user_ns(NETLINK_CB(cb->skb).ssk), + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { cb->args[3] = j + 1; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 85190e69297b..4750d2b74d79 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -89,7 +89,7 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) nf->low_thresh = 0; local_bh_disable(); - inet_frag_evictor(nf, f); + inet_frag_evictor(nf, f, true); local_bh_enable(); } EXPORT_SYMBOL(inet_frags_exit_net); @@ -158,11 +158,16 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, } EXPORT_SYMBOL(inet_frag_destroy); -int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f) +int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) { struct inet_frag_queue *q; int work, evicted = 0; + if (!force) { + if (atomic_read(&nf->mem) <= nf->high_thresh) + return 0; + } + work = atomic_read(&nf->mem) - nf->low_thresh; while (work > 0) { read_lock(&f->lock); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e1e0a4e8fd34..000e3d239d64 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -194,7 +194,7 @@ void __init inet_initpeers(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); + INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker); } static int addr_compare(const struct inetpeer_addr *a, @@ -510,7 +510,10 @@ relookup: secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; - p->rate_last = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7ad88e5e7110..448e68546827 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -219,7 +219,7 @@ static void ip_evictor(struct net *net) { int evicted; - evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags); + evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false); if (evicted) IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); } @@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg) /* skb dst is stale, drop it, and perform route lookup again */ skb_dst_drop(head); iph = ip_hdr(head); - err = ip_route_input(head, iph->daddr, iph->saddr, - iph->tos, head->dev); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); if (err) goto out_rcu_unlock; @@ -523,6 +523,10 @@ found: if (offset == 0) qp->q.last_in |= INET_FRAG_FIRST_IN; + if (ip_hdr(skb)->frag_off & htons(IP_DF) && + skb->len + ihl > qp->q.max_size) + qp->q.max_size = skb->len + ihl; + if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && qp->q.meat == qp->q.len) return ip_frag_reasm(qp, prev, dev); @@ -646,9 +650,11 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->next = NULL; head->dev = dev; head->tstamp = qp->q.stamp; + IPCB(head)->frag_max_size = qp->q.max_size; iph = ip_hdr(head); - iph->frag_off = 0; + /* max_size != 0 implies at least one fragment had IP_DF set */ + iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; iph->tot_len = htons(len); iph->tos |= ecn; IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); @@ -678,8 +684,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. */ - if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) - ip_evictor(net); + ip_evictor(net); /* Lookup (or create) queue header */ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b062a98574f2..7240f8e2dd45 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -120,6 +120,10 @@ Alexey Kuznetsov. */ +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); static void ipgre_tunnel_setup(struct net_device *dev); @@ -204,7 +208,9 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, tot->rx_crc_errors = dev->stats.rx_crc_errors; tot->rx_fifo_errors = dev->stats.rx_fifo_errors; tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; tot->tx_carrier_errors = dev->stats.tx_carrier_errors; tot->tx_dropped = dev->stats.tx_dropped; @@ -214,11 +220,25 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, return tot; } +/* Does key in tunnel parameters match packet */ +static bool ipgre_key_match(const struct ip_tunnel_parm *p, + __be16 flags, __be32 key) +{ + if (p->i_flags & GRE_KEY) { + if (flags & GRE_KEY) + return key == p->i_key; + else + return false; /* key expected, none present */ + } else + return !(flags & GRE_KEY); +} + /* Given src, dst and key, find appropriate for input tunnel. */ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, __be32 remote, __be32 local, - __be32 key, __be16 gre_proto) + __be16 flags, __be32 key, + __be16 gre_proto) { struct net *net = dev_net(dev); int link = dev->ifindex; @@ -233,10 +253,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { if (local != t->parms.iph.saddr || remote != t->parms.iph.daddr || - key != t->parms.i_key || !(t->dev->flags & IFF_UP)) continue; + if (!ipgre_key_match(&t->parms, flags, key)) + continue; + if (t->dev->type != ARPHRD_IPGRE && t->dev->type != dev_type) continue; @@ -257,10 +279,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { if (remote != t->parms.iph.daddr || - key != t->parms.i_key || !(t->dev->flags & IFF_UP)) continue; + if (!ipgre_key_match(&t->parms, flags, key)) + continue; + if (t->dev->type != ARPHRD_IPGRE && t->dev->type != dev_type) continue; @@ -283,10 +307,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, if ((local != t->parms.iph.saddr && (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) || - key != t->parms.i_key || !(t->dev->flags & IFF_UP)) continue; + if (!ipgre_key_match(&t->parms, flags, key)) + continue; + if (t->dev->type != ARPHRD_IPGRE && t->dev->type != dev_type) continue; @@ -489,6 +515,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; __be16 flags; + __be32 key = 0; flags = p[0]; if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { @@ -505,6 +532,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) if (skb_headlen(skb) < grehlen) return; + if (flags & GRE_KEY) + key = *(((__be32 *)p) + (grehlen / 4) - 1); + switch (type) { default: case ICMP_PARAMETERPROB: @@ -533,49 +563,34 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - rcu_read_lock(); t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, - flags & GRE_KEY ? - *(((__be32 *)p) + (grehlen / 4) - 1) : 0, - p[1]); + flags, key, p[1]); + if (t == NULL) - goto out; + return; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, t->parms.link, 0, IPPROTO_GRE, 0); - goto out; + return; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_GRE, 0); - goto out; + return; } if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) - goto out; + return; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - goto out; + return; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; -out: - rcu_read_unlock(); -} - -static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) -{ - if (INET_ECN_is_ce(iph->tos)) { - if (skb->protocol == htons(ETH_P_IP)) { - IP_ECN_set_ce(ip_hdr(skb)); - } else if (skb->protocol == htons(ETH_P_IPV6)) { - IP6_ECN_set_ce(ipv6_hdr(skb)); - } - } } static inline u8 @@ -600,9 +615,10 @@ static int ipgre_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; int offset = 4; __be16 gre_proto; + int err; if (!pskb_may_pull(skb, 16)) - goto drop_nolock; + goto drop; iph = ip_hdr(skb); h = skb->data; @@ -613,7 +629,7 @@ static int ipgre_rcv(struct sk_buff *skb) - We do not support routing headers. */ if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop_nolock; + goto drop; if (flags&GRE_CSUM) { switch (skb->ip_summed) { @@ -641,10 +657,10 @@ static int ipgre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); - rcu_read_lock(); - if ((tunnel = ipgre_tunnel_lookup(skb->dev, - iph->saddr, iph->daddr, key, - gre_proto))) { + tunnel = ipgre_tunnel_lookup(skb->dev, + iph->saddr, iph->daddr, flags, key, + gre_proto); + if (tunnel) { struct pcpu_tstats *tstats; secpath_reset(skb); @@ -703,27 +719,33 @@ static int ipgre_rcv(struct sk_buff *skb) skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + tstats = this_cpu_ptr(tunnel->dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - ipgre_ecn_decapsulate(iph, skb); - - netif_rx(skb); - - rcu_read_unlock(); + gro_cells_receive(&tunnel->gro_cells, skb); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: - rcu_read_unlock(); -drop_nolock: kfree_skb(skb); return 0; } @@ -745,6 +767,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev __be32 dst; int mtu; + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) + goto tx_error; + if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -1292,10 +1318,18 @@ static const struct net_device_ops ipgre_netdev_ops = { static void ipgre_dev_free(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + + gro_cells_destroy(&tunnel->gro_cells); free_percpu(dev->tstats); free_netdev(dev); } +#define GRE_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; @@ -1309,12 +1343,16 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->addr_len = 4; dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + + dev->features |= GRE_FEATURES; + dev->hw_features |= GRE_FEATURES; } static int ipgre_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel; struct iphdr *iph; + int err; tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; @@ -1341,6 +1379,12 @@ static int ipgre_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + err = gro_cells_init(&tunnel->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + return 0; } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 93134b0eab0c..f1395a6fb35f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -314,6 +314,7 @@ drop: } int sysctl_ip_early_demux __read_mostly = 1; +EXPORT_SYMBOL(sysctl_ip_early_demux); static int ip_rcv_finish(struct sk_buff *skb) { @@ -324,14 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb) const struct net_protocol *ipprot; int protocol = iph->protocol; - rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->early_demux) { ipprot->early_demux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } - rcu_read_unlock(); } /* @@ -339,8 +338,8 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (!skb_dst(skb)) { - int err = ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ba39a52d18c1..24a29a39e9a8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -197,7 +197,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); - if (neigh) { + if (!IS_ERR(neigh)) { int res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); @@ -467,7 +467,9 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); - if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || + (IPCB(skb)->frag_max_size && + IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) { IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(ip_skb_dst_mtu(skb))); @@ -791,6 +793,7 @@ static int __ip_append_data(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork, + struct page_frag *pfrag, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -985,47 +988,30 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = cork->page; - int off = cork->off; - unsigned int left; - - if (page && (left = PAGE_SIZE - off) > 0) { - if (copy >= left) - copy = left; - if (page != skb_frag_page(frag)) { - if (i == MAX_SKB_FRAGS) { - err = -EMSGSIZE; - goto error; - } - skb_fill_page_desc(skb, i, page, off, 0); - skb_frag_ref(skb, i); - frag = &skb_shinfo(skb)->frags[i]; - } - } else if (i < MAX_SKB_FRAGS) { - if (copy > PAGE_SIZE) - copy = PAGE_SIZE; - page = alloc_pages(sk->sk_allocation, 0); - if (page == NULL) { - err = -ENOMEM; - goto error; - } - cork->page = page; - cork->off = 0; - skb_fill_page_desc(skb, i, page, 0, 0); - frag = &skb_shinfo(skb)->frags[i]; - } else { - err = -EMSGSIZE; - goto error; - } - if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag), - offset, copy, skb->len, skb) < 0) { - err = -EFAULT; + err = -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) goto error; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + err = -EMSGSIZE; + if (i == MAX_SKB_FRAGS) + goto error; + + __skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, 0); + skb_shinfo(skb)->nr_frags = ++i; + get_page(pfrag->page); } - cork->off += copy; - skb_frag_size_add(frag, copy); + copy = min_t(int, copy, pfrag->size - pfrag->offset); + if (getfrag(from, + page_address(pfrag->page) + pfrag->offset, + offset, copy, skb->len, skb) < 0) + goto error_efault; + + pfrag->offset += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1037,6 +1023,8 @@ alloc_new_skb: return 0; +error_efault: + err = -EFAULT; error: cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); @@ -1077,8 +1065,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->dst = &rt->dst; cork->length = 0; cork->tx_flags = ipc->tx_flags; - cork->page = NULL; - cork->off = 0; return 0; } @@ -1115,7 +1101,8 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4, transhdrlen = 0; } - return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag, + return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, + sk_page_frag(sk), getfrag, from, length, transhdrlen, flags); } @@ -1338,10 +1325,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ihl = 5; iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); + ip_select_ident(iph, &rt->dst, sk); if (opt) { iph->ihl += opt->optlen>>2; @@ -1366,9 +1353,8 @@ out: return skb; } -int ip_send_skb(struct sk_buff *skb) +int ip_send_skb(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); int err; err = ip_local_out(skb); @@ -1391,7 +1377,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) return 0; /* Netfilter gets whole the not fragmented skb. */ - return ip_send_skb(skb); + return ip_send_skb(sock_net(sk), skb); } /* @@ -1438,7 +1424,8 @@ struct sk_buff *ip_make_skb(struct sock *sk, if (err) return ERR_PTR(err); - err = __ip_append_data(sk, fl4, &queue, &cork, getfrag, + err = __ip_append_data(sk, fl4, &queue, &cork, + ¤t->task_frag, getfrag, from, length, transhdrlen, flags); if (err) { __ip_flush_pending_frames(sk, &queue, &cork); @@ -1536,6 +1523,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; + skb_orphan(nskb); skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 3511ffba7bd4..978bca4818ae 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -304,7 +304,6 @@ static int vti_err(struct sk_buff *skb, u32 info) err = -ENOENT; - rcu_read_lock(); t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); if (t == NULL) goto out; @@ -326,7 +325,6 @@ static int vti_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - rcu_read_unlock(); return err; } @@ -336,7 +334,6 @@ static int vti_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - rcu_read_lock(); tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); if (tunnel != NULL) { struct pcpu_tstats *tstats; @@ -348,10 +345,8 @@ static int vti_rcv(struct sk_buff *skb) u64_stats_update_end(&tstats->syncp); skb->dev = tunnel->dev; - rcu_read_unlock(); return 1; } - rcu_read_unlock(); return -1; } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 67e8a6b086ea..798358b10717 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -583,6 +583,17 @@ static void __init ic_rarp_send_if(struct ic_device *d) #endif /* + * Predefine Nameservers + */ +static inline void __init ic_nameservers_predef(void) +{ + int i; + + for (i = 0; i < CONF_NAMESERVERS_MAX; i++) + ic_nameservers[i] = NONE; +} + +/* * DHCP/BOOTP support. */ @@ -747,10 +758,7 @@ static void __init ic_bootp_init_ext(u8 *e) */ static inline void __init ic_bootp_init(void) { - int i; - - for (i = 0; i < CONF_NAMESERVERS_MAX; i++) - ic_nameservers[i] = NONE; + ic_nameservers_predef(); dev_add_pack(&bootp_packet_type); } @@ -1379,6 +1387,7 @@ static int __init ip_auto_config(void) int retries = CONF_OPEN_RETRIES; #endif int err; + unsigned int i; #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1499,7 +1508,15 @@ static int __init ip_auto_config(void) &ic_servaddr, &root_server_addr, root_server_path); if (ic_dev_mtu) pr_cont(", mtu=%d", ic_dev_mtu); - pr_cont("\n"); + for (i = 0; i < CONF_NAMESERVERS_MAX; i++) + if (ic_nameservers[i] != NONE) { + pr_info(" nameserver%u=%pI4", + i, &ic_nameservers[i]); + break; + } + for (i++; i < CONF_NAMESERVERS_MAX; i++) + if (ic_nameservers[i] != NONE) + pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); #endif /* !SILENT */ return 0; @@ -1570,6 +1587,8 @@ static int __init ip_auto_config_setup(char *addrs) return 1; } + ic_nameservers_predef(); + /* Parse string for static IP assignment. */ ip = addrs; while (ip && *ip) { @@ -1613,6 +1632,20 @@ static int __init ip_auto_config_setup(char *addrs) ic_enable = 0; } break; + case 7: + if (CONF_NAMESERVERS_MAX >= 1) { + ic_nameservers[0] = in_aton(ip); + if (ic_nameservers[0] == ANY) + ic_nameservers[0] = NONE; + } + break; + case 8: + if (CONF_NAMESERVERS_MAX >= 2) { + ic_nameservers[1] = in_aton(ip); + if (ic_nameservers[1] == ANY) + ic_nameservers[1] = NONE; + } + break; } } ip = cp; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 99af1f0cc658..e15b45297c09 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -120,6 +120,10 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + static int ipip_net_id __read_mostly; struct ipip_net { struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; @@ -365,8 +369,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) } err = -ENOENT; - - rcu_read_lock(); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); if (t == NULL) goto out; @@ -398,34 +400,22 @@ static int ipip_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - rcu_read_unlock(); - return err; -} - -static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, - struct sk_buff *skb) -{ - struct iphdr *inner_iph = ip_hdr(skb); - if (INET_ECN_is_ce(outer_iph->tos)) - IP_ECN_set_ce(inner_iph); + return err; } static int ipip_rcv(struct sk_buff *skb) { struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); + int err; - rcu_read_lock(); tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); if (tunnel != NULL) { struct pcpu_tstats *tstats; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - rcu_read_unlock(); - kfree_skb(skb); - return 0; - } + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; secpath_reset(skb); @@ -434,24 +424,35 @@ static int ipip_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); skb->pkt_type = PACKET_HOST; + __skb_tunnel_rx(skb, tunnel->dev); + + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + tstats = this_cpu_ptr(tunnel->dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - __skb_tunnel_rx(skb, tunnel->dev); - - ipip_ecn_decapsulate(iph, skb); - netif_rx(skb); - - rcu_read_unlock(); return 0; } - rcu_read_unlock(); return -1; + +drop: + kfree_skb(skb); + return 0; } /* diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8eec8f4a0536..1daa95c2a0ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; static struct mr_table *ipmr_new_table(struct net *net, u32 id); +static void ipmr_free_table(struct mr_table *mrt); + static int ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local); @@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); +static void mroute_clean_tables(struct mr_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net) list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); - kfree(mrt); + ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); } @@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { - kfree(net->ipv4.mrt); + ipmr_free_table(net->ipv4.mrt); } #endif @@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) return mrt; } +static void ipmr_free_table(struct mr_table *mrt) +{ + del_timer_sync(&mrt->ipmr_expire_timer); + mroute_clean_tables(mrt); + kfree(mrt); +} + /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) @@ -616,7 +626,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { kfree_skb(skb); } @@ -860,7 +870,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { ip_mr_forward(net, mrt, skb, c, 0); } @@ -1798,7 +1808,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) .flowi4_oif = (rt_is_output_route(rt) ? skb->dev->ifindex : 0), .flowi4_iif = (rt_is_output_route(rt) ? - net->loopback_dev->ifindex : + LOOPBACK_IFINDEX : skb->dev->ifindex), .flowi4_mark = skb->mark, }; @@ -2107,12 +2117,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc_cache *c) + u32 portid, u32 seq, struct mfc_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2166,7 +2176,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ed1b36783192..4c0cf63dd92e 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -72,43 +72,6 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) } EXPORT_SYMBOL(ip_route_me_harder); -#ifdef CONFIG_XFRM -int ip_xfrm_me_harder(struct sk_buff *skb) -{ - struct flowi fl; - unsigned int hh_len; - struct dst_entry *dst; - - if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) - return 0; - if (xfrm_decode_session(skb, &fl, AF_INET) < 0) - return -1; - - dst = skb_dst(skb); - if (dst->xfrm) - dst = ((struct xfrm_dst *)dst)->route; - dst_hold(dst); - - dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); - if (IS_ERR(dst)) - return -1; - - skb_dst_drop(skb); - skb_dst_set(skb, dst); - - /* Change in oif may mean change in hh_len. */ - hh_len = skb_dst(skb)->dev->hard_header_len; - if (skb_headroom(skb) < hh_len && - pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) - return -1; - return 0; -} -EXPORT_SYMBOL(ip_xfrm_me_harder); -#endif - -void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); -EXPORT_SYMBOL(ip_nat_decode_session); - /* * Extra routing may needed on local out, as the QUEUE target never * returns control to the table. @@ -225,12 +188,12 @@ static const struct nf_afinfo nf_ip_afinfo = { .route_key_size = sizeof(struct ip_rt_info), }; -static int ipv4_netfilter_init(void) +static int __init ipv4_netfilter_init(void) { return nf_register_afinfo(&nf_ip_afinfo); } -static void ipv4_netfilter_fini(void) +static void __exit ipv4_netfilter_fini(void) { nf_unregister_afinfo(&nf_ip_afinfo); } diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index fcc543cd987a..d8d6f2a5bf12 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -143,25 +143,22 @@ config IP_NF_TARGET_ULOG To compile it as a module, choose M here. If unsure, say N. # NAT + specific targets: nf_conntrack -config NF_NAT - tristate "Full NAT" +config NF_NAT_IPV4 + tristate "IPv4 NAT" depends on NF_CONNTRACK_IPV4 default m if NETFILTER_ADVANCED=n + select NF_NAT help - The Full NAT option allows masquerading, port forwarding and other + The IPv4 NAT option allows masquerading, port forwarding and other forms of full Network Address Port Translation. It is controlled by the `nat' table in iptables: see the man page for iptables(8). To compile it as a module, choose M here. If unsure, say N. -config NF_NAT_NEEDED - bool - depends on NF_NAT - default y +if NF_NAT_IPV4 config IP_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - depends on NF_NAT default m if NETFILTER_ADVANCED=n help Masquerading is a special case of NAT: all outgoing connections are @@ -174,30 +171,27 @@ config IP_NF_TARGET_MASQUERADE config IP_NF_TARGET_NETMAP tristate "NETMAP target support" - depends on NF_NAT depends on NETFILTER_ADVANCED - help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. - - To compile it as a module, choose M here. If unsure, say N. + select NETFILTER_XT_TARGET_NETMAP + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_NETMAP. config IP_NF_TARGET_REDIRECT tristate "REDIRECT target support" - depends on NF_NAT depends on NETFILTER_ADVANCED - help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. + select NETFILTER_XT_TARGET_REDIRECT + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_REDIRECT. - To compile it as a module, choose M here. If unsure, say N. +endif config NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support" - depends on NF_CONNTRACK_SNMP && NF_NAT + depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4 depends on NETFILTER_ADVANCED default NF_NAT && NF_CONNTRACK_SNMP ---help--- @@ -219,61 +213,21 @@ config NF_NAT_SNMP_BASIC # <expr> '&&' <expr> (6) # # (6) Returns the result of min(/expr/, /expr/). -config NF_NAT_PROTO_DCCP - tristate - depends on NF_NAT && NF_CT_PROTO_DCCP - default NF_NAT && NF_CT_PROTO_DCCP config NF_NAT_PROTO_GRE tristate - depends on NF_NAT && NF_CT_PROTO_GRE - -config NF_NAT_PROTO_UDPLITE - tristate - depends on NF_NAT && NF_CT_PROTO_UDPLITE - default NF_NAT && NF_CT_PROTO_UDPLITE - -config NF_NAT_PROTO_SCTP - tristate - default NF_NAT && NF_CT_PROTO_SCTP - depends on NF_NAT && NF_CT_PROTO_SCTP - select LIBCRC32C - -config NF_NAT_FTP - tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_FTP - -config NF_NAT_IRC - tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_IRC - -config NF_NAT_TFTP - tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_TFTP - -config NF_NAT_AMANDA - tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_AMANDA + depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE config NF_NAT_PPTP tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_PPTP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_PPTP select NF_NAT_PROTO_GRE config NF_NAT_H323 tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_H323 - -config NF_NAT_SIP - tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_SIP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_H323 # mangle + specific targets config IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c20674dc9452..007b128eecc9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -10,32 +10,22 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o endif endif -nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o -iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o - # connection tracking obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o -obj-$(CONFIG_NF_NAT) += nf_nat.o +nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o +obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o # defrag obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o # NAT helpers (nf_conntrack) -obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o -obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o -obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o -obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o -obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # NAT protocols (nf_nat) -obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o -obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o -obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o @@ -43,7 +33,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_NF_NAT) += iptable_nat.o +obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o @@ -55,8 +45,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o -obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o -obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index cbb6a1a6f6f7..5d5d4d1be9c2 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -19,9 +19,9 @@ #include <net/ip.h> #include <net/checksum.h> #include <net/route.h> -#include <net/netfilter/nf_nat_rule.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_nat.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -49,7 +49,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; struct nf_conn_nat *nat; enum ip_conntrack_info ctinfo; - struct nf_nat_ipv4_range newrange; + struct nf_nat_range newrange; const struct nf_nat_ipv4_multi_range_compat *mr; const struct rtable *rt; __be32 newsrc, nh; @@ -80,10 +80,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat->masq_index = par->out->ifindex; /* Transfer from original range. */ - newrange = ((struct nf_nat_ipv4_range) - { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, - newsrc, newsrc, - mr->range[0].min, mr->range[0].max }); + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newsrc; + newrange.max_addr.ip = newsrc; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); @@ -96,7 +99,8 @@ device_cmp(struct nf_conn *i, void *ifindex) if (!nat) return 0; - + if (nf_ct_l3num(i) != NFPROTO_IPV4) + return 0; return nat->masq_index == (int)(long)ifindex; } diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c deleted file mode 100644 index b5bfbbabf70d..000000000000 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ /dev/null @@ -1,98 +0,0 @@ -/* NETMAP - static NAT mapping of IP network addresses (1:1). - * The mapping can be applied to source (POSTROUTING), - * destination (PREROUTING), or both (with separate rules). - */ - -/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/ip.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter/x_tables.h> -#include <net/netfilter/nf_nat_rule.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); -MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); - -static int netmap_tg_check(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { - pr_debug("bad MAP_IPS.\n"); - return -EINVAL; - } - if (mr->rangesize != 1) { - pr_debug("bad rangesize %u.\n", mr->rangesize); - return -EINVAL; - } - return 0; -} - -static unsigned int -netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 new_ip, netmask; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_ipv4_range newrange; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT || - par->hooknum == NF_INET_LOCAL_IN); - ct = nf_ct_get(skb, &ctinfo); - - netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) - new_ip = ip_hdr(skb)->daddr & ~netmask; - else - new_ip = ip_hdr(skb)->saddr & ~netmask; - new_ip |= mr->range[0].min_ip & netmask; - - newrange = ((struct nf_nat_ipv4_range) - { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, - new_ip, new_ip, - mr->range[0].min, mr->range[0].max }); - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); -} - -static struct xt_target netmap_tg_reg __read_mostly = { - .name = "NETMAP", - .family = NFPROTO_IPV4, - .target = netmap_tg, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_LOCAL_IN), - .checkentry = netmap_tg_check, - .me = THIS_MODULE -}; - -static int __init netmap_tg_init(void) -{ - return xt_register_target(&netmap_tg_reg); -} - -static void __exit netmap_tg_exit(void) -{ - xt_unregister_target(&netmap_tg_reg); -} - -module_init(netmap_tg_init); -module_exit(netmap_tg_exit); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c deleted file mode 100644 index 7c0103a5203e..000000000000 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ /dev/null @@ -1,110 +0,0 @@ -/* Redirect. Simple mapping which alters dst to a local IP address. */ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/types.h> -#include <linux/ip.h> -#include <linux/timer.h> -#include <linux/module.h> -#include <linux/netfilter.h> -#include <linux/netdevice.h> -#include <linux/if.h> -#include <linux/inetdevice.h> -#include <net/protocol.h> -#include <net/checksum.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter/x_tables.h> -#include <net/netfilter/nf_nat_rule.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); - -/* FIXME: Take multiple ranges --RR */ -static int redirect_tg_check(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { - pr_debug("bad MAP_IPS.\n"); - return -EINVAL; - } - if (mr->rangesize != 1) { - pr_debug("bad rangesize %u.\n", mr->rangesize); - return -EINVAL; - } - return 0; -} - -static unsigned int -redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_ipv4_range newrange; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); - - ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - /* Local packets: make them go to loopback */ - if (par->hooknum == NF_INET_LOCAL_OUT) - newdst = htonl(0x7F000001); - else { - struct in_device *indev; - struct in_ifaddr *ifa; - - newdst = 0; - - rcu_read_lock(); - indev = __in_dev_get_rcu(skb->dev); - if (indev && (ifa = indev->ifa_list)) - newdst = ifa->ifa_local; - rcu_read_unlock(); - - if (!newdst) - return NF_DROP; - } - - /* Transfer from original range. */ - newrange = ((struct nf_nat_ipv4_range) - { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, - newdst, newdst, - mr->range[0].min, mr->range[0].max }); - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); -} - -static struct xt_target redirect_tg_reg __read_mostly = { - .name = "REDIRECT", - .family = NFPROTO_IPV4, - .target = redirect_tg, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), - .checkentry = redirect_tg_check, - .me = THIS_MODULE, -}; - -static int __init redirect_tg_init(void) -{ - return xt_register_target(&redirect_tg_reg); -} - -static void __exit redirect_tg_exit(void) -{ - xt_unregister_target(&redirect_tg_reg); -} - -module_init(redirect_tg_init); -module_exit(redirect_tg_exit); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 1109f7f6c254..b5ef3cba2250 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -396,8 +396,7 @@ static int __init ulog_tg_init(void) for (i = 0; i < ULOG_MAXNLGROUPS; i++) setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, - THIS_MODULE, &cfg); + nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); if (!nflognl) return -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 31371be8174b..c30130062cd6 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) return ipv4_is_local_multicast(iph->daddr) ^ invert; flow.flowi4_iif = 0; } else { - flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex; + flow.flowi4_iif = LOOPBACK_IFINDEX; } flow.daddr = iph->saddr; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 851acec852d2..6b3da5cf54e9 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -69,9 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net) net->ipv4.iptable_filter = ipt_register_table(net, &packet_filter, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_filter)) - return PTR_ERR(net->ipv4.iptable_filter); - return 0; + return PTR_RET(net->ipv4.iptable_filter); } static void __net_exit iptable_filter_net_exit(struct net *net) @@ -96,14 +94,10 @@ static int __init iptable_filter_init(void) filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); if (IS_ERR(filter_ops)) { ret = PTR_ERR(filter_ops); - goto cleanup_table; + unregister_pernet_subsys(&iptable_filter_net_ops); } return ret; - - cleanup_table: - unregister_pernet_subsys(&iptable_filter_net_ops); - return ret; } static void __exit iptable_filter_fini(void) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index aef5d1fbe77d..85d88f206447 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -104,9 +104,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) net->ipv4.iptable_mangle = ipt_register_table(net, &packet_mangler, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_mangle)) - return PTR_ERR(net->ipv4.iptable_mangle); - return 0; + return PTR_RET(net->ipv4.iptable_mangle); } static void __net_exit iptable_mangle_net_exit(struct net *net) @@ -131,14 +129,10 @@ static int __init iptable_mangle_init(void) mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); if (IS_ERR(mangle_ops)) { ret = PTR_ERR(mangle_ops); - goto cleanup_table; + unregister_pernet_subsys(&iptable_mangle_net_ops); } return ret; - - cleanup_table: - unregister_pernet_subsys(&iptable_mangle_net_ops); - return ret; } static void __exit iptable_mangle_fini(void) diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/iptable_nat.c index 3828a4229822..9e0ffaf1d942 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -1,84 +1,71 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2011 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/types.h> -#include <linux/icmp.h> -#include <linux/gfp.h> -#include <linux/ip.h> + +#include <linux/module.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/ip.h> #include <net/ip.h> -#include <net/checksum.h> -#include <linux/spinlock.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_conntrack_core.h> -#include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_rule.h> -#include <net/netfilter/nf_nat_protocol.h> #include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_helper.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/netfilter/nf_nat_l3proto.h> + +static const struct xt_table nf_nat_ipv4_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV4, +}; -#ifdef CONFIG_XFRM -static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { - struct flowi4 *fl4 = &fl->u.ip4; - const struct nf_conn *ct; - const struct nf_conntrack_tuple *t; - enum ip_conntrack_info ctinfo; - enum ip_conntrack_dir dir; - unsigned long statusbit; - - ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) - return; - dir = CTINFO2DIR(ctinfo); - t = &ct->tuplehash[dir].tuple; - - if (dir == IP_CT_DIR_ORIGINAL) - statusbit = IPS_DST_NAT; - else - statusbit = IPS_SRC_NAT; - - if (ct->status & statusbit) { - fl4->daddr = t->dst.u3.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl4->fl4_dport = t->dst.u.tcp.port; - } + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + */ + struct nf_nat_range range; + + range.flags = 0; + pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); + + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} - statusbit ^= IPS_NAT_MASK; +static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + unsigned int ret; - if (ct->status & statusbit) { - fl4->saddr = t->src.u3.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl4->fl4_sport = t->src.u.tcp.port; + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); + if (ret == NF_ACCEPT) { + if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) + ret = alloc_null_binding(ct, hooknum); } + return ret; } -#endif static unsigned int -nf_nat_fn(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +nf_nat_ipv4_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -87,14 +74,16 @@ nf_nat_fn(unsigned int hooknum, enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); /* We never see fragments: conntrack defrags on pre-routing - and local-out, and nf_nat_out protects post-routing. */ + * and local-out, and nf_nat_out protects post-routing. + */ NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would - have dropped it. Hence it's the user's responsibilty to - packet filter it out, or implement conntrack/NAT for that - protocol. 8) --RR */ + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ if (!ct) return NF_ACCEPT; @@ -118,17 +107,17 @@ nf_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { - if (!nf_nat_icmp_reply_translation(ct, ctinfo, - hooknum, skb)) + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + hooknum)) return NF_DROP; else return NF_ACCEPT; } /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ case IP_CT_NEW: - /* Seen it before? This can happen for loopback, retrans, - or local packets.. */ + * or local packets. + */ if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; @@ -151,16 +140,16 @@ nf_nat_fn(unsigned int hooknum, } static unsigned int -nf_nat_in(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +nf_nat_ipv4_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -169,11 +158,11 @@ nf_nat_in(unsigned int hooknum, } static unsigned int -nf_nat_out(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +nf_nat_ipv4_out(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { #ifdef CONFIG_XFRM const struct nf_conn *ct; @@ -186,29 +175,30 @@ nf_nat_out(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && + !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if ((ct->tuplehash[dir].tuple.src.u3.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) || (ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all) - ) - return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET) < 0) + ret = NF_DROP; } #endif return ret; } static unsigned int -nf_nat_local_fn(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +nf_nat_ipv4_local_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { const struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -219,7 +209,7 @@ nf_nat_local_fn(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); @@ -230,21 +220,20 @@ nf_nat_local_fn(unsigned int hooknum, ret = NF_DROP; } #ifdef CONFIG_XFRM - else if (ct->tuplehash[dir].tuple.dst.u.all != + else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) - if (ip_xfrm_me_harder(skb)) + if (nf_xfrm_me_harder(skb, AF_INET) < 0) ret = NF_DROP; #endif } return ret; } -/* We must be after connection tracking and before packet filtering. */ - -static struct nf_hook_ops nf_nat_ops[] __read_mostly = { +static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { /* Before packet filtering, change destination */ { - .hook = nf_nat_in, + .hook = nf_nat_ipv4_in, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, @@ -252,7 +241,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { }, /* After packet filtering, change source */ { - .hook = nf_nat_out, + .hook = nf_nat_ipv4_out, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, @@ -260,7 +249,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { }, /* Before packet filtering, change destination */ { - .hook = nf_nat_local_fn, + .hook = nf_nat_ipv4_local_fn, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, @@ -268,7 +257,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { }, /* After packet filtering, change source */ { - .hook = nf_nat_fn, + .hook = nf_nat_ipv4_fn, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, @@ -276,51 +265,56 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { }, }; -static int __init nf_nat_standalone_init(void) +static int __net_init iptable_nat_net_init(struct net *net) { - int ret = 0; + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); + kfree(repl); + if (IS_ERR(net->ipv4.nat_table)) + return PTR_ERR(net->ipv4.nat_table); + return 0; +} - need_ipv4_conntrack(); +static void __net_exit iptable_nat_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.nat_table); +} -#ifdef CONFIG_XFRM - BUG_ON(ip_nat_decode_session != NULL); - RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session); -#endif - ret = nf_nat_rule_init(); - if (ret < 0) { - pr_err("nf_nat_init: can't setup rules.\n"); - goto cleanup_decode_session; - } - ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); - if (ret < 0) { - pr_err("nf_nat_init: can't register hooks.\n"); - goto cleanup_rule_init; - } - return ret; +static struct pernet_operations iptable_nat_net_ops = { + .init = iptable_nat_net_init, + .exit = iptable_nat_net_exit, +}; - cleanup_rule_init: - nf_nat_rule_cleanup(); - cleanup_decode_session: -#ifdef CONFIG_XFRM - RCU_INIT_POINTER(ip_nat_decode_session, NULL); - synchronize_net(); -#endif - return ret; +static int __init iptable_nat_init(void) +{ + int err; + + err = register_pernet_subsys(&iptable_nat_net_ops); + if (err < 0) + goto err1; + + err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + if (err < 0) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&iptable_nat_net_ops); +err1: + return err; } -static void __exit nf_nat_standalone_fini(void) +static void __exit iptable_nat_exit(void) { - nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); - nf_nat_rule_cleanup(); -#ifdef CONFIG_XFRM - RCU_INIT_POINTER(ip_nat_decode_session, NULL); - synchronize_net(); -#endif - /* Conntrack caches are unregistered in nf_conntrack_cleanup */ + nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + unregister_pernet_subsys(&iptable_nat_net_ops); } -module_init(nf_nat_standalone_init); -module_exit(nf_nat_standalone_fini); +module_init(iptable_nat_init); +module_exit(iptable_nat_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat"); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 07fb710cd722..03d9696d3c6e 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -48,9 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net) net->ipv4.iptable_raw = ipt_register_table(net, &packet_raw, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_raw)) - return PTR_ERR(net->ipv4.iptable_raw); - return 0; + return PTR_RET(net->ipv4.iptable_raw); } static void __net_exit iptable_raw_net_exit(struct net *net) @@ -75,14 +73,10 @@ static int __init iptable_raw_init(void) rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); if (IS_ERR(rawtable_ops)) { ret = PTR_ERR(rawtable_ops); - goto cleanup_table; + unregister_pernet_subsys(&iptable_raw_net_ops); } return ret; - - cleanup_table: - unregister_pernet_subsys(&iptable_raw_net_ops); - return ret; } static void __exit iptable_raw_fini(void) diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index be45bdc4c602..b283d8e2601a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -66,10 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net) net->ipv4.iptable_security = ipt_register_table(net, &security_table, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_security)) - return PTR_ERR(net->ipv4.iptable_security); - - return 0; + return PTR_RET(net->ipv4.iptable_security); } static void __net_exit iptable_security_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index e7ff2dcab6ce..fcdd0c2406e6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -29,11 +29,6 @@ #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #include <net/netfilter/nf_log.h> -int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo); -EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); - static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -149,7 +144,8 @@ static unsigned int ipv4_confirm(unsigned int hooknum, typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); - if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) { + if (!seq_adjust || + !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index c6784a18c1c4..9c3db10b22d3 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -15,13 +15,12 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> -#include <net/netfilter/nf_nat_rule.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_h323.h> /****************************************************************************/ -static int set_addr(struct sk_buff *skb, +static int set_addr(struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, unsigned int addroff, __be32 ip, __be16 port) { @@ -40,7 +39,7 @@ static int set_addr(struct sk_buff *skb, if (ip_hdr(skb)->protocol == IPPROTO_TCP) { if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, - addroff, sizeof(buf), + protoff, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); return -1; @@ -54,7 +53,7 @@ static int set_addr(struct sk_buff *skb, *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; } else { if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, - addroff, sizeof(buf), + protoff, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); return -1; @@ -69,22 +68,22 @@ static int set_addr(struct sk_buff *skb, } /****************************************************************************/ -static int set_h225_addr(struct sk_buff *skb, +static int set_h225_addr(struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_inet_addr *addr, __be16 port) { - return set_addr(skb, data, dataoff, taddr->ipAddress.ip, + return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip, addr->ip, port); } /****************************************************************************/ -static int set_h245_addr(struct sk_buff *skb, +static int set_h245_addr(struct sk_buff *skb, unsigned protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_inet_addr *addr, __be16 port) { - return set_addr(skb, data, dataoff, + return set_addr(skb, protoff, data, dataoff, taddr->unicastAddress.iPAddress.network, addr->ip, port); } @@ -92,7 +91,7 @@ static int set_h245_addr(struct sk_buff *skb, /****************************************************************************/ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) { const struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -118,7 +117,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, &addr.ip, port, &ct->tuplehash[!dir].tuple.dst.u3.ip, info->sig_port[!dir]); - return set_h225_addr(skb, data, 0, &taddr[i], + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], &ct->tuplehash[!dir]. tuple.dst.u3, info->sig_port[!dir]); @@ -129,7 +129,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, &addr.ip, port, &ct->tuplehash[!dir].tuple.src.u3.ip, info->sig_port[!dir]); - return set_h225_addr(skb, data, 0, &taddr[i], + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], &ct->tuplehash[!dir]. tuple.src.u3, info->sig_port[!dir]); @@ -143,7 +144,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) { int dir = CTINFO2DIR(ctinfo); @@ -159,7 +160,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, &addr.ip, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3.ip, ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); - return set_h225_addr(skb, data, 0, &taddr[i], + return set_h225_addr(skb, protoff, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple. dst.u.udp.port); @@ -172,7 +173,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, __be16 rtp_port, struct nf_conntrack_expect *rtp_exp, @@ -244,7 +245,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(skb, data, dataoff, taddr, + if (set_h245_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons((port & htons(1)) ? nated_port + 1 : nated_port)) == 0) { @@ -275,7 +276,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) { @@ -307,7 +308,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(skb, data, dataoff, taddr, + if (set_h245_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) < 0) { nf_ct_unexpect_related(exp); @@ -326,7 +327,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) { @@ -363,7 +364,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(skb, data, dataoff, taddr, + if (set_h225_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -390,7 +391,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_q931_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ nf_nat_follow_master(new, this); @@ -402,21 +403,23 @@ static void ip_nat_q931_expect(struct nf_conn *new, /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = this->saved_proto; - range.min_ip = range.max_ip = - new->master->tuplehash[!this->dir].tuple.src.u3.ip; + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = + new->master->tuplehash[!this->dir].tuple.src.u3; nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } /****************************************************************************/ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, TransportAddress *taddr, int idx, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -453,7 +456,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(skb, data, 0, &taddr[idx], + if (set_h225_addr(skb, protoff, data, 0, &taddr[idx], &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -464,7 +467,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, if (idx > 0 && get_h225_addr(ct, *data, &taddr[0], &addr, &port) && (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { - set_h225_addr(skb, data, 0, &taddr[0], + set_h225_addr(skb, protoff, data, 0, &taddr[0], &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); } @@ -487,26 +490,28 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_callforwarding_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; /* This must be a fresh one. */ BUG_ON(new->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = this->saved_proto; - range.min_ip = range.max_ip = this->saved_ip; + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = this->saved_addr; nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } /****************************************************************************/ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) @@ -515,7 +520,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, u_int16_t nated_port; /* Set expectations for NAT */ - exp->saved_ip = exp->tuple.dst.u3.ip; + exp->saved_addr = exp->tuple.dst.u3; exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip; exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; exp->expectfn = ip_nat_callforwarding_expect; @@ -541,7 +546,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (!set_h225_addr(skb, data, dataoff, taddr, + if (!set_h225_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { nf_ct_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c new file mode 100644 index 000000000000..d8b2e14efddc --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -0,0 +1,281 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/icmp.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <net/secure_seq.h> +#include <net/checksum.h> +#include <net/route.h> +#include <net/ip.h> + +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4; + +#ifdef CONFIG_XFRM +static void nf_nat_ipv4_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi4 *fl4 = &fl->u.ip4; + + if (ct->status & statusbit) { + fl4->daddr = t->dst.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl4->saddr = t->src.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_sport = t->src.u.all; + } +} +#endif /* CONFIG_XFRM */ + +static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range) +{ + return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && + ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); +} + +static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t, + __be16 dport) +{ + return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport); +} + +static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph; + unsigned int hdroff; + + if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) + return false; + + iph = (void *)skb->data + iphdroff; + hdroff = iphdroff + iph->ihl * 4; + + if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff, + target, maniptype)) + return false; + iph = (void *)skb->data + iphdroff; + + if (maniptype == NF_NAT_MANIP_SRC) { + csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); + iph->saddr = target->src.u3.ip; + } else { + csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); + iph->daddr = target->dst.u3.ip; + } + return true; +} + +static void nf_nat_ipv4_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + __be32 oldip, newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = iph->saddr; + newip = t->src.u3.ip; + } else { + oldip = iph->daddr; + newip = t->dst.u3.ip; + } + inet_proto_csum_replace4(check, skb, oldip, newip, 1); +} + +static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt_flags & RTCF_LOCAL) && + (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + ip_hdrlen(skb); + skb->csum_offset = (void *)check - data; + *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, 0); + } else { + *check = 0; + *check = csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, + csum_partial(data, datalen, + 0)); + if (proto == IPPROTO_UDP && !*check) + *check = CSUM_MANGLED_0; + } + } else + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), 1); +} + +static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_NAT_V4_MINIP]) { + range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V4_MAXIP]) + range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); + else + range->max_addr.ip = range->min_addr.ip; + + return 0; +} + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { + .l3proto = NFPROTO_IPV4, + .in_range = nf_nat_ipv4_in_range, + .secure_port = nf_nat_ipv4_secure_port, + .manip_pkt = nf_nat_ipv4_manip_pkt, + .csum_update = nf_nat_ipv4_csum_update, + .csum_recalc = nf_nat_ipv4_csum_recalc, + .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, +#ifdef CONFIG_XFRM + .decode_session = nf_nat_ipv4_decode_session, +#endif +}; + +int nf_nat_icmp_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum) +{ + struct { + struct icmphdr icmp; + struct iphdr ip; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + unsigned int hdrlen = ip_hdrlen(skb); + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + unsigned long statusbit; + + NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp.type == ICMP_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol); + if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), + l4proto, &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt may reallocate */ + inside = (void *)skb->data + hdrlen; + inside->icmp.checksum = 0; + inside->icmp.checksum = + csum_fold(skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); + } + + /* Change outer to look like the reply to an incoming packet */ + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0); + if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); + +static int __init nf_nat_l3proto_ipv4_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp); + if (err < 0) + goto err1; + err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); + if (err < 0) + goto err2; + return err; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +err1: + return err; +} + +static void __exit nf_nat_l3proto_ipv4_exit(void) +{ + nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +} + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-" __stringify(AF_INET)); + +module_init(nf_nat_l3proto_ipv4_init); +module_exit(nf_nat_l3proto_ipv4_exit); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 388140881ebe..a06d7d74817d 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -22,7 +22,6 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> -#include <net/netfilter/nf_nat_rule.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -47,7 +46,7 @@ static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_tuple t; const struct nf_ct_pptp_master *ct_pptp_info; const struct nf_nat_pptp *nat_pptp_info; - struct nf_nat_ipv4_range range; + struct nf_nat_range range; ct_pptp_info = nfct_help_data(master); nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; @@ -89,21 +88,21 @@ static void pptp_nat_expected(struct nf_conn *ct, /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; if (exp->dir == IP_CT_DIR_ORIGINAL) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max = exp->saved_proto; + range.min_proto = range.max_proto = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.src.u3; if (exp->dir == IP_CT_DIR_REPLY) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max = exp->saved_proto; + range.min_proto = range.max_proto = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } @@ -113,6 +112,7 @@ static int pptp_outbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) @@ -175,7 +175,7 @@ pptp_outbound_pkt(struct sk_buff *skb, ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, cid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_callid), (char *)&new_callid, @@ -216,6 +216,7 @@ static int pptp_inbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) { @@ -268,7 +269,7 @@ pptp_inbound_pkt(struct sk_buff *skb, pr_debug("altering peer call id from 0x%04x to 0x%04x\n", ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, pcid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_pcid), (char *)&new_pcid, diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 46ba0b9ab985..ea44f02563b5 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -28,8 +28,7 @@ #include <linux/ip.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_rule.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l4proto.h> #include <linux/netfilter/nf_conntrack_proto_gre.h> MODULE_LICENSE("GPL"); @@ -38,8 +37,9 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); /* generate unique tuple ... */ static void -gre_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +gre_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -62,8 +62,8 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, min = 1; range_size = 0xffff; } else { - min = ntohs(range->min.gre.key); - range_size = ntohs(range->max.gre.key) - min + 1; + min = ntohs(range->min_proto.gre.key); + range_size = ntohs(range->max_proto.gre.key) - min + 1; } pr_debug("min = %u, range_size = %u\n", min, range_size); @@ -80,14 +80,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* manipulate a GRE packet according to maniptype */ static bool -gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, +gre_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { const struct gre_hdr *greh; struct gre_hdr_pptp *pgreh; - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - unsigned int hdroff = iphdroff + iph->ihl * 4; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ @@ -117,24 +117,24 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, return true; } -static const struct nf_nat_protocol gre = { - .protonum = IPPROTO_GRE, +static const struct nf_nat_l4proto gre = { + .l4proto = IPPROTO_GRE, .manip_pkt = gre_manip_pkt, - .in_range = nf_nat_proto_in_range, + .in_range = nf_nat_l4proto_in_range, .unique_tuple = gre_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; static int __init nf_nat_proto_gre_init(void) { - return nf_nat_protocol_register(&gre); + return nf_nat_l4proto_register(NFPROTO_IPV4, &gre); } static void __exit nf_nat_proto_gre_fini(void) { - nf_nat_protocol_unregister(&gre); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre); } module_init(nf_nat_proto_gre_init); diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index b35172851bae..eb303471bcf6 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -15,8 +15,7 @@ #include <linux/netfilter.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_rule.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l4proto.h> static bool icmp_in_range(const struct nf_conntrack_tuple *tuple, @@ -29,8 +28,9 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, } static void -icmp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +icmp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -38,13 +38,14 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, unsigned int range_size; unsigned int i; - range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; /* If no range specified... */ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) range_size = 0xFFFF; for (i = 0; ; ++id) { - tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + + tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + (id % range_size)); if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) return; @@ -54,13 +55,12 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, static bool icmp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct icmphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; @@ -72,12 +72,12 @@ icmp_manip_pkt(struct sk_buff *skb, return true; } -const struct nf_nat_protocol nf_nat_protocol_icmp = { - .protonum = IPPROTO_ICMP, +const struct nf_nat_l4proto nf_nat_l4proto_icmp = { + .l4proto = IPPROTO_ICMP, .manip_pkt = icmp_manip_pkt, .in_range = icmp_in_range, .unique_tuple = icmp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c deleted file mode 100644 index d2a9dc314e0e..000000000000 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ /dev/null @@ -1,214 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* Everything about the rules for NAT. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/types.h> -#include <linux/ip.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> -#include <linux/slab.h> -#include <net/checksum.h> -#include <net/route.h> -#include <linux/bitops.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_rule.h> - -#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ - (1 << NF_INET_POST_ROUTING) | \ - (1 << NF_INET_LOCAL_OUT) | \ - (1 << NF_INET_LOCAL_IN)) - -static const struct xt_table nat_table = { - .name = "nat", - .valid_hooks = NAT_VALID_HOOKS, - .me = THIS_MODULE, - .af = NFPROTO_IPV4, -}; - -/* Source NAT */ -static unsigned int -ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_IN); - - ct = nf_ct_get(skb, &ctinfo); - - /* Connection must be valid and new. */ - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY)); - NF_CT_ASSERT(par->out != NULL); - - return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC); -} - -static unsigned int -ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); - - ct = nf_ct_get(skb, &ctinfo); - - /* Connection must be valid and new. */ - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST); -} - -static int ipt_snat_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - /* Must be a valid range */ - if (mr->rangesize != 1) { - pr_info("SNAT: multiple ranges no longer supported\n"); - return -EINVAL; - } - return 0; -} - -static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - /* Must be a valid range */ - if (mr->rangesize != 1) { - pr_info("DNAT: multiple ranges no longer supported\n"); - return -EINVAL; - } - return 0; -} - -static unsigned int -alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) -{ - /* Force range to this IP; let proto decide mapping for - per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED). - */ - struct nf_nat_ipv4_range range; - - range.flags = 0; - pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, - HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); - - return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); -} - -int nf_nat_rule_find(struct sk_buff *skb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - struct nf_conn *ct) -{ - struct net *net = nf_ct_net(ct); - int ret; - - ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); - - if (ret == NF_ACCEPT) { - if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) - /* NUL mapping */ - ret = alloc_null_binding(ct, hooknum); - } - return ret; -} - -static struct xt_target ipt_snat_reg __read_mostly = { - .name = "SNAT", - .target = ipt_snat_target, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), - .checkentry = ipt_snat_checkentry, - .family = AF_INET, -}; - -static struct xt_target ipt_dnat_reg __read_mostly = { - .name = "DNAT", - .target = ipt_dnat_target, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), - .checkentry = ipt_dnat_checkentry, - .family = AF_INET, -}; - -static int __net_init nf_nat_rule_net_init(struct net *net) -{ - struct ipt_replace *repl; - - repl = ipt_alloc_initial_table(&nat_table); - if (repl == NULL) - return -ENOMEM; - net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); - kfree(repl); - if (IS_ERR(net->ipv4.nat_table)) - return PTR_ERR(net->ipv4.nat_table); - return 0; -} - -static void __net_exit nf_nat_rule_net_exit(struct net *net) -{ - ipt_unregister_table(net, net->ipv4.nat_table); -} - -static struct pernet_operations nf_nat_rule_net_ops = { - .init = nf_nat_rule_net_init, - .exit = nf_nat_rule_net_exit, -}; - -int __init nf_nat_rule_init(void) -{ - int ret; - - ret = register_pernet_subsys(&nf_nat_rule_net_ops); - if (ret != 0) - goto out; - ret = xt_register_target(&ipt_snat_reg); - if (ret != 0) - goto unregister_table; - - ret = xt_register_target(&ipt_dnat_reg); - if (ret != 0) - goto unregister_snat; - - return ret; - - unregister_snat: - xt_unregister_target(&ipt_snat_reg); - unregister_table: - unregister_pernet_subsys(&nf_nat_rule_net_ops); - out: - return ret; -} - -void nf_nat_rule_cleanup(void) -{ - xt_unregister_target(&ipt_dnat_reg); - xt_unregister_target(&ipt_snat_reg); - unregister_pernet_subsys(&nf_nat_rule_net_ops); -} diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 6232d476f37e..8f3d05424a3e 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -185,10 +185,10 @@ exit: return sk; } -static void inet_get_ping_group_range_net(struct net *net, gid_t *low, - gid_t *high) +static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, + kgid_t *high) { - gid_t *data = net->ipv4.sysctl_ping_group_range; + kgid_t *data = net->ipv4.sysctl_ping_group_range; unsigned int seq; do { @@ -203,19 +203,13 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low, static int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); - gid_t group = current_egid(); - gid_t range[2]; + kgid_t group = current_egid(); struct group_info *group_info = get_current_groups(); int i, j, count = group_info->ngroups; kgid_t low, high; - inet_get_ping_group_range_net(net, range, range+1); - low = make_kgid(&init_user_ns, range[0]); - high = make_kgid(&init_user_ns, range[1]); - if (!gid_valid(low) || !gid_valid(high) || gid_lt(high, low)) - return -EACCES; - - if (range[0] <= group && group <= range[1]) + inet_get_ping_group_range_net(net, &low, &high); + if (gid_lte(low, group) && gid_lte(group, high)) return 0; for (i = 0; i < group_info->nblocks; i++) { @@ -845,7 +839,9 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f, bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 957acd12250b..8de53e1ddd54 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -263,6 +263,10 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), + SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE), + SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), + SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), + SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ff0f071969ea..73d1e4df4bf6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -131,18 +131,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; + struct icmphdr _hdr; + const struct icmphdr *hdr; - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ @@ -992,7 +994,9 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) i, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3f7bb7185c50..ff622069fcef 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -70,7 +70,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/bootmem.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> @@ -80,7 +79,6 @@ #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/init.h> -#include <linux/workqueue.h> #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/igmp.h> @@ -88,11 +86,9 @@ #include <linux/mroute.h> #include <linux/netfilter_ipv4.h> #include <linux/random.h> -#include <linux/jhash.h> #include <linux/rcupdate.h> #include <linux/times.h> #include <linux/slab.h> -#include <linux/prefetch.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/protocol.h> @@ -147,6 +143,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void ipv4_dst_destroy(struct dst_entry *dst); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) @@ -170,6 +167,7 @@ static struct dst_ops ipv4_dst_ops = { .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, + .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, @@ -204,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) -static inline int rt_genid(struct net *net) -{ - return atomic_read(&net->ipv4.rt_genid); -} - #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { @@ -449,27 +442,9 @@ static inline bool rt_is_expired(const struct rtable *rth) return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } -/* - * Perturbation of rt_genid by a small quantity [1..256] - * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() - * many times (2^24) without giving recent rt_genid. - * Jenkins hash is strong enough that litle changes of rt_genid are OK. - */ -static void rt_cache_invalidate(struct net *net) -{ - unsigned char shuffle; - - get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &net->ipv4.rt_genid); -} - -/* - * delay < 0 : invalidate cache (fast : entries will be deleted later) - * delay >= 0 : invalidate & flush cache (can be long) - */ -void rt_cache_flush(struct net *net, int delay) +void rt_cache_flush(struct net *net) { - rt_cache_invalidate(net); + rt_genid_bump(net); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -587,11 +562,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, build_sk_flow_key(fl4, sk); } -static DEFINE_SEQLOCK(fnhe_seqlock); +static inline void rt_free(struct rtable *rt) +{ + call_rcu(&rt->dst.rcu_head, dst_rcu_free); +} + +static DEFINE_SPINLOCK(fnhe_lock); static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; + struct rtable *orig; oldest = rcu_dereference(hash->chain); for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; @@ -599,6 +580,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) oldest = fnhe; } + orig = rcu_dereference(oldest->fnhe_rth); + if (orig) { + RCU_INIT_POINTER(oldest->fnhe_rth, NULL); + rt_free(orig); + } return oldest; } @@ -620,7 +606,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, int depth; u32 hval = fnhe_hashfun(daddr); - write_seqlock_bh(&fnhe_seqlock); + spin_lock_bh(&fnhe_lock); hash = nh->nh_exceptions; if (!hash) { @@ -667,7 +653,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_stamp = jiffies; out_unlock: - write_sequnlock_bh(&fnhe_seqlock); + spin_unlock_bh(&fnhe_lock); return; } @@ -925,12 +911,14 @@ static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } + rcu_read_unlock(); return mtu; } @@ -947,7 +935,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, dst->obsolete = DST_OBSOLETE_KILL; } else { rt->rt_pmtu = mtu; - dst_set_expires(&rt->dst, ip_rt_mtu_expires); + rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires); } } @@ -1123,10 +1111,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) const struct rtable *rt = (const struct rtable *) dst; unsigned int mtu = rt->rt_pmtu; - if (mtu && time_after_eq(jiffies, rt->dst.expires)) - mtu = 0; - - if (!mtu) + if (!mtu || time_after_eq(jiffies, rt->dst.expires)) mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu && rt_is_output_route(rt)) @@ -1164,61 +1149,118 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) return NULL; } -static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, +static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, __be32 daddr) { - __be32 fnhe_daddr, gw; - unsigned long expires; - unsigned int seq; - u32 pmtu; - -restart: - seq = read_seqbegin(&fnhe_seqlock); - fnhe_daddr = fnhe->fnhe_daddr; - gw = fnhe->fnhe_gw; - pmtu = fnhe->fnhe_pmtu; - expires = fnhe->fnhe_expires; - if (read_seqretry(&fnhe_seqlock, seq)) - goto restart; - - if (daddr != fnhe_daddr) - return; + bool ret = false; + + spin_lock_bh(&fnhe_lock); - if (pmtu) { - unsigned long diff = expires - jiffies; + if (daddr == fnhe->fnhe_daddr) { + struct rtable *orig; - if (time_before(jiffies, expires)) { - rt->rt_pmtu = pmtu; - dst_set_expires(&rt->dst, diff); + if (fnhe->fnhe_pmtu) { + unsigned long expires = fnhe->fnhe_expires; + unsigned long diff = expires - jiffies; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = fnhe->fnhe_pmtu; + dst_set_expires(&rt->dst, diff); + } } + if (fnhe->fnhe_gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = fnhe->fnhe_gw; + } + + orig = rcu_dereference(fnhe->fnhe_rth); + rcu_assign_pointer(fnhe->fnhe_rth, rt); + if (orig) + rt_free(orig); + + fnhe->fnhe_stamp = jiffies; + ret = true; + } else { + /* Routes we intend to cache in nexthop exception have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; } - if (gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = gw; - } - fnhe->fnhe_stamp = jiffies; -} + spin_unlock_bh(&fnhe_lock); -static inline void rt_release_rcu(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_release(dst); + return ret; } -static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) { - struct rtable *orig, *prev, **p = &nh->nh_rth_output; - - if (rt_is_input_route(rt)) - p = &nh->nh_rth_input; + struct rtable *orig, *prev, **p; + bool ret = true; + if (rt_is_input_route(rt)) { + p = (struct rtable **)&nh->nh_rth_input; + } else { + if (!nh->nh_pcpu_rth_output) + goto nocache; + p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); + } orig = *p; prev = cmpxchg(p, orig, rt); if (prev == orig) { - dst_clone(&rt->dst); if (orig) - call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); + rt_free(orig); + } else { + /* Routes we intend to cache in the FIB nexthop have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ +nocache: + rt->dst.flags |= DST_NOCACHE; + ret = false; + } + + return ret; +} + +static DEFINE_SPINLOCK(rt_uncached_lock); +static LIST_HEAD(rt_uncached_list); + +static void rt_add_uncached_list(struct rtable *rt) +{ + spin_lock_bh(&rt_uncached_lock); + list_add_tail(&rt->rt_uncached, &rt_uncached_list); + spin_unlock_bh(&rt_uncached_lock); +} + +static void ipv4_dst_destroy(struct dst_entry *dst) +{ + struct rtable *rt = (struct rtable *) dst; + + if (!list_empty(&rt->rt_uncached)) { + spin_lock_bh(&rt_uncached_lock); + list_del(&rt->rt_uncached); + spin_unlock_bh(&rt_uncached_lock); + } +} + +void rt_flush_dev(struct net_device *dev) +{ + if (!list_empty(&rt_uncached_list)) { + struct net *net = dev_net(dev); + struct rtable *rt; + + spin_lock_bh(&rt_uncached_lock); + list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + if (rt->dst.dev != dev) + continue; + rt->dst.dev = net->loopback_dev; + dev_hold(rt->dst.dev); + dev_put(dev); + } + spin_unlock_bh(&rt_uncached_lock); } } @@ -1234,20 +1276,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, struct fib_nh_exception *fnhe, struct fib_info *fi, u16 type, u32 itag) { + bool cached = false; + if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) rt->rt_gateway = nh->nh_gw; - if (unlikely(fnhe)) - rt_bind_exception(rt, fnhe, daddr); dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - if (!(rt->dst.flags & DST_HOST)) - rt_cache_route(nh, rt); + if (unlikely(fnhe)) + cached = rt_bind_exception(rt, fnhe, daddr); + else if (!(rt->dst.flags & DST_NOCACHE)) + cached = rt_cache_route(nh, rt); } + if (unlikely(!cached)) + rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -1261,7 +1307,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, bool nopolicy, bool noxfrm, bool will_cache) { return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : DST_HOST) | DST_NOCACHE | + (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); } @@ -1314,6 +1360,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1366,8 +1413,7 @@ static void ip_handle_martian_source(struct net_device *dev, static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos, - struct rtable **result) + __be32 daddr, __be32 saddr, u32 tos) { struct rtable *rth; int err; @@ -1416,9 +1462,9 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = false; if (res->fi) { if (!itag) { - rth = FIB_RES_NH(*res).nh_rth_input; + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); + skb_dst_set_noref(skb, &rth->dst); goto out; } do_cache = true; @@ -1440,13 +1486,14 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); rth->dst.input = ip_forward; rth->dst.output = ip_output; rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); + skb_dst_set(skb, &rth->dst); out: - *result = rth; err = 0; cleanup: return err; @@ -1458,21 +1505,13 @@ static int ip_mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { - struct rtable *rth = NULL; - int err; - #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) fib_select_multipath(res); #endif /* create a routing cache entry */ - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); - if (err) - return err; - - skb_dst_set(skb, &rth->dst); - return 0; + return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); } /* @@ -1524,11 +1563,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_zeronet(daddr)) goto martian_destination; - if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) { - if (ipv4_is_loopback(daddr)) + /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), + * and call it once if daddr or/and saddr are loopback addresses + */ + if (ipv4_is_loopback(daddr)) { + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) goto martian_destination; - - if (ipv4_is_loopback(saddr)) + } else if (ipv4_is_loopback(saddr)) { + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) goto martian_source; } @@ -1553,7 +1595,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, - net->loopback_dev->ifindex, + LOOPBACK_IFINDEX, dev, in_dev, &itag); if (err < 0) goto martian_source_keep_err; @@ -1586,10 +1628,11 @@ local_input: do_cache = false; if (res.fi) { if (!itag) { - rth = FIB_RES_NH(res).nh_rth_input; + rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input); if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); - goto set_and_out; + skb_dst_set_noref(skb, &rth->dst); + err = 0; + goto out; } do_cache = true; } @@ -1613,6 +1656,7 @@ local_input: rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; @@ -1620,7 +1664,6 @@ local_input: } if (do_cache) rt_cache_route(&FIB_RES_NH(res), rth); -set_and_out: skb_dst_set(skb, &rth->dst); err = 0; goto out; @@ -1658,8 +1701,8 @@ martian_source_keep_err: goto out; } -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) +int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev) { int res; @@ -1702,7 +1745,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rcu_read_unlock(); return res; } -EXPORT_SYMBOL(ip_route_input); +EXPORT_SYMBOL(ip_route_input_noref); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, @@ -1752,19 +1795,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = NULL; if (fi) { + struct rtable __rcu **prth; + fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); - if (!fnhe) { - rth = FIB_RES_NH(*res).nh_rth_output; - if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); - return rth; - } + if (fnhe) + prth = &fnhe->fnhe_rth; + else + prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); + rth = rcu_dereference(*prth); + if (rt_cache_valid(rth)) { + dst_hold(&rth->dst); + return rth; } } rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), - fi && !fnhe); + fi); if (!rth) return ERR_PTR(-ENOBUFS); @@ -1777,6 +1824,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); RT_CACHE_STAT_INC(out_slow_tot); @@ -1823,7 +1871,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) orig_oif = fl4->flowi4_oif; - fl4->flowi4_iif = net->loopback_dev->ifindex; + fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); @@ -1912,7 +1960,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (!fl4->daddr) fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl4->flowi4_oif = net->loopback_dev->ifindex; + fl4->flowi4_oif = LOOPBACK_IFINDEX; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -1959,7 +2007,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) } dev_out = net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; - res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } @@ -2056,6 +2103,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; + INIT_LIST_HEAD(&rt->rt_uncached); + dst_free(new); } @@ -2082,7 +2131,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, EXPORT_SYMBOL_GPL(ip_route_output_flow); static int rt_fill_info(struct net *net, __be32 dst, __be32 src, - struct flowi4 *fl4, struct sk_buff *skb, u32 pid, + struct flowi4 *fl4, struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct rtable *rt = skb_rtable(skb); @@ -2092,7 +2141,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2252,12 +2301,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void rt->rt_flags |= RTCF_NOTIFY; err = rt_fill_info(net, dst, src, &fl4, skb, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) goto errout_free; - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; @@ -2273,7 +2322,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(dev_net(in_dev->dev), 0); + rt_cache_flush(dev_net(in_dev->dev)); } #ifdef CONFIG_SYSCTL @@ -2282,16 +2331,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, size_t *lenp, loff_t *ppos) { if (write) { - int flush_delay; - ctl_table ctl; - struct net *net; - - memcpy(&ctl, __ctl, sizeof(ctl)); - ctl.data = &flush_delay; - proc_dointvec(&ctl, write, buffer, lenp, ppos); - - net = (struct net *)__ctl->extra1; - rt_cache_flush(net, flush_delay); + rt_cache_flush((struct net *)__ctl->extra1); return 0; } @@ -2461,8 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - get_random_bytes(&net->ipv4.rt_genid, - sizeof(net->ipv4.rt_genid)); + atomic_set(&net->rt_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 650e1528e1e6..ba48e799b031 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -319,6 +319,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; + treq->listener = NULL; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5840c3255721..9205e492dc9d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -76,9 +76,9 @@ static int ipv4_local_port_range(ctl_table *table, int write, } -static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) +static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high) { - gid_t *data = table->data; + kgid_t *data = table->data; unsigned int seq; do { seq = read_seqbegin(&sysctl_local_ports.lock); @@ -89,12 +89,12 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, } /* Update system visible IP port range */ -static void set_ping_group_range(struct ctl_table *table, gid_t range[2]) +static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high) { - gid_t *data = table->data; + kgid_t *data = table->data; write_seqlock(&sysctl_local_ports.lock); - data[0] = range[0]; - data[1] = range[1]; + data[0] = low; + data[1] = high; write_sequnlock(&sysctl_local_ports.lock); } @@ -103,21 +103,33 @@ static int ipv4_ping_group_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct user_namespace *user_ns = current_user_ns(); int ret; - gid_t range[2]; + gid_t urange[2]; + kgid_t low, high; ctl_table tmp = { - .data = &range, - .maxlen = sizeof(range), + .data = &urange, + .maxlen = sizeof(urange), .mode = table->mode, .extra1 = &ip_ping_group_range_min, .extra2 = &ip_ping_group_range_max, }; - inet_get_ping_group_range_table(table, range, range + 1); + inet_get_ping_group_range_table(table, &low, &high); + urange[0] = from_kgid_munged(user_ns, low); + urange[1] = from_kgid_munged(user_ns, high); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); - if (write && ret == 0) - set_ping_group_range(table, range); + if (write && ret == 0) { + low = make_kgid(user_ns, urange[0]); + high = make_kgid(user_ns, urange[1]); + if (!gid_valid(low) || !gid_valid(high) || + (urange[1] < urange[0]) || gid_lt(high, low)) { + low = make_kgid(&init_user_ns, 1); + high = make_kgid(&init_user_ns, 0); + } + set_ping_group_range(table, low, high); + } return ret; } @@ -184,7 +196,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, int ret; unsigned long vec[3]; struct net *net = current->nsproxy->net_ns; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif @@ -203,7 +215,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, if (ret) return ret; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM rcu_read_lock(); memcg = mem_cgroup_from_task(current); @@ -220,6 +232,45 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } +int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; + struct tcp_fastopen_context *ctxt; + int ret; + u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ + + tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); + if (!tbl.data) + return -ENOMEM; + + rcu_read_lock(); + ctxt = rcu_dereference(tcp_fastopen_ctx); + if (ctxt) + memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); + rcu_read_unlock(); + + snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", + user_key[0], user_key[1], user_key[2], user_key[3]); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + + if (write && ret == 0) { + if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, + user_key + 2, user_key + 3) != 4) { + ret = -EINVAL; + goto bad_key; + } + tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); + } + +bad_key: + pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", + user_key[0], user_key[1], user_key[2], user_key[3], + (char *)tbl.data, ret); + kfree(tbl.data); + return ret; +} + static struct ctl_table ipv4_table[] = { { .procname = "tcp_timestamps", @@ -374,6 +425,12 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "tcp_fastopen_key", + .mode = 0600, + .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), + .proc_handler = proc_tcp_fastopen_key, + }, + { .procname = "tcp_tw_recycle", .data = &tcp_death_row.sysctl_tw_recycle, .maxlen = sizeof(int), @@ -784,16 +841,9 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .procname = "rt_cache_rebuild_count", - .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "ping_group_range", .data = &init_net.ipv4.sysctl_ping_group_range, - .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range), + .maxlen = sizeof(gid_t)*2, .mode = 0644, .proc_handler = ipv4_ping_group_range, }, @@ -829,8 +879,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table[5].data = &net->ipv4.sysctl_icmp_ratemask; table[6].data = - &net->ipv4.sysctl_rt_cache_rebuild_count; - table[7].data = &net->ipv4.sysctl_ping_group_range; } @@ -839,10 +887,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) * Sane defaults - nobody may create ping sockets. * Boot scripts should set this to distro-specific group. */ - net->ipv4.sysctl_ping_group_range[0] = 1; - net->ipv4.sysctl_ping_group_range[1] = 0; - - net->ipv4.sysctl_rt_cache_rebuild_count = 4; + net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); + net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); tcp_init_mem(net); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 581ecf02c6b5..f32c02e2a543 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -486,8 +486,9 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLIN | POLLRDNORM | POLLRDHUP; - /* Connected? */ - if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { + /* Connected or passive Fast Open socket? */ + if (sk->sk_state != TCP_SYN_SENT && + (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) { int target = sock_rcvlowat(sk, 0, INT_MAX); if (tp->urg_seq == tp->copied_seq && @@ -811,7 +812,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, old_size_goal + mss_now > xmit_size_goal)) { xmit_size_goal = old_size_goal; } else { - tp->xmit_size_goal_segs = xmit_size_goal / mss_now; + tp->xmit_size_goal_segs = + min_t(u16, xmit_size_goal / mss_now, + sk->sk_gso_max_segs); xmit_size_goal = tp->xmit_size_goal_segs * mss_now; } } @@ -838,10 +841,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - /* Wait for a connection to finish. */ - if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + /* Wait for a connection to finish. One exception is TCP Fast Open + * (passive side) where data is allowed to be sent before a connection + * is fully established. + */ + if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && + !tcp_passive_fastopen(sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_err; + } clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -1040,10 +1048,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - /* Wait for a connection to finish. */ - if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + /* Wait for a connection to finish. One exception is TCP Fast Open + * (passive side) where data is allowed to be sent before a connection + * is fully established. + */ + if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && + !tcp_passive_fastopen(sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto do_error; + } if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { @@ -1137,78 +1150,43 @@ new_segment: if (err) goto do_fault; } else { - bool merge = false; + bool merge = true; int i = skb_shinfo(skb)->nr_frags; - struct page *page = sk->sk_sndmsg_page; - int off; - - if (page && page_count(page) == 1) - sk->sk_sndmsg_off = 0; - - off = sk->sk_sndmsg_off; - - if (skb_can_coalesce(skb, i, page, off) && - off != PAGE_SIZE) { - /* We can extend the last page - * fragment. */ - merge = true; - } else if (i == MAX_SKB_FRAGS || !sg) { - /* Need to add new fragment and cannot - * do this because interface is non-SG, - * or because all the page slots are - * busy. */ - tcp_mark_push(tp, skb); - goto new_segment; - } else if (page) { - if (off == PAGE_SIZE) { - put_page(page); - sk->sk_sndmsg_page = page = NULL; - off = 0; + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + goto wait_for_memory; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + if (i == MAX_SKB_FRAGS || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; } - } else - off = 0; + merge = false; + } - if (copy > PAGE_SIZE - off) - copy = PAGE_SIZE - off; + copy = min_t(int, copy, pfrag->size - pfrag->offset); if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; - if (!page) { - /* Allocate new cache page. */ - if (!(page = sk_stream_alloc_page(sk))) - goto wait_for_memory; - } - - /* Time to copy data. We are close to - * the end! */ err = skb_copy_to_page_nocache(sk, from, skb, - page, off, copy); - if (err) { - /* If this page was new, give it to the - * socket so it does not get leaked. - */ - if (!sk->sk_sndmsg_page) { - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; - } + pfrag->page, + pfrag->offset, + copy); + if (err) goto do_error; - } /* Update the skb. */ if (merge) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { - skb_fill_page_desc(skb, i, page, off, copy); - if (sk->sk_sndmsg_page) { - get_page(page); - } else if (off + copy < PAGE_SIZE) { - get_page(page); - sk->sk_sndmsg_page = page; - } + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); } - - sk->sk_sndmsg_off = off + copy; + pfrag->offset += copy; } if (!copied) @@ -1760,8 +1738,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if (tp->ucopy.dma_chan) { + if (tp->rcv_wnd == 0 && + !skb_queue_empty(&sk->sk_async_wait_queue)) { + tcp_service_net_dma(sk, true); + tcp_cleanup_rbuf(sk, copied); + } else + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + } #endif if (copied >= target) { /* Do not sleep, just process backlog. */ @@ -2142,6 +2126,10 @@ void tcp_close(struct sock *sk, long timeout) * they look as CLOSING or LAST_ACK for Linux) * Probably, I missed some more holelets. * --ANK + * XXX (TFO) - To start off we don't support SYN+ACK+FIN + * in a single packet! (May consider it later but will + * probably need API support or TCP_CORK SYN-ACK until + * data is written and socket is closed.) */ tcp_send_fin(sk); } @@ -2213,8 +2201,16 @@ adjudge_to_death: } } - if (sk->sk_state == TCP_CLOSE) + if (sk->sk_state == TCP_CLOSE) { + struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + /* We could get here with a non-NULL req if the socket is + * aborted (e.g., closed with unread data) before 3WHS + * finishes. + */ + if (req != NULL) + reqsk_fastopen_remove(sk, req, false); inet_csk_destroy_sock(sk); + } /* Otherwise, socket is reprieved until protocol close. */ out: @@ -2300,6 +2296,13 @@ int tcp_disconnect(struct sock *sk, int flags) } EXPORT_SYMBOL(tcp_disconnect); +void tcp_sock_destruct(struct sock *sk) +{ + inet_sock_destruct(sk); + + kfree(inet_csk(sk)->icsk_accept_queue.fastopenq); +} + static inline bool tcp_can_repair_sock(const struct sock *sk) { return capable(CAP_NET_ADMIN) && @@ -2323,10 +2326,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp, tp->rx_opt.mss_clamp = opt.opt_val; break; case TCPOPT_WINDOW: - if (opt.opt_val > 14) - return -EFBIG; + { + u16 snd_wscale = opt.opt_val & 0xFFFF; + u16 rcv_wscale = opt.opt_val >> 16; + + if (snd_wscale > 14 || rcv_wscale > 14) + return -EFBIG; - tp->rx_opt.snd_wscale = opt.opt_val; + tp->rx_opt.snd_wscale = snd_wscale; + tp->rx_opt.rcv_wscale = rcv_wscale; + tp->rx_opt.wscale_ok = 1; + } break; case TCPOPT_SACK_PERM: if (opt.opt_val != 0) @@ -2681,7 +2691,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Cap the max timeout in ms TCP will retry/retrans * before giving up and aborting (ETIMEDOUT) a connection. */ - icsk->icsk_user_timeout = msecs_to_jiffies(val); + if (val < 0) + err = -EINVAL; + else + icsk->icsk_user_timeout = msecs_to_jiffies(val); + break; + + case TCP_FASTOPEN: + if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | + TCPF_LISTEN))) + err = fastopen_init_queue(sk, val); + else + err = -EINVAL; break; default: err = -ENOPROTOOPT; @@ -3496,11 +3517,15 @@ EXPORT_SYMBOL(tcp_cookie_generator); void tcp_done(struct sock *sk) { + struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); + if (req != NULL) + reqsk_fastopen_remove(sk, req, false); sk->sk_shutdown = SHUTDOWN_MASK; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4d4db16e336e..1432cdb0644c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left = tp->snd_cwnd - in_flight; if (sk_can_gso(sk) && left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left * tp->mss_cache < sk->sk_gso_max_size) + left * tp->mss_cache < sk->sk_gso_max_size && + left < sk->sk_gso_max_segs) return true; return left <= tcp_max_tso_deferred_mss(tp); } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index a7f729c409d7..8f7ef0ad80e5 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -1,10 +1,91 @@ +#include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/list.h> +#include <linux/tcp.h> +#include <linux/rcupdate.h> +#include <linux/rculist.h> +#include <net/inetpeer.h> +#include <net/tcp.h> -int sysctl_tcp_fastopen; +int sysctl_tcp_fastopen __read_mostly; + +struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; + +static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); + +static void tcp_fastopen_ctx_free(struct rcu_head *head) +{ + struct tcp_fastopen_context *ctx = + container_of(head, struct tcp_fastopen_context, rcu); + crypto_free_cipher(ctx->tfm); + kfree(ctx); +} + +int tcp_fastopen_reset_cipher(void *key, unsigned int len) +{ + int err; + struct tcp_fastopen_context *ctx, *octx; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->tfm = crypto_alloc_cipher("aes", 0, 0); + + if (IS_ERR(ctx->tfm)) { + err = PTR_ERR(ctx->tfm); +error: kfree(ctx); + pr_err("TCP: TFO aes cipher alloc error: %d\n", err); + return err; + } + err = crypto_cipher_setkey(ctx->tfm, key, len); + if (err) { + pr_err("TCP: TFO cipher key error: %d\n", err); + crypto_free_cipher(ctx->tfm); + goto error; + } + memcpy(ctx->key, key, len); + + spin_lock(&tcp_fastopen_ctx_lock); + + octx = rcu_dereference_protected(tcp_fastopen_ctx, + lockdep_is_held(&tcp_fastopen_ctx_lock)); + rcu_assign_pointer(tcp_fastopen_ctx, ctx); + spin_unlock(&tcp_fastopen_ctx_lock); + + if (octx) + call_rcu(&octx->rcu, tcp_fastopen_ctx_free); + return err; +} + +/* Computes the fastopen cookie for the peer. + * The peer address is a 128 bits long (pad with zeros for IPv4). + * + * The caller must check foc->len to determine if a valid cookie + * has been generated successfully. +*/ +void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc) +{ + __be32 peer_addr[4] = { addr, 0, 0, 0 }; + struct tcp_fastopen_context *ctx; + + rcu_read_lock(); + ctx = rcu_dereference(tcp_fastopen_ctx); + if (ctx) { + crypto_cipher_encrypt_one(ctx->tfm, + foc->val, + (__u8 *)peer_addr); + foc->len = TCP_FASTOPEN_COOKIE_SIZE; + } + rcu_read_unlock(); +} static int __init tcp_fastopen_init(void) { + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + + get_random_bytes(key, sizeof(key)); + tcp_fastopen_reset_cipher(key, sizeof(key)); return 0; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e07a64ca44e..432c36649db3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -237,7 +237,11 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s tcp_enter_quickack_mode((struct sock *)tp); break; case INET_ECN_CE: - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { + /* Better not delay acks, sender can have a very low cwnd */ + tcp_enter_quickack_mode((struct sock *)tp); + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + } /* fallinto */ default: tp->ecn_flags |= TCP_ECN_SEEN; @@ -374,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) /* 4. Try to fixup all. It is made immediately after connection enters * established state. */ -static void tcp_init_buffer_space(struct sock *sk) +void tcp_init_buffer_space(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -739,29 +743,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -/* Set slow start threshold and cwnd not falling to slow start */ -void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) -{ - struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - - tp->prior_ssthresh = 0; - tp->bytes_acked = 0; - if (icsk->icsk_ca_state < TCP_CA_CWR) { - tp->undo_marker = 0; - if (set_ssthresh) - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp) + 1U); - tp->snd_cwnd_cnt = 0; - tp->high_seq = tp->snd_nxt; - tp->snd_cwnd_stamp = tcp_time_stamp; - TCP_ECN_queue_cwr(tp); - - tcp_set_ca_state(sk, TCP_CA_CWR); - } -} - /* * Packet counting of FACK is based on in-order assumptions, therefore TCP * disables it when reordering is detected @@ -2489,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) tp->snd_cwnd_stamp = tcp_time_stamp; } -/* Lower bound on congestion window is slow start threshold - * unless congestion avoidance choice decides to overide it. - */ -static inline u32 tcp_cwnd_min(const struct sock *sk) -{ - const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - - return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh; -} - -/* Decrease cwnd each second ack. */ -static void tcp_cwnd_down(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - int decr = tp->snd_cwnd_cnt + 1; - - if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) || - (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) { - tp->snd_cwnd_cnt = decr & 1; - decr >>= 1; - - if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) - tp->snd_cwnd -= decr; - - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); - tp->snd_cwnd_stamp = tcp_time_stamp; - } -} - /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ @@ -2719,24 +2671,80 @@ static bool tcp_try_undo_loss(struct sock *sk) return false; } -static inline void tcp_complete_cwr(struct sock *sk) +/* The cwnd reduction in CWR and Recovery use the PRR algorithm + * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/ + * It computes the number of packets to send (sndcnt) based on packets newly + * delivered: + * 1) If the packets in flight is larger than ssthresh, PRR spreads the + * cwnd reductions across a full RTT. + * 2) If packets in flight is lower than ssthresh (such as due to excess + * losses and/or application stalls), do not perform any further cwnd + * reductions, but instead slow start up to ssthresh. + */ +static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->high_seq = tp->snd_nxt; + tp->bytes_acked = 0; + tp->snd_cwnd_cnt = 0; + tp->prior_cwnd = tp->snd_cwnd; + tp->prr_delivered = 0; + tp->prr_out = 0; + if (set_ssthresh) + tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + TCP_ECN_queue_cwr(tp); +} + +static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, + int fast_rexmit) { struct tcp_sock *tp = tcp_sk(sk); + int sndcnt = 0; + int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); - /* Do not moderate cwnd if it's already undone in cwr or recovery. */ - if (tp->undo_marker) { - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) { - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_stamp = tcp_time_stamp; - } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) { - /* PRR algorithm. */ - tp->snd_cwnd = tp->snd_ssthresh; - tp->snd_cwnd_stamp = tcp_time_stamp; - } + tp->prr_delivered += newly_acked_sacked; + if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { + u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + + tp->prior_cwnd - 1; + sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; + } else { + sndcnt = min_t(int, delta, + max_t(int, tp->prr_delivered - tp->prr_out, + newly_acked_sacked) + 1); + } + + sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; +} + +static inline void tcp_end_cwnd_reduction(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || + (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + tp->snd_cwnd = tp->snd_ssthresh; + tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } +/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */ +void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->prior_ssthresh = 0; + tp->bytes_acked = 0; + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + tp->undo_marker = 0; + tcp_init_cwnd_reduction(sk, set_ssthresh); + tcp_set_ca_state(sk, TCP_CA_CWR); + } +} + static void tcp_try_keep_open(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2751,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk) } } -static void tcp_try_to_open(struct sock *sk, int flag) +static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) { struct tcp_sock *tp = tcp_sk(sk); @@ -2768,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) tcp_moderate_cwnd(tp); } else { - tcp_cwnd_down(sk, flag); + tcp_cwnd_reduction(sk, newly_acked_sacked, 0); } } @@ -2850,38 +2858,6 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); -/* This function implements the PRR algorithm, specifcally the PRR-SSRB - * (proportional rate reduction with slow start reduction bound) as described in - * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. - * It computes the number of packets to send (sndcnt) based on packets newly - * delivered: - * 1) If the packets in flight is larger than ssthresh, PRR spreads the - * cwnd reductions across a full RTT. - * 2) If packets in flight is lower than ssthresh (such as due to excess - * losses and/or application stalls), do not perform any further cwnd - * reductions, but instead slow start up to ssthresh. - */ -static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, - int fast_rexmit, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - int sndcnt = 0; - int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); - - if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { - u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + - tp->prior_cwnd - 1; - sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; - } else { - sndcnt = min_t(int, delta, - max_t(int, tp->prr_delivered - tp->prr_out, - newly_acked_sacked) + 1); - } - - sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); - tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; -} - static void tcp_enter_recovery(struct sock *sk, bool ece_ack) { struct tcp_sock *tp = tcp_sk(sk); @@ -2894,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) NET_INC_STATS_BH(sock_net(sk), mib_idx); - tp->high_seq = tp->snd_nxt; tp->prior_ssthresh = 0; tp->undo_marker = tp->snd_una; tp->undo_retrans = tp->retrans_out; @@ -2902,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) tp->prior_ssthresh = tcp_current_ssthresh(sk); - tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); - TCP_ECN_queue_cwr(tp); + tcp_init_cwnd_reduction(sk, true); } - - tp->bytes_acked = 0; - tp->snd_cwnd_cnt = 0; - tp->prior_cwnd = tp->snd_cwnd; - tp->prr_delivered = 0; - tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); } @@ -2926,13 +2894,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int newly_acked_sacked, bool is_dupack, + int prior_sacked, bool is_dupack, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); + int newly_acked_sacked = 0; int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) @@ -2969,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, /* CWR is to be held something *above* high_seq * is ACKed for CWR bit to reach receiver. */ if (tp->snd_una != tp->high_seq) { - tcp_complete_cwr(sk); + tcp_end_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_Open); } break; @@ -2979,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tcp_reset_reno_sack(tp); if (tcp_try_undo_recovery(sk)) return; - tcp_complete_cwr(sk); + tcp_end_cwnd_reduction(sk); break; } } @@ -2992,6 +2961,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tcp_add_reno_sack(sk); } else do_lost = tcp_try_undo_partial(sk, pkts_acked); + newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; break; case TCP_CA_Loss: if (flag & FLAG_DATA_ACKED) @@ -3013,12 +2983,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (is_dupack) tcp_add_reno_sack(sk); } + newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag); + tcp_try_to_open(sk, flag, newly_acked_sacked); return; } @@ -3040,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tp->prr_delivered += newly_acked_sacked; - tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); + tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); tcp_xmit_retransmit_queue(sk); } @@ -3120,6 +3090,12 @@ void tcp_rearm_rto(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + /* If the retrans timer is currently being used by Fast Open + * for SYN-ACK retrans purpose, stay put. + */ + if (tp->fastopen_rsk) + return; + if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } else { @@ -3381,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) { const struct tcp_sock *tp = tcp_sk(sk); return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && - !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR)); + !tcp_in_cwnd_reduction(sk); } /* Check that window update is acceptable. @@ -3449,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) } /* A conservative spurious RTO response algorithm: reduce cwnd using - * rate halving and continue in congestion avoidance. + * PRR and continue in congestion avoidance. */ -static void tcp_ratehalving_spur_to_response(struct sock *sk) +static void tcp_cwr_spur_to_response(struct sock *sk) { tcp_enter_cwr(sk, 0); } @@ -3459,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk) static void tcp_undo_spur_to_response(struct sock *sk, int flag) { if (flag & FLAG_ECE) - tcp_ratehalving_spur_to_response(sk); + tcp_cwr_spur_to_response(sk); else tcp_undo_cwr(sk, true); } @@ -3566,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag) tcp_conservative_spur_to_response(tp); break; default: - tcp_ratehalving_spur_to_response(sk); + tcp_cwr_spur_to_response(sk); break; } tp->frto_counter = 0; @@ -3590,7 +3566,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - int newly_acked_sacked = 0; bool frto_cwnd = false; /* If the ack is older than previous acks @@ -3666,8 +3641,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); pkts_acked = prior_packets - tp->packets_out; - newly_acked_sacked = (prior_packets - prior_sacked) - - (tp->packets_out - tp->sacked_out); if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -3681,7 +3654,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) @@ -3698,7 +3671,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than @@ -3718,8 +3691,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - newly_acked_sacked = tp->sacked_out - prior_sacked; - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } @@ -4035,7 +4007,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) } /* When we get a reset we do this. */ -static void tcp_reset(struct sock *sk) +void tcp_reset(struct sock *sk) { /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { @@ -4351,19 +4323,20 @@ static void tcp_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk); static int tcp_prune_queue(struct sock *sk); -static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) +static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, + unsigned int size) { if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_rmem_schedule(sk, size)) { + !sk_rmem_schedule(sk, skb, size)) { if (tcp_prune_queue(sk) < 0) return -1; - if (!sk_rmem_schedule(sk, size)) { + if (!sk_rmem_schedule(sk, skb, size)) { if (!tcp_prune_ofo_queue(sk)) return -1; - if (!sk_rmem_schedule(sk, size)) + if (!sk_rmem_schedule(sk, skb, size)) return -1; } } @@ -4418,7 +4391,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) TCP_ECN_check_ce(tp, skb); - if (unlikely(tcp_try_rmem_schedule(sk, skb->truesize))) { + if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); __kfree_skb(skb); return; @@ -4552,17 +4525,17 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct tcphdr *th; bool fragstolen; - if (tcp_try_rmem_schedule(sk, size + sizeof(*th))) - goto err; - skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); if (!skb) goto err; + if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th))) + goto err_free; + th = (struct tcphdr *)skb_put(skb, sizeof(*th)); skb_reset_transport_header(skb); memset(th, 0, sizeof(*th)); @@ -4633,7 +4606,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (eaten <= 0) { queue_and_out: if (eaten < 0 && - tcp_try_rmem_schedule(sk, skb->truesize)) + tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto drop; eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); @@ -4661,7 +4634,7 @@ queue_and_out: if (eaten > 0) kfree_skb_partial(skb, fragstolen); - else if (!sock_flag(sk, SOCK_DEAD)) + if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, 0); return; } @@ -5391,6 +5364,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); + if (unlikely(sk->sk_rx_dst == NULL)) + inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. * The code loosely follows the one in the famous @@ -5475,7 +5450,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len) { #ifdef CONFIG_NET_DMA - if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { + if (tp->ucopy.task == current && + sock_owned_by_user(sk) && + tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { copied_early = 1; eaten = 1; } @@ -5552,8 +5529,7 @@ no_ack: #endif if (eaten) kfree_skb_partial(skb, fragstolen); - else - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk, 0); return 0; } } @@ -5602,7 +5578,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); if (skb != NULL) { - sk->sk_rx_dst = dst_clone(skb_dst(skb)); + icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); } @@ -5737,7 +5713,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, TCP_ECN_rcv_synack(tp, th); - tp->snd_wl1 = TCP_SKB_CB(skb)->seq; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); tcp_ack(sk, skb, FLAG_SLOWPATH); /* Ok.. it's good. Set up sequence numbers and @@ -5750,7 +5726,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * never scaled. */ tp->snd_wnd = ntohs(th->window); - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); if (!tp->rx_opt.wscale_ok) { tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; @@ -5888,7 +5863,9 @@ discard: tcp_send_synack(sk); #if 0 /* Note, we could accept data and URG from this segment. - * There are no obstacles to make this. + * There are no obstacles to make this (except that we must + * either change tcp_recvmsg() to prevent it from returning data + * before 3WHS completes per RFC793, or employ TCP Fast Open). * * However, if we ignore data in ACKless segments sometimes, * we have no reasons to accept it sometimes. @@ -5928,6 +5905,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock *req; int queued = 0; tp->rx_opt.saw_tstamp = 0; @@ -5983,6 +5961,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } + req = tp->fastopen_rsk; + if (req != NULL) { + BUG_ON(sk->sk_state != TCP_SYN_RECV && + sk->sk_state != TCP_FIN_WAIT1); + + if (tcp_check_req(sk, skb, req, NULL, true) == NULL) + goto discard; + } if (!tcp_validate_incoming(sk, skb, th, 0)) return 0; @@ -5993,7 +5979,25 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, switch (sk->sk_state) { case TCP_SYN_RECV: if (acceptable) { - tp->copied_seq = tp->rcv_nxt; + /* Once we leave TCP_SYN_RECV, we no longer + * need req so release it. + */ + if (req) { + tcp_synack_rtt_meas(sk, req); + tp->total_retrans = req->retrans; + + reqsk_fastopen_remove(sk, req, false); + } else { + /* Make sure socket is routed, for + * correct metrics. + */ + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); + + tcp_mtup_init(sk); + tcp_init_buffer_space(sk); + tp->copied_seq = tp->rcv_nxt; + } smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); @@ -6015,23 +6019,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - /* Make sure socket is routed, for - * correct metrics. - */ - icsk->icsk_af_ops->rebuild_header(sk); - - tcp_init_metrics(sk); - - tcp_init_congestion_control(sk); + if (req) { + /* Re-arm the timer because data may + * have been sent out. This is similar + * to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more + * aggressive and retranmitting any data + * sooner based on when they were sent + * out. + */ + tcp_rearm_rto(sk); + } else + tcp_init_metrics(sk); /* Prevent spurious tcp_cwnd_restart() on * first data packet. */ tp->lsndtime = tcp_time_stamp; - tcp_mtup_init(sk); tcp_initialize_rcv_mss(sk); - tcp_init_buffer_space(sk); tcp_fast_path_on(tp); } else { return 1; @@ -6039,6 +6047,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, break; case TCP_FIN_WAIT1: + /* If we enter the TCP_FIN_WAIT1 state and we are a + * Fast Open socket and this is the first acceptable + * ACK we have received, this would have acknowledged + * our SYNACK so stop the SYNACK timer. + */ + if (acceptable && req != NULL) { + /* We no longer need the request sock. */ + reqsk_fastopen_remove(sk, req, false); + tcp_rearm_rto(sk); + } if (tp->snd_una == tp->write_seq) { struct dst_entry *dst; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b6b07c93924c..75735c9a6a9d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -352,6 +352,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) const int code = icmp_hdr(icmp_skb)->code; struct sock *sk; struct sk_buff *skb; + struct request_sock *req; __u32 seq; __u32 remaining; int err; @@ -394,9 +395,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) icsk = inet_csk(sk); tp = tcp_sk(sk); + req = tp->fastopen_rsk; seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && - !between(seq, tp->snd_una, tp->snd_nxt)) { + !between(seq, tp->snd_una, tp->snd_nxt) && + (req == NULL || seq != tcp_rsk(req)->snt_isn)) { + /* For a Fast Open socket, allow seq to be snt_isn. */ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -417,10 +421,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ tp->mtu_info = info; - if (!sock_owned_by_user(sk)) + if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); - else - set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); + } else { + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) + sock_hold(sk); + } goto out; } @@ -433,6 +439,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) !icsk->icsk_backoff) break; + /* XXX (TFO) - revisit the following logic for TFO */ + if (sock_owned_by_user(sk)) break; @@ -464,6 +472,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; } + /* XXX (TFO) - if it's a TFO socket and has been accepted, rather + * than following the TCP_SYN_RECV case and closing the socket, + * we ignore the ICMP error and keep trying like a fully established + * socket. Is this the right thing to do? + */ + if (req && req->sk == NULL) + goto out; + switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: @@ -496,7 +512,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) case TCP_SYN_SENT: case TCP_SYN_RECV: /* Cannot happen. - It can f.e. if SYNs crossed. + It can f.e. if SYNs crossed, + or Fast Open. */ if (!sock_owned_by_user(sk)) { sk->sk_err = err; @@ -807,8 +824,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, - tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open. + */ + tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? + tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + tcp_rsk(req)->rcv_nxt, req->rcv_wnd, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -837,7 +858,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, rvp); + skb = tcp_make_synack(sk, dst, req, rvp, NULL); if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -847,6 +868,8 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, ireq->rmt_addr, ireq->opt); err = net_xmit_eval(err); + if (!tcp_rsk(req)->snt_synack && !err) + tcp_rsk(req)->snt_synack = tcp_time_stamp; } return err; @@ -902,8 +925,7 @@ EXPORT_SYMBOL(tcp_syn_flood_action); /* * Save and compile IPv4 options into the request_sock if needed. */ -static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, - struct sk_buff *skb) +static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) { const struct ip_options *opt = &(IPCB(skb)->opt); struct ip_options_rcu *dopt = NULL; @@ -1270,6 +1292,182 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { }; #endif +static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct tcp_fastopen_cookie *valid_foc) +{ + bool skip_cookie = false; + struct fastopen_queue *fastopenq; + + if (likely(!fastopen_cookie_present(foc))) { + /* See include/net/tcp.h for the meaning of these knobs */ + if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || + ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) + skip_cookie = true; /* no cookie to validate */ + else + return false; + } + fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; + /* A FO option is present; bump the counter. */ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); + + /* Make sure the listener has enabled fastopen, and we don't + * exceed the max # of pending TFO requests allowed before trying + * to validating the cookie in order to avoid burning CPU cycles + * unnecessarily. + * + * XXX (TFO) - The implication of checking the max_qlen before + * processing a cookie request is that clients can't differentiate + * between qlen overflow causing Fast Open to be disabled + * temporarily vs a server not supporting Fast Open at all. + */ + if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || + fastopenq == NULL || fastopenq->max_qlen == 0) + return false; + + if (fastopenq->qlen >= fastopenq->max_qlen) { + struct request_sock *req1; + spin_lock(&fastopenq->lock); + req1 = fastopenq->rskq_rst_head; + if ((req1 == NULL) || time_after(req1->expires, jiffies)) { + spin_unlock(&fastopenq->lock); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); + /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/ + foc->len = -1; + return false; + } + fastopenq->rskq_rst_head = req1->dl_next; + fastopenq->qlen--; + spin_unlock(&fastopenq->lock); + reqsk_free(req1); + } + if (skip_cookie) { + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + return true; + } + if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { + if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); + if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || + memcmp(&foc->val[0], &valid_foc->val[0], + TCP_FASTOPEN_COOKIE_SIZE) != 0) + return false; + valid_foc->len = -1; + } + /* Acknowledge the data received from the peer. */ + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + return true; + } else if (foc->len == 0) { /* Client requesting a cookie */ + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENCOOKIEREQD); + } else { + /* Client sent a cookie with wrong size. Treat it + * the same as invalid and return a valid one. + */ + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); + } + return false; +} + +static int tcp_v4_conn_req_fastopen(struct sock *sk, + struct sk_buff *skb, + struct sk_buff *skb_synack, + struct request_sock *req, + struct request_values *rvp) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + const struct inet_request_sock *ireq = inet_rsk(req); + struct sock *child; + int err; + + req->retrans = 0; + req->sk = NULL; + + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + if (child == NULL) { + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + kfree_skb(skb_synack); + return -1; + } + err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, + ireq->rmt_addr, ireq->opt); + err = net_xmit_eval(err); + if (!err) + tcp_rsk(req)->snt_synack = tcp_time_stamp; + /* XXX (TFO) - is it ok to ignore error and continue? */ + + spin_lock(&queue->fastopenq->lock); + queue->fastopenq->qlen++; + spin_unlock(&queue->fastopenq->lock); + + /* Initialize the child socket. Have to fix some values to take + * into account the child is a Fast Open socket and is created + * only out of the bits carried in the SYN packet. + */ + tp = tcp_sk(child); + + tp->fastopen_rsk = req; + /* Do a hold on the listner sk so that if the listener is being + * closed, the child that has been accepted can live on and still + * access listen_lock. + */ + sock_hold(sk); + tcp_rsk(req)->listener = sk; + + /* RFC1323: The window in SYN & SYN/ACK segments is never + * scaled. So correct it appropriately. + */ + tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + + /* Activate the retrans timer so that SYNACK can be retransmitted. + * The request socket is not added to the SYN table of the parent + * because it's been added to the accept queue directly. + */ + inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, + TCP_TIMEOUT_INIT, TCP_RTO_MAX); + + /* Add the child socket directly into the accept queue */ + inet_csk_reqsk_queue_add(sk, req, child); + + /* Now finish processing the fastopen child socket. */ + inet_csk(child)->icsk_af_ops->rebuild_header(child); + tcp_init_congestion_control(child); + tcp_mtup_init(child); + tcp_init_buffer_space(child); + tcp_init_metrics(child); + + /* Queue the data carried in the SYN packet. We need to first + * bump skb's refcnt because the caller will attempt to free it. + * + * XXX (TFO) - we honor a zero-payload TFO request for now. + * (Any reason not to?) + */ + if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { + /* Don't queue the skb if there is no payload in SYN. + * XXX (TFO) - How about SYN+FIN? + */ + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + } else { + skb = skb_get(skb); + skb_dst_drop(skb); + __skb_pull(skb, tcp_hdr(skb)->doff * 4); + skb_set_owner_r(skb, child); + __skb_queue_tail(&child->sk_receive_queue, skb); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + } + sk->sk_data_ready(sk, 0); + bh_unlock_sock(child); + sock_put(child); + WARN_ON(req->sk == NULL); + return 0; +} + int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; @@ -1283,6 +1481,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; bool want_cookie = false; + struct flowi4 fl4; + struct tcp_fastopen_cookie foc = { .len = -1 }; + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; + struct sk_buff *skb_synack; + int do_fastopen; /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -1317,7 +1520,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, + want_cookie ? NULL : &foc); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && @@ -1363,7 +1567,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->loc_addr = daddr; ireq->rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(sk, skb); + ireq->opt = tcp_v4_save_options(skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; @@ -1375,8 +1579,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = cookie_v4_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; } else if (!isn) { - struct flowi4 fl4; - /* VJ's idea. We save last timestamp seen * from the destination in peer table, when entering * state TIME-WAIT, and check against it before @@ -1415,16 +1617,54 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; - if (tcp_v4_send_synack(sk, dst, req, - (struct request_values *)&tmp_ext, - skb_get_queue_mapping(skb), - want_cookie) || - want_cookie) + if (dst == NULL) { + dst = inet_csk_route_req(sk, &fl4, req); + if (dst == NULL) + goto drop_and_free; + } + do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc); + + /* We don't call tcp_v4_send_synack() directly because we need + * to make sure a child socket can be created successfully before + * sending back synack! + * + * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack() + * (or better yet, call tcp_send_synack() in the child context + * directly, but will have to fix bunch of other code first) + * after syn_recv_sock() except one will need to first fix the + * latter to remove its dependency on the current implementation + * of tcp_v4_send_synack()->tcp_select_initial_window(). + */ + skb_synack = tcp_make_synack(sk, dst, req, + (struct request_values *)&tmp_ext, + fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); + + if (skb_synack) { + __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr); + skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); + } else + goto drop_and_free; + + if (likely(!do_fastopen)) { + int err; + err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, + ireq->rmt_addr, ireq->opt); + err = net_xmit_eval(err); + if (err || want_cookie) + goto drop_and_free; + + tcp_rsk(req)->snt_synack = tcp_time_stamp; + tcp_rsk(req)->listener = NULL; + /* Add the request_sock to the SYN table */ + inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + if (fastopen_cookie_present(&foc) && foc.len != 0) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, + (struct request_values *)&tmp_ext)) goto drop_and_free; - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; drop_and_release: @@ -1462,6 +1702,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto exit_nonewsk; newsk->sk_gso_type = SKB_GSO_TCPV4; + inet_sk_rx_dst_set(newsk, skb); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); @@ -1497,9 +1738,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); - if (tcp_rsk(req)->snt_synack) - tcp_valid_rtt_meas(newsk, - tcp_time_stamp - tcp_rsk(req)->snt_synack); + tcp_synack_rtt_meas(newsk, req); newtp->total_retrans = req->retrans; #ifdef CONFIG_TCP_MD5SIG @@ -1551,7 +1790,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, iph->saddr, iph->daddr); if (req) - return tcp_check_req(sk, skb, req, prev); + return tcp_check_req(sk, skb, req, prev, false); nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); @@ -1617,21 +1856,16 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + struct dst_entry *dst = sk->sk_rx_dst; + sock_rps_save_rxhash(sk, skb); - if (sk->sk_rx_dst) { - struct dst_entry *dst = sk->sk_rx_dst; - if (dst->ops->check(dst, 0) == NULL) { + if (dst) { + if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + dst->ops->check(dst, 0) == NULL) { dst_release(dst); sk->sk_rx_dst = NULL; } } - if (unlikely(sk->sk_rx_dst == NULL)) { - struct inet_sock *icsk = inet_sk(sk); - struct rtable *rt = skb_rtable(skb); - - sk->sk_rx_dst = dst_clone(&rt->dst); - icsk->rx_dst_ifindex = inet_iif(skb); - } if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1709,11 +1943,11 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { struct dst_entry *dst = sk->sk_rx_dst; - struct inet_sock *icsk = inet_sk(sk); + if (dst) dst = dst_check(dst, 0); if (dst && - icsk->rx_dst_ifindex == skb->skb_iif) + inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } @@ -1874,10 +2108,21 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; +} +EXPORT_SYMBOL(inet_sk_rx_dst_set); + const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, .net_header_len = sizeof(struct iphdr), @@ -1954,20 +2199,13 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - /* - * If sendmsg cached page exists, toss it. - */ - if (sk->sk_sndmsg_page) { - __free_page(sk->sk_sndmsg_page); - sk->sk_sndmsg_page = NULL; - } - /* TCP Cookie Transactions */ if (tp->cookie_values != NULL) { kref_put(&tp->cookie_values->kref, tcp_cookie_values_release); tp->cookie_values = NULL; } + BUG_ON(tp->fastopen_rsk != NULL); /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); @@ -2384,10 +2622,10 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) EXPORT_SYMBOL(tcp_proc_unregister); static void get_openreq4(const struct sock *sk, const struct request_sock *req, - struct seq_file *f, int i, int uid, int *len) + struct seq_file *f, int i, kuid_t uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); - int ttd = req->expires - jiffies; + long delta = req->expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", @@ -2399,9 +2637,9 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, TCP_SYN_RECV, 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ - jiffies_to_clock_t(ttd), + jiffies_delta_to_clock_t(delta), req->retrans, - uid, + from_kuid_munged(seq_user_ns(f), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ atomic_read(&sk->sk_refcnt), @@ -2416,6 +2654,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_sock *inet = inet_sk(sk); + struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); @@ -2450,9 +2689,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) tp->write_seq - tp->snd_una, rx_queue, timer_active, - jiffies_to_clock_t(timer_expires - jiffies), + jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - sock_i_uid(sk), + from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, @@ -2460,7 +2699,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, - tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh, + sk->sk_state == TCP_LISTEN ? + (fastopenq ? fastopenq->max_qlen : 0) : + (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh), len); } @@ -2469,10 +2710,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, { __be32 dest, src; __u16 destp, srcp; - int ttd = tw->tw_ttd - jiffies; - - if (ttd < 0) - ttd = 0; + long delta = tw->tw_ttd - jiffies; dest = tw->tw_daddr; src = tw->tw_rcv_saddr; @@ -2482,7 +2720,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, - 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, atomic_read(&tw->tw_refcnt), tw, len); } @@ -2565,6 +2803,8 @@ void tcp4_proc_exit(void) struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct iphdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -2573,11 +2813,22 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; break; } - - /* fall through */ - case CHECKSUM_NONE: +flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; + + case CHECKSUM_NONE: + wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), IPPROTO_TCP, 0); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; } return tcp_gro_receive(head, skb); @@ -2635,7 +2886,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .init_cgroup = tcp_init_cgroup, .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 2288a6399e1e..4c752a6e0bcd 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -8,6 +8,7 @@ #include <linux/init.h> #include <linux/tcp.h> #include <linux/hash.h> +#include <linux/tcp_metrics.h> #include <net/inet_connection_sock.h> #include <net/net_namespace.h> @@ -17,20 +18,10 @@ #include <net/ipv6.h> #include <net/dst.h> #include <net/tcp.h> +#include <net/genetlink.h> int sysctl_tcp_nometrics_save __read_mostly; -enum tcp_metric_index { - TCP_METRIC_RTT, - TCP_METRIC_RTTVAR, - TCP_METRIC_SSTHRESH, - TCP_METRIC_CWND, - TCP_METRIC_REORDERING, - - /* Always last. */ - TCP_METRIC_MAX, -}; - struct tcp_fastopen_metrics { u16 mss; u16 syn_loss:10; /* Recurring Fast Open SYN losses */ @@ -45,8 +36,10 @@ struct tcp_metrics_block { u32 tcpm_ts; u32 tcpm_ts_stamp; u32 tcpm_lock; - u32 tcpm_vals[TCP_METRIC_MAX]; + u32 tcpm_vals[TCP_METRIC_MAX + 1]; struct tcp_fastopen_metrics tcpm_fastopen; + + struct rcu_head rcu_head; }; static bool tcp_metric_locked(struct tcp_metrics_block *tm, @@ -690,6 +683,325 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } +static struct genl_family tcp_metrics_nl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = TCP_METRICS_GENL_NAME, + .version = TCP_METRICS_GENL_VERSION, + .maxattr = TCP_METRICS_ATTR_MAX, + .netnsok = true, +}; + +static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { + [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, + [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr), }, + /* Following attributes are not received for GET/DEL, + * we keep them for reference + */ +#if 0 + [TCP_METRICS_ATTR_AGE] = { .type = NLA_MSECS, }, + [TCP_METRICS_ATTR_TW_TSVAL] = { .type = NLA_U32, }, + [TCP_METRICS_ATTR_TW_TS_STAMP] = { .type = NLA_S32, }, + [TCP_METRICS_ATTR_VALS] = { .type = NLA_NESTED, }, + [TCP_METRICS_ATTR_FOPEN_MSS] = { .type = NLA_U16, }, + [TCP_METRICS_ATTR_FOPEN_SYN_DROPS] = { .type = NLA_U16, }, + [TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS] = { .type = NLA_MSECS, }, + [TCP_METRICS_ATTR_FOPEN_COOKIE] = { .type = NLA_BINARY, + .len = TCP_FASTOPEN_COOKIE_MAX, }, +#endif +}; + +/* Add attributes, caller cancels its header on failure */ +static int tcp_metrics_fill_info(struct sk_buff *msg, + struct tcp_metrics_block *tm) +{ + struct nlattr *nest; + int i; + + switch (tm->tcpm_addr.family) { + case AF_INET: + if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, + tm->tcpm_addr.addr.a4) < 0) + goto nla_put_failure; + break; + case AF_INET6: + if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, + tm->tcpm_addr.addr.a6) < 0) + goto nla_put_failure; + break; + default: + return -EAFNOSUPPORT; + } + + if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, + jiffies - tm->tcpm_stamp) < 0) + goto nla_put_failure; + if (tm->tcpm_ts_stamp) { + if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP, + (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0) + goto nla_put_failure; + if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL, + tm->tcpm_ts) < 0) + goto nla_put_failure; + } + + { + int n = 0; + + nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); + if (!nest) + goto nla_put_failure; + for (i = 0; i < TCP_METRIC_MAX + 1; i++) { + if (!tm->tcpm_vals[i]) + continue; + if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) + goto nla_put_failure; + n++; + } + if (n) + nla_nest_end(msg, nest); + else + nla_nest_cancel(msg, nest); + } + + { + struct tcp_fastopen_metrics tfom_copy[1], *tfom; + unsigned int seq; + + do { + seq = read_seqbegin(&fastopen_seqlock); + tfom_copy[0] = tm->tcpm_fastopen; + } while (read_seqretry(&fastopen_seqlock, seq)); + + tfom = tfom_copy; + if (tfom->mss && + nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_MSS, + tfom->mss) < 0) + goto nla_put_failure; + if (tfom->syn_loss && + (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS, + tfom->syn_loss) < 0 || + nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, + jiffies - tfom->last_syn_loss) < 0)) + goto nla_put_failure; + if (tfom->cookie.len > 0 && + nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE, + tfom->cookie.len, tfom->cookie.val) < 0) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int tcp_metrics_dump_info(struct sk_buff *skb, + struct netlink_callback *cb, + struct tcp_metrics_block *tm) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tcp_metrics_nl_family, NLM_F_MULTI, + TCP_METRICS_CMD_GET); + if (!hdr) + return -EMSGSIZE; + + if (tcp_metrics_fill_info(skb, tm) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int tcp_metrics_nl_dump(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; + unsigned int row, s_row = cb->args[0]; + int s_col = cb->args[1], col = s_col; + + for (row = s_row; row < max_rows; row++, s_col = 0) { + struct tcp_metrics_block *tm; + struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row; + + rcu_read_lock(); + for (col = 0, tm = rcu_dereference(hb->chain); tm; + tm = rcu_dereference(tm->tcpm_next), col++) { + if (col < s_col) + continue; + if (tcp_metrics_dump_info(skb, cb, tm) < 0) { + rcu_read_unlock(); + goto done; + } + } + rcu_read_unlock(); + } + +done: + cb->args[0] = row; + cb->args[1] = col; + return skb->len; +} + +static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, + unsigned int *hash, int optional) +{ + struct nlattr *a; + + a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4]; + if (a) { + addr->family = AF_INET; + addr->addr.a4 = nla_get_be32(a); + *hash = (__force unsigned int) addr->addr.a4; + return 0; + } + a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; + if (a) { + if (nla_len(a) != sizeof(sizeof(struct in6_addr))) + return -EINVAL; + addr->family = AF_INET6; + memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); + *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); + return 0; + } + return optional ? 1 : -EAFNOSUPPORT; +} + +static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) +{ + struct tcp_metrics_block *tm; + struct inetpeer_addr addr; + unsigned int hash; + struct sk_buff *msg; + struct net *net = genl_info_net(info); + void *reply; + int ret; + + ret = parse_nl_addr(info, &addr, &hash, 0); + if (ret < 0) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + reply = genlmsg_put_reply(msg, info, &tcp_metrics_nl_family, 0, + info->genlhdr->cmd); + if (!reply) + goto nla_put_failure; + + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + ret = -ESRCH; + rcu_read_lock(); + for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (addr_same(&tm->tcpm_addr, &addr)) { + ret = tcp_metrics_fill_info(msg, tm); + break; + } + } + rcu_read_unlock(); + if (ret < 0) + goto out_free; + + genlmsg_end(msg, reply); + return genlmsg_reply(msg, info); + +nla_put_failure: + ret = -EMSGSIZE; + +out_free: + nlmsg_free(msg); + return ret; +} + +#define deref_locked_genl(p) \ + rcu_dereference_protected(p, lockdep_genl_is_held() && \ + lockdep_is_held(&tcp_metrics_lock)) + +#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held()) + +static int tcp_metrics_flush_all(struct net *net) +{ + unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; + struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; + struct tcp_metrics_block *tm; + unsigned int row; + + for (row = 0; row < max_rows; row++, hb++) { + spin_lock_bh(&tcp_metrics_lock); + tm = deref_locked_genl(hb->chain); + if (tm) + hb->chain = NULL; + spin_unlock_bh(&tcp_metrics_lock); + while (tm) { + struct tcp_metrics_block *next; + + next = deref_genl(tm->tcpm_next); + kfree_rcu(tm, rcu_head); + tm = next; + } + } + return 0; +} + +static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct tcpm_hash_bucket *hb; + struct tcp_metrics_block *tm; + struct tcp_metrics_block __rcu **pp; + struct inetpeer_addr addr; + unsigned int hash; + struct net *net = genl_info_net(info); + int ret; + + ret = parse_nl_addr(info, &addr, &hash, 1); + if (ret < 0) + return ret; + if (ret > 0) + return tcp_metrics_flush_all(net); + + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hb = net->ipv4.tcp_metrics_hash + hash; + pp = &hb->chain; + spin_lock_bh(&tcp_metrics_lock); + for (tm = deref_locked_genl(*pp); tm; + pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) { + if (addr_same(&tm->tcpm_addr, &addr)) { + *pp = tm->tcpm_next; + break; + } + } + spin_unlock_bh(&tcp_metrics_lock); + if (!tm) + return -ESRCH; + kfree_rcu(tm, rcu_head); + return 0; +} + +static struct genl_ops tcp_metrics_nl_ops[] = { + { + .cmd = TCP_METRICS_CMD_GET, + .doit = tcp_metrics_nl_cmd_get, + .dumpit = tcp_metrics_nl_dump, + .policy = tcp_metrics_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = TCP_METRICS_CMD_DEL, + .doit = tcp_metrics_nl_cmd_del, + .policy = tcp_metrics_nl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { @@ -731,6 +1043,18 @@ static int __net_init tcp_net_metrics_init(struct net *net) static void __net_exit tcp_net_metrics_exit(struct net *net) { + unsigned int i; + + for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) { + struct tcp_metrics_block *tm, *next; + + tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1); + while (tm) { + next = rcu_dereference_protected(tm->tcpm_next, 1); + kfree(tm); + tm = next; + } + } kfree(net->ipv4.tcp_metrics_hash); } @@ -741,5 +1065,21 @@ static __net_initdata struct pernet_operations tcp_net_metrics_ops = { void __init tcp_metrics_init(void) { - register_pernet_subsys(&tcp_net_metrics_ops); + int ret; + + ret = register_pernet_subsys(&tcp_net_metrics_ops); + if (ret < 0) + goto cleanup; + ret = genl_register_family_with_ops(&tcp_metrics_nl_family, + tcp_metrics_nl_ops, + ARRAY_SIZE(tcp_metrics_nl_ops)); + if (ret < 0) + goto cleanup_subsys; + return; + +cleanup_subsys: + unregister_pernet_subsys(&tcp_net_metrics_ops); + +cleanup: + return; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 5912ac3fd240..27536ba16c9d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -85,6 +85,8 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) * spinlock it. I do not want! Well, probability of misbehaviour * is ridiculously low and, seems, we could use some mb() tricks * to avoid misread sequence numbers, states etc. --ANK + * + * We don't need to initialize tmp_out.sack_ok as we don't use the results */ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, @@ -387,8 +389,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - newsk->sk_rx_dst = dst_clone(skb_dst(skb)); - /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to * copy any s_data_payload stored at the original socket. @@ -509,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); + newtp->fastopen_rsk = NULL; TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } @@ -517,13 +518,20 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, EXPORT_SYMBOL(tcp_create_openreq_child); /* - * Process an incoming packet for SYN_RECV sockets represented - * as a request_sock. + * Process an incoming packet for SYN_RECV sockets represented as a + * request_sock. Normally sk is the listener socket but for TFO it + * points to the child socket. + * + * XXX (TFO) - The current impl contains a special check for ack + * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? + * + * We don't need to initialize tmp_opt.sack_ok as we don't use the results */ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct request_sock **prev) + struct request_sock **prev, + bool fastopen) { struct tcp_options_received tmp_opt; const u8 *hash_location; @@ -532,6 +540,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); bool paws_reject = false; + BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN)); + tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); @@ -567,6 +577,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * * Enforce "SYN-ACK" according to figure 8, figure 6 * of RFC793, fixed by RFC1122. + * + * Note that even if there is new data in the SYN packet + * they will be thrown away too. */ req->rsk_ops->rtx_syn_ack(sk, req, NULL); return NULL; @@ -624,9 +637,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * sent (the segment carries an unacceptable ACK) ... * a reset is sent." * - * Invalid ACK: reset will be sent by listening socket + * Invalid ACK: reset will be sent by listening socket. + * Note that the ACK validity check for a Fast Open socket is done + * elsewhere and is checked directly against the child socket rather + * than req because user data may have been sent out. */ - if ((flg & TCP_FLAG_ACK) && + if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) return sk; @@ -639,7 +655,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* RFC793: "first check sequence number". */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) { + tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) { /* Out of window: send ACK and drop. */ if (!(flg & TCP_FLAG_RST)) req->rsk_ops->send_ack(sk, skb, req); @@ -650,7 +666,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* In sequence, PAWS is OK. */ - if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1)) + if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) req->ts_recent = tmp_opt.rcv_tsval; if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { @@ -669,10 +685,25 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. + * + * XXX (TFO) - if we ever allow "data after SYN", the + * following check needs to be removed. */ if (!(flg & TCP_FLAG_ACK)) return NULL; + /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */ + if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) + tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; + else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ + tcp_rsk(req)->snt_synack = 0; + + /* For Fast Open no more processing is needed (sk is the + * child socket). + */ + if (fastopen) + return sk; + /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { @@ -680,10 +711,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } - if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) - tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; - else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ - tcp_rsk(req)->snt_synack = 0; /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all @@ -708,11 +735,21 @@ listen_overflow: } embryonic_reset: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); - if (!(flg & TCP_FLAG_RST)) + if (!(flg & TCP_FLAG_RST)) { + /* Received a bad SYN pkt - for TFO We try not to reset + * the local connection unless it's really necessary to + * avoid becoming vulnerable to outside attack aiming at + * resetting legit local connections. + */ req->rsk_ops->send_reset(sk, skb); - - inet_csk_reqsk_queue_drop(sk, req, prev); + } else if (fastopen) { /* received a valid RST pkt */ + reqsk_fastopen_remove(sk, req, true); + tcp_reset(sk); + } + if (!fastopen) { + inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + } return NULL; } EXPORT_SYMBOL(tcp_check_req); @@ -721,6 +758,12 @@ EXPORT_SYMBOL(tcp_check_req); * Queue segment on the new socket if the new socket is active, * otherwise we just shortcircuit this and continue with * the new socket. + * + * For the vast majority of cases child->sk_state will be TCP_SYN_RECV + * when entering. But other states are possible due to a race condition + * where after __inet_lookup_established() fails but before the listener + * locked is obtained, other packets cause the same connection to + * be created. */ int tcp_child_process(struct sock *parent, struct sock *child, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 33cd065cfbd8..cfe6ffe1c177 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -702,7 +702,8 @@ static unsigned int tcp_synack_options(struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, - struct tcp_extend_values *xvp) + struct tcp_extend_values *xvp, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -747,7 +748,15 @@ static unsigned int tcp_synack_options(struct sock *sk, if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } - + if (foc != NULL) { + u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; + need = (need + 3) & ~3U; /* Align to 32 bits */ + if (remaining >= need) { + opts->options |= OPTION_FAST_OPEN_COOKIE; + opts->fastopen_cookie = foc; + remaining -= need; + } + } /* Similar rationale to tcp_syn_options() applies here, too. * If the <SYN> options fit, the same options should fit now! */ @@ -910,14 +919,18 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); - if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); - - if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { tcp_delack_timer_handler(sk); - - if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { sk->sk_prot->mtu_reduced(sk); + __sock_put(sk); + } } EXPORT_SYMBOL(tcp_release_cb); @@ -940,7 +953,7 @@ void __init tcp_tasklet_init(void) * We cant xmit new skbs from this context, as we might already * hold qdisc lock. */ -void tcp_wfree(struct sk_buff *skb) +static void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); @@ -1522,21 +1535,21 @@ static void tcp_cwnd_validate(struct sock *sk) * when we would be allowed to send the split-due-to-Nagle skb fully. */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, - unsigned int mss_now, unsigned int cwnd) + unsigned int mss_now, unsigned int max_segs) { const struct tcp_sock *tp = tcp_sk(sk); - u32 needed, window, cwnd_len; + u32 needed, window, max_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; - cwnd_len = mss_now * cwnd; + max_len = mss_now * max_segs; - if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) - return cwnd_len; + if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) + return max_len; needed = min(skb->len, window); - if (cwnd_len <= needed) - return cwnd_len; + if (max_len <= needed) + return max_len; return needed - needed % mss_now; } @@ -1765,7 +1778,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) limit = min(send_win, cong_win); /* If a full-sized TSO skb can be sent, do it. */ - if (limit >= sk->sk_gso_max_size) + if (limit >= min_t(unsigned int, sk->sk_gso_max_size, + sk->sk_gso_max_segs * tp->mss_cache)) goto send_now; /* Middle in queue won't get any more data, full sendable already? */ @@ -1999,7 +2013,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, - cwnd_quota); + min_t(unsigned int, + cwnd_quota, + sk->sk_gso_max_segs)); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) @@ -2021,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (push_one) break; } - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) - tp->prr_out += sent_pkts; if (likely(sent_pkts)) { + if (tcp_in_cwnd_reduction(sk)) + tp->prr_out += sent_pkts; tcp_cwnd_validate(sk); return false; } @@ -2045,7 +2061,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, if (unlikely(sk->sk_state == TCP_CLOSE)) return; - if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) + if (tcp_write_xmit(sk, cur_mss, nonagle, 0, + sk_gfp_atomic(sk, GFP_ATOMIC))) tcp_check_probe_timer(sk); } @@ -2525,7 +2542,7 @@ begin_fwd: } NET_INC_STATS_BH(sock_net(sk), mib_idx); - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); if (skb == tcp_write_queue_head(sk)) @@ -2650,7 +2667,8 @@ int tcp_send_synack(struct sock *sk) */ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp) + struct request_values *rvp, + struct tcp_fastopen_cookie *foc) { struct tcp_out_options opts; struct tcp_extend_values *xvp = tcp_xv(rvp); @@ -2666,7 +2684,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, + sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2709,7 +2728,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5, xvp) + skb, &opts, &md5, xvp, foc) + sizeof(*th); skb_push(skb, tcp_header_size); @@ -2763,7 +2782,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } th->seq = htonl(TCP_SKB_CB(skb)->seq); - th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); + /* XXX data is queued and acked as is. No buffer/window check */ + th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); @@ -3064,7 +3084,7 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. */ - buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (buff == NULL) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; @@ -3079,7 +3099,7 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; - tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); + tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); } /* This routine sends a packet with an out of date sequence @@ -3099,7 +3119,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (skb == NULL) return -1; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6df36ad55a38..fc04711e80c8 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data) inet_csk(sk)->icsk_ack.blocked = 1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); @@ -304,6 +305,35 @@ static void tcp_probe_timer(struct sock *sk) } /* + * Timer for Fast Open socket to retransmit SYNACK. Note that the + * sk here is the child socket, not the parent (listener) socket. + */ +static void tcp_fastopen_synack_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + int max_retries = icsk->icsk_syn_retries ? : + sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ + struct request_sock *req; + + req = tcp_sk(sk)->fastopen_rsk; + req->rsk_ops->syn_ack_timeout(sk, req); + + if (req->retrans >= max_retries) { + tcp_write_err(sk); + return; + } + /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error + * returned from rtx_syn_ack() to make it more persistent like + * regular retransmit because if the child socket has been accepted + * it's not good to give up too easily. + */ + req->rsk_ops->rtx_syn_ack(sk, req, NULL); + req->retrans++; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX); +} + +/* * The TCP retransmit timer. */ @@ -316,7 +346,15 @@ void tcp_retransmit_timer(struct sock *sk) tcp_resume_early_retransmit(sk); return; } - + if (tp->fastopen_rsk) { + BUG_ON(sk->sk_state != TCP_SYN_RECV && + sk->sk_state != TCP_FIN_WAIT1); + tcp_fastopen_synack_timer(sk); + /* Before we receive ACK to our SYN-ACK don't retransmit + * anything else (e.g., data or FIN segments). + */ + return; + } if (!tp->packets_out) goto out; @@ -481,7 +519,8 @@ static void tcp_write_timer(unsigned long data) tcp_write_timer_handler(sk); } else { /* deleguate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4c3582a991f..79c8dbe59b54 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->check = CSUM_MANGLED_0; send: - err = ip_send_skb(skb); + err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { UDP_INC_STATS_USER(sock_net(sk), @@ -1226,6 +1226,11 @@ try_again: if (unlikely(err)) { trace_kfree_skb(skb, udp_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } goto out_free; } @@ -2110,7 +2115,9 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 16d0960062be..505b30ad9182 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -24,7 +24,9 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, if (!inet_diag_bc_sk(bc, sk)) return 0; - return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid, + return inet_sk_diag_fill(sk, NULL, skb, req, + sk_user_ns(NETLINK_CB(cb->skb).ssk), + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -69,14 +71,15 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = inet_sk_diag_fill(sk, NULL, rep, req, - NETLINK_CB(in_skb).pid, + sk_user_ns(NETLINK_CB(in_skb).ssk), + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 58d23a572509..06814b6216dc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) if (skb_dst(skb) == NULL) { const struct iphdr *iph = ip_hdr(skb); - if (ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) goto drop; } return dst_input(skb); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c6281847f16a..681ea2f413e2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); return 0; } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 5728695b5449..4f7fe7270e37 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -201,6 +201,22 @@ config IPV6_TUNNEL If unsure, say N. +config IPV6_GRE + tristate "IPv6: GRE tunnel" + select IPV6_TUNNEL + ---help--- + Tunneling means encapsulating data of one protocol type within + another protocol and sending it over a channel that understands the + encapsulating protocol. This particular tunneling driver implements + GRE (Generic Routing Encapsulation) and at this time allows + encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure. + This driver is useful if the other endpoint is a Cisco router: Cisco + likes GRE much better than the other Linux tunneling driver ("IP + tunneling" above). In addition, GRE allows multicast redistribution + through the tunnel. + + Saying M here will produce a module called ip6_gre. If unsure, say N. + config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" depends on EXPERIMENTAL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 686934acfac1..b6d3f79151e2 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o +obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-y += addrconf_core.o exthdrs_core.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 79181819a24f..d7c56f8a5b4e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -127,8 +127,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) #endif #ifdef CONFIG_IPV6_PRIVACY -static int __ipv6_regen_rndid(struct inet6_dev *idev); -static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); +static void __ipv6_regen_rndid(struct inet6_dev *idev); +static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_regen_rndid(unsigned long data); #endif @@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf) dev_forward_change(idev); } } - rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) @@ -790,10 +788,16 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) struct in6_addr prefix; struct rt6_info *rt; struct net *net = dev_net(ifp->idev->dev); + struct flowi6 fl6 = {}; + ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); + fl6.flowi6_oif = ifp->idev->dev->ifindex; + fl6.daddr = prefix; + rt = (struct rt6_info *)ip6_route_lookup(net, &fl6, + RT6_LOOKUP_F_IFACE); - if (rt && addrconf_is_prefix_route(rt)) { + if (rt != net->ipv6.ip6_null_entry && + addrconf_is_prefix_route(rt)) { if (onlink == 0) { ip6_del_rt(rt); rt = NULL; @@ -854,16 +858,7 @@ retry: } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { - spin_unlock_bh(&ifp->lock); - write_unlock(&idev->lock); - pr_warn("%s: regeneration of randomized interface id failed\n", - __func__); - in6_ifa_put(ifp); - in6_dev_put(idev); - ret = -1; - goto out; - } + __ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; tmp_valid_lft = min_t(__u32, @@ -1081,8 +1076,10 @@ static int ipv6_get_saddr_eval(struct net *net, break; case IPV6_SADDR_RULE_PREFIX: /* Rule 8: Use longest matching prefix */ - score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr, - dst->addr); + ret = ipv6_addr_diff(&score->ifa->addr, dst->addr); + if (ret > score->ifa->prefix_len) + ret = score->ifa->prefix_len; + score->matchlen = ret; break; default: ret = 0; @@ -1095,7 +1092,7 @@ out: return ret; } -int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, +int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { @@ -1602,7 +1599,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) #ifdef CONFIG_IPV6_PRIVACY /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static int __ipv6_regen_rndid(struct inet6_dev *idev) +static void __ipv6_regen_rndid(struct inet6_dev *idev) { regen: get_random_bytes(idev->rndid, sizeof(idev->rndid)); @@ -1629,8 +1626,6 @@ regen: if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) goto regen; } - - return 0; } static void ipv6_regen_rndid(unsigned long data) @@ -1644,8 +1639,7 @@ static void ipv6_regen_rndid(unsigned long data) if (idev->dead) goto out; - if (__ipv6_regen_rndid(idev) < 0) - goto out; + __ipv6_regen_rndid(idev); expires = jiffies + idev->cnf.temp_prefered_lft * HZ - @@ -1666,13 +1660,10 @@ out: in6_dev_put(idev); } -static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) +static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { - int ret = 0; - if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - ret = __ipv6_regen_rndid(idev); - return ret; + __ipv6_regen_rndid(idev); } #endif @@ -1723,7 +1714,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, if (table == NULL) return NULL; - write_lock_bh(&table->tb6_lock); + read_lock_bh(&table->tb6_lock); fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); if (!fn) goto out; @@ -1738,7 +1729,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, break; } out: - write_unlock_bh(&table->tb6_lock); + read_unlock_bh(&table->tb6_lock); return rt; } @@ -1778,14 +1769,6 @@ static void sit_route_add(struct net_device *dev) } #endif -static void addrconf_add_lroute(struct net_device *dev) -{ - struct in6_addr addr; - - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - addrconf_prefix_route(&addr, 64, dev, 0, 0); -} - static struct inet6_dev *addrconf_add_dev(struct net_device *dev) { struct inet6_dev *idev; @@ -1803,8 +1786,6 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) if (!(dev->flags & IFF_LOOPBACK)) addrconf_add_mroute(dev); - /* Add link local route */ - addrconf_add_lroute(dev); return idev; } @@ -2483,10 +2464,9 @@ static void addrconf_sit_config(struct net_device *dev) sit_add_v4_addrs(idev); - if (dev->flags&IFF_POINTOPOINT) { + if (dev->flags&IFF_POINTOPOINT) addrconf_add_mroute(dev); - addrconf_add_lroute(dev); - } else + else sit_route_add(dev); } #endif @@ -3551,12 +3531,12 @@ static inline int inet6_ifaddr_msgsize(void) } static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u32 preferred, valid; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3594,7 +3574,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3603,7 +3583,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3619,7 +3599,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3628,7 +3608,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3669,7 +3649,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (++ip_idx < s_ip_idx) continue; err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); @@ -3685,7 +3665,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, NLM_F_MULTI); @@ -3700,7 +3680,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); @@ -3822,7 +3802,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_ifa; } - err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDR, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ @@ -3830,7 +3810,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout_ifa: in6_ifa_put(ifa); errout: @@ -4032,14 +4012,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) } static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; struct ifinfomsg *hdr; struct nlmsghdr *nlh; void *protoinfo; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) return -EMSGSIZE; @@ -4097,7 +4077,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (!idev) goto cont; if (inet6_fill_ifinfo(skb, idev, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) <= 0) goto out; @@ -4145,14 +4125,14 @@ static inline size_t inet6_prefix_nlmsg_size(void) } static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, - struct prefix_info *pinfo, u32 pid, u32 seq, + struct prefix_info *pinfo, u32 portid, u32 seq, int event, unsigned int flags) { struct prefixmsg *pmsg; struct nlmsghdr *nlh; struct prefix_cacheinfo ci; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index eb6a63632d3c..4be23da32b89 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -57,7 +57,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) } /* - * Default policy table (RFC3484 + extensions) + * Default policy table (RFC6724 + extensions) * * prefix addr_type label * ------------------------------------------------------------------------- @@ -69,8 +69,12 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) * fc00::/7 N/A 5 ULA (RFC 4193) * 2001::/32 N/A 6 Teredo (RFC 4380) * 2001:10::/28 N/A 7 ORCHID (RFC 4843) + * fec0::/10 N/A 11 Site-local + * (deprecated by RFC3879) + * 3ffe::/16 N/A 12 6bone * * Note: 0xffffffff is used if we do not have any policies. + * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. */ #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL @@ -88,10 +92,18 @@ static const __net_initdata struct ip6addrlbl_init_table .prefix = &(struct in6_addr){{{ 0xfc }}}, .prefixlen = 7, .label = 5, + },{ /* fec0::/10 */ + .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, + .prefixlen = 10, + .label = 11, },{ /* 2002::/16 */ .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, .prefixlen = 16, .label = 2, + },{ /* 3ffe::/16 */ + .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, + .prefixlen = 16, + .label = 12, },{ /* 2001::/32 */ .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, .prefixlen = 32, @@ -470,10 +482,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, static int ip6addrlbl_fill(struct sk_buff *skb, struct ip6addrlbl_entry *p, u32 lseq, - u32 pid, u32 seq, int event, + u32 portid, u32 seq, int event, unsigned int flags) { - struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, + struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrlblmsg), flags); if (!nlh) return -EMSGSIZE; @@ -503,7 +515,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, ip6addrlbl_table.seq, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI)) <= 0) @@ -574,7 +586,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, } err = ip6addrlbl_fill(skb, p, lseq, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDRLABEL, 0); ip6addrlbl_put(p); @@ -585,7 +597,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, goto out; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); out: return err; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 6dc7fd353ef5..282f3723ee19 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -167,8 +167,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ - err = -ENOMEM; - aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -203,8 +201,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0251a6005be8..c4f934176cab 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -175,33 +175,12 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, const struct in6_addr *saddr) { __ip6_dst_store(sk, dst, daddr, saddr); - -#ifdef CONFIG_XFRM - { - struct rt6_info *rt = (struct rt6_info *)dst; - rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); - } -#endif } static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { - struct dst_entry *dst; - - dst = __sk_dst_check(sk, cookie); - -#ifdef CONFIG_XFRM - if (dst) { - struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { - __sk_dst_reset(sk); - dst = NULL; - } - } -#endif - - return dst; + return __sk_dst_check(sk, cookie); } static struct dst_entry *inet6_csk_route_socket(struct sock *sk, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 13690d650c3e..24995a93ef8c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -514,7 +514,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, ln = node_alloc(); if (!ln) - return NULL; + return ERR_PTR(-ENOMEM); ln->fn_bit = plen; ln->parent = pn; @@ -561,7 +561,7 @@ insert_above: node_free(in); if (ln) node_free(ln); - return NULL; + return ERR_PTR(-ENOMEM); } /* @@ -611,7 +611,7 @@ insert_above: ln = node_alloc(); if (!ln) - return NULL; + return ERR_PTR(-ENOMEM); ln->fn_bit = plen; @@ -777,11 +777,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (IS_ERR(fn)) { err = PTR_ERR(fn); - fn = NULL; - } - - if (!fn) goto out; + } pn = fn; @@ -819,12 +816,13 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) offsetof(struct rt6_info, rt6i_src), allow_create, replace_required); - if (!sn) { + if (IS_ERR(sn)) { /* If it is failed, discard just allocated root, and then (in st_failure) stale node in main tree. */ node_free(sfn); + err = PTR_ERR(sn); goto st_failure; } @@ -839,10 +837,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (IS_ERR(sn)) { err = PTR_ERR(sn); - sn = NULL; - } - if (!sn) goto st_failure; + } } if (!fn->leaf) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 9772fbd8a3f5..90bbefb57943 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -22,6 +22,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/pid_namespace.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -91,6 +92,8 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) static void fl_free(struct ip6_flowlabel *fl) { if (fl) { + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); release_net(fl->fl_net); kfree(fl->opt); } @@ -394,10 +397,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, case IPV6_FL_S_ANY: break; case IPV6_FL_S_PROCESS: - fl->owner = current->pid; + fl->owner.pid = get_task_pid(current, PIDTYPE_PID); break; case IPV6_FL_S_USER: - fl->owner = current_euid(); + fl->owner.uid = current_euid(); break; default: err = -EINVAL; @@ -561,7 +564,10 @@ recheck: err = -EPERM; if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || - fl1->owner != fl->owner) + ((fl1->share == IPV6_FL_S_PROCESS) && + (fl1->owner.pid == fl->owner.pid)) || + ((fl1->share == IPV6_FL_S_USER) && + uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -EINVAL; @@ -621,6 +627,7 @@ done: struct ip6fl_iter_state { struct seq_net_private p; + struct pid_namespace *pid_ns; int bucket; }; @@ -699,6 +706,7 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v) static int ip6fl_seq_show(struct seq_file *seq, void *v) { + struct ip6fl_iter_state *state = ip6fl_seq_private(seq); if (v == SEQ_START_TOKEN) seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); @@ -708,7 +716,11 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", (unsigned int)ntohl(fl->label), fl->share, - (int)fl->owner, + ((fl->share == IPV6_FL_S_PROCESS) ? + pid_nr_ns(fl->owner.pid, state->pid_ns) : + ((fl->share == IPV6_FL_S_USER) ? + from_kuid_munged(seq_user_ns(seq), fl->owner.uid) : + 0)), atomic_read(&fl->users), fl->linger/HZ, (long)(fl->expires - jiffies)/HZ, @@ -727,8 +739,29 @@ static const struct seq_operations ip6fl_seq_ops = { static int ip6fl_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &ip6fl_seq_ops, - sizeof(struct ip6fl_iter_state)); + struct seq_file *seq; + struct ip6fl_iter_state *state; + int err; + + err = seq_open_net(inode, file, &ip6fl_seq_ops, + sizeof(struct ip6fl_iter_state)); + + if (!err) { + seq = file->private_data; + state = ip6fl_seq_private(seq); + rcu_read_lock(); + state->pid_ns = get_pid_ns(task_active_pid_ns(current)); + rcu_read_unlock(); + } + return err; +} + +static int ip6fl_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct ip6fl_iter_state *state = ip6fl_seq_private(seq); + put_pid_ns(state->pid_ns); + return seq_release_net(inode, file); } static const struct file_operations ip6fl_seq_fops = { @@ -736,7 +769,7 @@ static const struct file_operations ip6fl_seq_fops = { .open = ip6fl_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_net, + .release = ip6fl_seq_release, }; static int __net_init ip6_flowlabel_proc_init(struct net *net) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c new file mode 100644 index 000000000000..0185679c5f53 --- /dev/null +++ b/net/ipv6/ip6_gre.c @@ -0,0 +1,1770 @@ +/* + * GRE over IPv6 protocol decoder. + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/hash.h> +#include <linux/if_tunnel.h> +#include <linux/ip6_tunnel.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/addrconf.h> +#include <net/arp.h> +#include <net/checksum.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/rtnetlink.h> + +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#include <net/ip6_tunnel.h> + + +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + +#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) +#define IPV6_TCLASS_SHIFT 20 + +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) + +static int ip6gre_net_id __read_mostly; +struct ip6gre_net { + struct ip6_tnl __rcu *tunnels[4][HASH_SIZE]; + + struct net_device *fb_tunnel_dev; +}; + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static int ip6gre_tunnel_init(struct net_device *dev); +static void ip6gre_tunnel_setup(struct net_device *dev); +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); + +/* Tunnel hash table */ + +/* + 4 hash tables: + + 3: (remote,local) + 2: (remote,*) + 1: (*,local) + 0: (*,*) + + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + */ + +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1)) +static u32 HASH_ADDR(const struct in6_addr *addr) +{ + u32 hash = ipv6_addr_hash(addr); + + return hash_32(hash, HASH_SIZE_SHIFT); +} + +#define tunnels_r_l tunnels[3] +#define tunnels_r tunnels[2] +#define tunnels_l tunnels[1] +#define tunnels_wc tunnels[0] +/* + * Locking : hash tables are protected by RCU and RTNL + */ + +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + +/* often modified stats are per cpu, other are shared (netdev->stats) */ +struct pcpu_tstats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; +}; + +static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; + tot->rx_errors = dev->stats.rx_errors; + + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + return tot; +} + +/* Given src, dst and key, find appropriate for input tunnel. */ + +static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, + const struct in6_addr *remote, const struct in6_addr *local, + __be32 key, __be16 gre_proto) +{ + struct net *net = dev_net(dev); + int link = dev->ifindex; + unsigned int h0 = HASH_ADDR(remote); + unsigned int h1 = HASH_KEY(key); + struct ip6_tnl *t, *cand = NULL; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int dev_type = (gre_proto == htons(ETH_P_TEB)) ? + ARPHRD_ETHER : ARPHRD_IP6GRE; + int score, cand_score = 4; + + for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { + if (!ipv6_addr_equal(local, &t->parms.laddr) || + !ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { + if (!ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { + if ((!ipv6_addr_equal(local, &t->parms.laddr) && + (!ipv6_addr_equal(local, &t->parms.raddr) || + !ipv6_addr_is_multicast(local))) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + if (cand != NULL) + return cand; + + dev = ign->fb_tunnel_dev; + if (dev->flags & IFF_UP) + return netdev_priv(dev); + + return NULL; +} + +static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign, + const struct __ip6_tnl_parm *p) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + unsigned int h = HASH_KEY(p->i_key); + int prio = 0; + + if (!ipv6_addr_any(local)) + prio |= 1; + if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) { + prio |= 2; + h ^= HASH_ADDR(remote); + } + + return &ign->tunnels[prio][h]; +} + +static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign, + const struct ip6_tnl *t) +{ + return __ip6gre_bucket(ign, &t->parms); +} + +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); + + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); +} + +static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + + for (tp = ip6gre_bucket(ign, t); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); + break; + } + } +} + +static struct ip6_tnl *ip6gre_tunnel_find(struct net *net, + const struct __ip6_tnl_parm *parms, + int type) +{ + const struct in6_addr *remote = &parms->raddr; + const struct in6_addr *local = &parms->laddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip6_tnl *t; + struct ip6_tnl __rcu **tp; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + for (tp = __ip6gre_bucket(ign, parms); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr) && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + + return t; +} + +static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, + const struct __ip6_tnl_parm *parms, int create) +{ + struct ip6_tnl *t, *nt; + struct net_device *dev; + char name[IFNAMSIZ]; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE); + if (t || !create) + return t; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else + strcpy(name, "ip6gre%d"); + + dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup); + if (!dev) + return NULL; + + dev_net_set(dev, net); + + nt = netdev_priv(dev); + nt->parms = *parms; + dev->rtnl_link_ops = &ip6gre_link_ops; + + nt->dev = dev; + ip6gre_tnl_link_config(nt, 1); + + if (register_netdevice(dev) < 0) + goto failed_free; + + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= NETIF_F_LLTX; + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + return nt; + +failed_free: + free_netdev(dev); + return NULL; +} + +static void ip6gre_tunnel_uninit(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + ip6gre_tunnel_unlink(ign, netdev_priv(dev)); + dev_put(dev); +} + + +static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; + __be16 *p = (__be16 *)(skb->data + offset); + int grehlen = offset + 4; + struct ip6_tnl *t; + __be16 flags; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_KEY) { + grehlen += 4; + if (flags&GRE_CSUM) + grehlen += 4; + } + } + + /* If only 8 bytes returned, keyed message will be dropped here */ + if (!pskb_may_pull(skb, grehlen)) + return; + ipv6h = (const struct ipv6hdr *)skb->data; + p = (__be16 *)(skb->data + offset); + + t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, + flags & GRE_KEY ? + *(((__be32 *)p) + (grehlen / 4) - 1) : 0, + p[1]); + if (t == NULL) + return; + + switch (type) { + __u32 teli; + struct ipv6_tlv_tnl_enc_lim *tel; + __u32 mtu; + case ICMPV6_DEST_UNREACH: + net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); + break; + case ICMPV6_TIME_EXCEED: + if (code == ICMPV6_EXC_HOPLIMIT) { + net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); + } + break; + case ICMPV6_PARAMPROB: + teli = 0; + if (code == ICMPV6_HDR_FIELD) + teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); + + if (teli && teli == info - 2) { + tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; + if (tel->encap_limit == 0) { + net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", + t->parms.name); + } + } else { + net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); + } + break; + case ICMPV6_PKT_TOOBIG: + mtu = info - offset; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + t->dev->mtu = mtu; + break; + } + + if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; +} + +static int ip6gre_rcv(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + u8 *h; + __be16 flags; + __sum16 csum = 0; + __be32 key = 0; + u32 seqno = 0; + struct ip6_tnl *tunnel; + int offset = 4; + __be16 gre_proto; + int err; + + if (!pskb_may_pull(skb, sizeof(struct in6_addr))) + goto drop; + + ipv6h = ipv6_hdr(skb); + h = skb->data; + flags = *(__be16 *)h; + + if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { + /* - Version must be 0. + - We do not support routing headers. + */ + if (flags&(GRE_VERSION|GRE_ROUTING)) + goto drop; + + if (flags&GRE_CSUM) { + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + if (!csum) + break; + /* fall through */ + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + } + offset += 4; + } + if (flags&GRE_KEY) { + key = *(__be32 *)(h + offset); + offset += 4; + } + if (flags&GRE_SEQ) { + seqno = ntohl(*(__be32 *)(h + offset)); + offset += 4; + } + } + + gre_proto = *(__be16 *)(h + 2); + + tunnel = ip6gre_tunnel_lookup(skb->dev, + &ipv6h->saddr, &ipv6h->daddr, key, + gre_proto); + if (tunnel) { + struct pcpu_tstats *tstats; + + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) { + tunnel->dev->stats.rx_dropped++; + goto drop; + } + + secpath_reset(skb); + + skb->protocol = gre_proto; + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { + skb->protocol = htons(ETH_P_IP); + if ((*(h + offset) & 0xF0) != 0x40) + offset += 4; + } + + skb->mac_header = skb->network_header; + __pskb_pull(skb, offset); + skb_postpull_rcsum(skb, skb_transport_header(skb), offset); + skb->pkt_type = PACKET_HOST; + + if (((flags&GRE_CSUM) && csum) || + (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + if (tunnel->parms.i_flags&GRE_SEQ) { + if (!(flags&GRE_SEQ) || + (tunnel->i_seqno && + (s32)(seqno - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = seqno + 1; + } + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + ipv6h = ipv6_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + + err = IP6_ECN_decapsulate(ipv6h, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", + &ipv6h->saddr, + ipv6_get_dsfield(ipv6h)); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + netif_rx(skb); + + return 0; + } + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + kfree_skb(skb); + return 0; +} + +struct ipv6_tel_txoption { + struct ipv6_txoptions ops; + __u8 dst_opt[8]; +}; + +static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) +{ + memset(opt, 0, sizeof(struct ipv6_tel_txoption)); + + opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; + opt->dst_opt[3] = 1; + opt->dst_opt[4] = encap_limit; + opt->dst_opt[5] = IPV6_TLV_PADN; + opt->dst_opt[6] = 1; + + opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; + opt->ops.opt_nflen = 8; +} + +static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, + struct net_device *dev, + __u8 dsfield, + struct flowi6 *fl6, + int encap_limit, + __u32 *pmtu) +{ + struct net *net = dev_net(dev); + struct ip6_tnl *tunnel = netdev_priv(dev); + struct net_device *tdev; /* Device to other host */ + struct ipv6hdr *ipv6h; /* Our new IP header */ + unsigned int max_headroom; /* The extra header space needed */ + int gre_hlen; + struct ipv6_tel_txoption opt; + int mtu; + struct dst_entry *dst = NULL, *ndst = NULL; + struct net_device_stats *stats = &tunnel->dev->stats; + int err = -1; + u8 proto; + int pkt_len; + struct sk_buff *new_skb; + + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; + + if (dev->header_ops && dev->type == ARPHRD_IP6GRE) { + gre_hlen = 0; + ipv6h = (struct ipv6hdr *)skb->data; + fl6->daddr = ipv6h->daddr; + } else { + gre_hlen = tunnel->hlen; + fl6->daddr = tunnel->parms.raddr; + } + + if (!fl6->flowi6_mark) + dst = ip6_tnl_dst_check(tunnel); + + if (!dst) { + ndst = ip6_route_output(net, NULL, fl6); + + if (ndst->error) + goto tx_err_link_failure; + ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + ndst = NULL; + goto tx_err_link_failure; + } + dst = ndst; + } + + tdev = dst->dev; + + if (tdev == dev) { + stats->collisions++; + net_warn_ratelimited("%s: Local routing loop detected!\n", + tunnel->parms.name); + goto tx_err_dst_release; + } + + mtu = dst_mtu(dst) - sizeof(*ipv6h); + if (encap_limit >= 0) { + max_headroom += 8; + mtu -= 8; + } + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (skb->len > mtu) { + *pmtu = mtu; + err = -EMSGSIZE; + goto tx_err_dst_release; + } + + if (tunnel->err_count > 0) { + if (time_before(jiffies, + tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { + new_skb = skb_realloc_headroom(skb, max_headroom); + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; + if (!new_skb) + goto tx_err_dst_release; + + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + consume_skb(skb); + skb = new_skb; + } + + skb_dst_drop(skb); + + if (fl6->flowi6_mark) { + skb_dst_set(skb, dst); + ndst = NULL; + } else { + skb_dst_set_noref(skb, dst); + } + + skb->transport_header = skb->network_header; + + proto = NEXTHDR_GRE; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + } + + skb_push(skb, gre_hlen); + skb_reset_network_header(skb); + + /* + * Push down and install the IP header. + */ + ipv6h = ipv6_hdr(skb); + *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000); + dsfield = INET_ECN_encapsulate(0, dsfield); + ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ipv6h->hop_limit = tunnel->parms.hop_limit; + ipv6h->nexthdr = proto; + ipv6h->saddr = fl6->saddr; + ipv6h->daddr = fl6->daddr; + + ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags; + ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : skb->protocol; + + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4); + + if (tunnel->parms.o_flags&GRE_SEQ) { + ++tunnel->o_seqno; + *ptr = htonl(tunnel->o_seqno); + ptr--; + } + if (tunnel->parms.o_flags&GRE_KEY) { + *ptr = tunnel->parms.o_key; + ptr--; + } + if (tunnel->parms.o_flags&GRE_CSUM) { + *ptr = 0; + *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1), + skb->len - sizeof(struct ipv6hdr)); + } + } + + nf_reset(skb); + pkt_len = skb->len; + err = ip6_local_out(skb); + + if (net_xmit_eval(err) == 0) { + struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); + + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; + } + + if (ndst) + ip6_tnl_dst_store(tunnel, ndst); + + return 0; +tx_err_link_failure: + stats->tx_carrier_errors++; + dst_link_failure(skb); +tx_err_dst_release: + dst_release(ndst); + return err; +} + +static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + const struct iphdr *iph = ip_hdr(skb); + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + int err; + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + + dsfield = ipv4_get_dsfield(iph); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ + if (err == -EMSGSIZE) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + return -1; + } + + return 0; +} + +static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + int encap_limit = -1; + __u16 offset; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + int err; + + if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + return -1; + + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + + dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (err != 0) { + if (err == -EMSGSIZE) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -1; + } + + return 0; +} + +/** + * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own + * @t: the outgoing tunnel device + * @hdr: IPv6 header from the incoming packet + * + * Description: + * Avoid trivial tunneling loop by checking that tunnel exit-point + * doesn't match source of incoming packet. + * + * Return: + * 1 if conflict, + * 0 else + **/ + +static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t, + const struct ipv6hdr *hdr) +{ + return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); +} + +static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int encap_limit = -1; + struct flowi6 fl6; + __u32 mtu; + int err; + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = skb->protocol; + + err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu); + + return err; +} + +static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + int ret; + + if (!ip6_tnl_xmit_ctl(t)) + return -1; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ret = ip6gre_xmit_ipv4(skb, dev); + break; + case htons(ETH_P_IPV6): + ret = ip6gre_xmit_ipv6(skb, dev); + break; + default: + ret = ip6gre_xmit_other(skb, dev); + break; + } + + if (ret < 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) +{ + struct net_device *dev = t->dev; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; + int addend = sizeof(struct ipv6hdr) + 4; + + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); + } + + /* Set up flowi template */ + fl6->saddr = p->laddr; + fl6->daddr = p->raddr; + fl6->flowi6_oif = p->link; + fl6->flowlabel = 0; + + if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) + fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; + if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) + fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; + + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); + + if (p->flags&IP6_TNL_F_CAP_XMIT && + p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + else + dev->flags &= ~IFF_POINTOPOINT; + + dev->iflink = p->link; + + /* Precalculate GRE options length */ + if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { + if (t->parms.o_flags&GRE_CSUM) + addend += 4; + if (t->parms.o_flags&GRE_KEY) + addend += 4; + if (t->parms.o_flags&GRE_SEQ) + addend += 4; + } + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + int strict = (ipv6_addr_type(&p->raddr) & + (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); + + struct rt6_info *rt = rt6_lookup(dev_net(dev), + &p->raddr, &p->laddr, + p->link, strict); + + if (rt == NULL) + return; + + if (rt->dst.dev) { + dev->hard_header_len = rt->dst.dev->hard_header_len + addend; + + if (set_mtu) { + dev->mtu = rt->dst.dev->mtu - addend; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + } + dst_release(&rt->dst); + } + + t->hlen = addend; +} + +static int ip6gre_tnl_change(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p, int set_mtu) +{ + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; + t->parms.flags = p->flags; + t->parms.hop_limit = p->hop_limit; + t->parms.encap_limit = p->encap_limit; + t->parms.flowinfo = p->flowinfo; + t->parms.link = p->link; + t->parms.proto = p->proto; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; + ip6_tnl_dst_reset(t); + ip6gre_tnl_link_config(t, set_mtu); + return 0; +} + +static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, + const struct ip6_tnl_parm2 *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->i_key = u->i_key; + p->o_key = u->o_key; + p->i_flags = u->i_flags; + p->o_flags = u->o_flags; + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, + const struct __ip6_tnl_parm *p) +{ + u->proto = IPPROTO_GRE; + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->i_key = p->i_key; + u->o_key = p->o_key; + u->i_flags = p->i_flags; + u->o_flags = p->o_flags; + memcpy(u->name, p->name, sizeof(u->name)); +} + +static int ip6gre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip6_tnl_parm2 p; + struct __ip6_tnl_parm p1; + struct ip6_tnl *t; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == ign->fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + } + if (t == NULL) + t = netdev_priv(dev); + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)) + goto done; + + if (!(p.i_flags&GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags&GRE_KEY)) + p.o_key = 0; + + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL); + + if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + t = netdev_priv(dev); + + ip6gre_tunnel_unlink(ign, t); + synchronize_net(); + ip6gre_tnl_change(t, &p1, 1); + ip6gre_tunnel_link(ign, t); + netdev_state_change(dev); + } + } + + if (t) { + err = 0; + + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + if (dev == ign->fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + if (t == NULL) + goto done; + err = -EPERM; + if (t == netdev_priv(ign->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} + +static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned int len) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); + __be16 *p = (__be16 *)(ipv6h+1); + + *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000); + ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->nexthdr = NEXTHDR_GRE; + ipv6h->saddr = t->parms.laddr; + ipv6h->daddr = t->parms.raddr; + + p[0] = t->parms.o_flags; + p[1] = htons(type); + + /* + * Set the source hardware address. + */ + + if (saddr) + memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr)); + if (daddr) + memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr)); + if (!ipv6_addr_any(&ipv6h->daddr)) + return t->hlen; + + return -t->hlen; +} + +static const struct header_ops ip6gre_header_ops = { + .create = ip6gre_header, +}; + +static const struct net_device_ops ip6gre_netdev_ops = { + .ndo_init = ip6gre_tunnel_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_do_ioctl = ip6gre_tunnel_ioctl, + .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_get_stats64 = ip6gre_get_stats64, +}; + +static void ip6gre_dev_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + +static void ip6gre_tunnel_setup(struct net_device *dev) +{ + struct ip6_tnl *t; + + dev->netdev_ops = &ip6gre_netdev_ops; + dev->destructor = ip6gre_dev_free; + + dev->type = ARPHRD_IP6GRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4; + dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4; + t = netdev_priv(dev); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + dev->flags |= IFF_NOARP; + dev->iflink = 0; + dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; +} + +static int ip6gre_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); + + if (ipv6_addr_any(&tunnel->parms.raddr)) + dev->header_ops = &ip6gre_header_ops; + + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static void ip6gre_fb_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + tunnel->hlen = sizeof(struct ipv6hdr) + 4; + + dev_hold(dev); +} + + +static struct inet6_protocol ip6gre_protocol __read_mostly = { + .handler = ip6gre_rcv, + .err_handler = ip6gre_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + +static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, + struct list_head *head) +{ + int prio; + + for (prio = 0; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip6_tnl *t; + + t = rtnl_dereference(ign->tunnels[prio][h]); + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = rtnl_dereference(t->next); + } + } + } +} + +static int __net_init ip6gre_init_net(struct net *net) +{ + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int err; + + ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", + ip6gre_tunnel_setup); + if (!ign->fb_tunnel_dev) { + err = -ENOMEM; + goto err_alloc_dev; + } + dev_net_set(ign->fb_tunnel_dev, net); + + ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); + ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; + + err = register_netdev(ign->fb_tunnel_dev); + if (err) + goto err_reg_dev; + + rcu_assign_pointer(ign->tunnels_wc[0], + netdev_priv(ign->fb_tunnel_dev)); + return 0; + +err_reg_dev: + ip6gre_dev_free(ign->fb_tunnel_dev); +err_alloc_dev: + return err; +} + +static void __net_exit ip6gre_exit_net(struct net *net) +{ + struct ip6gre_net *ign; + LIST_HEAD(list); + + ign = net_generic(net, ip6gre_net_id); + rtnl_lock(); + ip6gre_destroy_tunnels(ign, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations ip6gre_net_ops = { + .init = ip6gre_init_net, + .exit = ip6gre_exit_net, + .id = &ip6gre_net_id, + .size = sizeof(struct ip6gre_net), +}; + +static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be16 flags; + + if (!data) + return 0; + + flags = 0; + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (flags & (GRE_VERSION|GRE_ROUTING)) + return -EINVAL; + + return 0; +} + +static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + struct in6_addr daddr; + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + goto out; + + if (data[IFLA_GRE_REMOTE]) { + nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + if (ipv6_addr_any(&daddr)) + return -EINVAL; + } + +out: + return ip6gre_tunnel_validate(tb, data); +} + + +static void ip6gre_netlink_parms(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + if (!data) + return; + + if (data[IFLA_GRE_LINK]) + parms->link = nla_get_u32(data[IFLA_GRE_LINK]); + + if (data[IFLA_GRE_IFLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + + if (data[IFLA_GRE_OFLAGS]) + parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + + if (data[IFLA_GRE_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); + + if (data[IFLA_GRE_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); + + if (data[IFLA_GRE_LOCAL]) + nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr)); + + if (data[IFLA_GRE_REMOTE]) + nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + + if (data[IFLA_GRE_TTL]) + parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]); + + if (data[IFLA_GRE_ENCAP_LIMIT]) + parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); + + if (data[IFLA_GRE_FLOWINFO]) + parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]); + + if (data[IFLA_GRE_FLAGS]) + parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); +} + +static int ip6gre_tap_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + ip6gre_tnl_link_config(tunnel, 1); + + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static const struct net_device_ops ip6gre_tap_netdev_ops = { + .ndo_init = ip6gre_tap_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_get_stats64 = ip6gre_get_stats64, +}; + +static void ip6gre_tap_setup(struct net_device *dev) +{ + + ether_setup(dev); + + dev->netdev_ops = &ip6gre_tap_netdev_ops; + dev->destructor = ip6gre_dev_free; + + dev->iflink = 0; + dev->features |= NETIF_F_NETNS_LOCAL; +} + +static int ip6gre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ip6_tnl *nt; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int err; + + nt = netdev_priv(dev); + ip6gre_netlink_parms(data, &nt->parms); + + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + nt->dev = dev; + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= NETIF_F_LLTX; + + err = register_netdevice(dev); + if (err) + goto out; + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + +out: + return err; +} + +static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip6_tnl *t, *nt; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct __ip6_tnl_parm p; + + if (dev == ign->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + ip6gre_netlink_parms(data, &p); + + t = ip6gre_tunnel_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + t = nt; + + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link(ign, t); + netdev_state_change(dev); + } + + return 0; +} + +static size_t ip6gre_get_size(const struct net_device *dev) +{ + return + /* IFLA_GRE_LINK */ + nla_total_size(4) + + /* IFLA_GRE_IFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_OFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_IKEY */ + nla_total_size(4) + + /* IFLA_GRE_OKEY */ + nla_total_size(4) + + /* IFLA_GRE_LOCAL */ + nla_total_size(4) + + /* IFLA_GRE_REMOTE */ + nla_total_size(4) + + /* IFLA_GRE_TTL */ + nla_total_size(1) + + /* IFLA_GRE_TOS */ + nla_total_size(1) + + /* IFLA_GRE_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_GRE_FLOWINFO */ + nla_total_size(4) + + /* IFLA_GRE_FLAGS */ + nla_total_size(4) + + 0; +} + +static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct __ip6_tnl_parm *p = &t->parms; + + if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || + nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) || + nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) || + nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || + /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ + nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || + nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || + nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { + [IFLA_GRE_LINK] = { .type = NLA_U32 }, + [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_IKEY] = { .type = NLA_U32 }, + [IFLA_GRE_OKEY] = { .type = NLA_U32 }, + [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) }, + [IFLA_GRE_TTL] = { .type = NLA_U8 }, + [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, + [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, + [IFLA_GRE_FLAGS] = { .type = NLA_U32 }, +}; + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { + .kind = "ip6gre", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tunnel_setup, + .validate = ip6gre_tunnel_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +}; + +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { + .kind = "ip6gretap", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tap_setup, + .validate = ip6gre_tap_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +}; + +/* + * And now the modules code and kernel interface. + */ + +static int __init ip6gre_init(void) +{ + int err; + + pr_info("GRE over IPv6 tunneling driver\n"); + + err = register_pernet_device(&ip6gre_net_ops); + if (err < 0) + return err; + + err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE); + if (err < 0) { + pr_info("%s: can't add protocol\n", __func__); + goto add_proto_failed; + } + + err = rtnl_link_register(&ip6gre_link_ops); + if (err < 0) + goto rtnl_link_failed; + + err = rtnl_link_register(&ip6gre_tap_ops); + if (err < 0) + goto tap_ops_failed; + +out: + return err; + +tap_ops_failed: + rtnl_link_unregister(&ip6gre_link_ops); +rtnl_link_failed: + inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); +add_proto_failed: + unregister_pernet_device(&ip6gre_net_ops); + goto out; +} + +static void __exit ip6gre_fini(void) +{ + rtnl_link_unregister(&ip6gre_tap_ops); + rtnl_link_unregister(&ip6gre_link_ops); + inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); + unregister_pernet_device(&ip6gre_net_ops); +} + +module_init(ip6gre_init); +module_exit(ip6gre_fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_DESCRIPTION("GRE over IPv6 tunneling device"); +MODULE_ALIAS_RTNL_LINK("ip6gre"); +MODULE_ALIAS_NETDEV("ip6gre0"); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5ab923e51af3..a52d864d562b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -47,9 +47,16 @@ -inline int ip6_rcv_finish( struct sk_buff *skb) +int ip6_rcv_finish(struct sk_buff *skb) { - if (skb_dst(skb) == NULL) + if (sysctl_ip_early_demux && !skb_dst(skb)) { + const struct inet6_protocol *ipprot; + + ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); + if (ipprot && ipprot->early_demux) + ipprot->early_demux(skb); + } + if (!skb_dst(skb)) ip6_route_input(skb); return dst_input(skb); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5b2d63ed793e..aece3e792f84 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -123,16 +123,11 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } - rcu_read_lock(); rt = (struct rt6_info *) dst; neigh = rt->n; - if (neigh) { - int res = dst_neigh_output(dst, neigh, skb); + if (neigh) + return dst_neigh_output(dst, neigh, skb); - rcu_read_unlock(); - return res; - } - rcu_read_unlock(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); @@ -493,7 +488,8 @@ int ip6_forward(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb->len > mtu && !skb_is_gso(skb)) { + if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || + (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -636,7 +632,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->local_df && skb->len > mtu)) { + if (unlikely(!skb->local_df && skb->len > mtu) || + (IP6CB(skb)->frag_max_size && + IP6CB(skb)->frag_max_size > mtu)) { if (skb->sk && dst_allfrag(skb_dst(skb))) sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); @@ -980,7 +978,6 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - rcu_read_lock(); rt = (struct rt6_info *) *dst; n = rt->n; if (n && !(n->nud_state & NUD_VALID)) { @@ -988,7 +985,6 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct flowi6 fl_gw6; int redirect; - rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1008,8 +1004,6 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } - } else { - rcu_read_unlock(); } #endif @@ -1285,8 +1279,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (dst_allfrag(rt->dst.path)) cork->flags |= IPCORK_ALLFRAG; cork->length = 0; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; @@ -1510,48 +1502,31 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = sk->sk_sndmsg_page; - int off = sk->sk_sndmsg_off; - unsigned int left; - - if (page && (left = PAGE_SIZE - off) > 0) { - if (copy >= left) - copy = left; - if (page != skb_frag_page(frag)) { - if (i == MAX_SKB_FRAGS) { - err = -EMSGSIZE; - goto error; - } - skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); - skb_frag_ref(skb, i); - frag = &skb_shinfo(skb)->frags[i]; - } - } else if(i < MAX_SKB_FRAGS) { - if (copy > PAGE_SIZE) - copy = PAGE_SIZE; - page = alloc_pages(sk->sk_allocation, 0); - if (page == NULL) { - err = -ENOMEM; - goto error; - } - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; + struct page_frag *pfrag = sk_page_frag(sk); - skb_fill_page_desc(skb, i, page, 0, 0); - frag = &skb_shinfo(skb)->frags[i]; - } else { - err = -EMSGSIZE; + err = -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) goto error; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + err = -EMSGSIZE; + if (i == MAX_SKB_FRAGS) + goto error; + + __skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, 0); + skb_shinfo(skb)->nr_frags = ++i; + get_page(pfrag->page); } + copy = min_t(int, copy, pfrag->size - pfrag->offset); if (getfrag(from, - skb_frag_address(frag) + skb_frag_size(frag), - offset, copy, skb->len, skb) < 0) { - err = -EFAULT; - goto error; - } - sk->sk_sndmsg_off += copy; - skb_frag_size_add(frag, copy); + page_address(pfrag->page) + pfrag->offset, + offset, copy, skb->len, skb) < 0) + goto error_efault; + + pfrag->offset += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1560,7 +1535,11 @@ alloc_new_skb: offset += copy; length -= copy; } + return 0; + +error_efault: + err = -EFAULT; error: cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9a1d5fe6aef8..cb7e2ded6f08 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) * Locking : hash tables are protected by RCU and RTNL */ -static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) +struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) { struct dst_entry *dst = t->dst_cache; @@ -139,20 +139,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) return dst; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_check); -static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) +void ip6_tnl_dst_reset(struct ip6_tnl *t) { dst_release(t->dst_cache); t->dst_cache = NULL; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); -static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) +void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; dst_release(t->dst_cache); t->dst_cache = dst; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_store); /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses @@ -200,7 +203,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ **/ static struct ip6_tnl __rcu ** -ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) +ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; @@ -267,7 +270,7 @@ static void ip6_dev_free(struct net_device *dev) * created tunnel or NULL **/ -static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) +static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) { struct net_device *dev; struct ip6_tnl *t; @@ -322,7 +325,7 @@ failed: **/ static struct ip6_tnl *ip6_tnl_locate(struct net *net, - struct ip6_tnl_parm *p, int create) + struct __ip6_tnl_parm *p, int create) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; @@ -374,8 +377,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) * else index to encapsulation limit **/ -static __u16 -parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) +__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; __u8 nexthdr = ipv6h->nexthdr; @@ -425,6 +427,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) } return 0; } +EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim); /** * ip6_tnl_err - tunnel error handler @@ -480,7 +483,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, case ICMPV6_PARAMPROB: teli = 0; if ((*code) == ICMPV6_HDR_FIELD) - teli = parse_tlv_tnl_enc_lim(skb, skb->data); + teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; @@ -693,11 +696,11 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } -static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, +__u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ltype = ipv6_addr_type(laddr); int rtype = ipv6_addr_type(raddr); __u32 flags = 0; @@ -715,13 +718,14 @@ static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, } return flags; } +EXPORT_SYMBOL(ip6_tnl_get_cap); /* called with rcu_read_lock() */ -static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, +int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); @@ -740,6 +744,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, } return ret; } +EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); /** * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally @@ -859,9 +864,9 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } -static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) +int ip6_tnl_xmit_ctl(struct ip6_tnl *t) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); @@ -885,6 +890,8 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) } return ret; } +EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); + /** * ip6_tnl_xmit2 - encapsulate packet and send * @skb: the outgoing socket buffer @@ -1085,7 +1092,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) return -1; - offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb)); + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; @@ -1152,7 +1159,7 @@ tx_err: static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); @@ -1215,7 +1222,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) **/ static int -ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) +ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) { t->parms.laddr = p->laddr; t->parms.raddr = p->raddr; @@ -1230,6 +1237,34 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) return 0; } +static void +ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->proto = u->proto; + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void +ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) +{ + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->proto = p->proto; + memcpy(u->name, p->name, sizeof(u->name)); +} + /** * ip6_tnl_ioctl - configure ipv6 tunnels from userspace * @dev: virtual device associated with tunnel @@ -1263,6 +1298,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip6_tnl_parm p; + struct __ip6_tnl_parm p1; struct ip6_tnl *t = NULL; struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -1274,11 +1310,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = -EFAULT; break; } - t = ip6_tnl_locate(net, &p, 0); + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); + } else { + memset(&p, 0, sizeof(p)); } if (t == NULL) t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof (p)); + ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { err = -EFAULT; } @@ -1295,7 +1334,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && p.proto != 0) break; - t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL); + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { if (t != NULL) { if (t->dev != dev) { @@ -1307,13 +1347,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ip6_tnl_unlink(ip6n, t); synchronize_net(); - err = ip6_tnl_change(t, &p); + err = ip6_tnl_change(t, &p1); ip6_tnl_link(ip6n, t); netdev_state_change(dev); } if (t) { err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) + ip6_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; } else @@ -1329,7 +1370,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) break; err = -ENOENT; - if ((t = ip6_tnl_locate(net, &p, 0)) == NULL) + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); + if (t == NULL) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4532973f0dd4..08ea3f0b6e55 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -838,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else kfree_skb(skb); } @@ -1052,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else ip6_mr_forward(net, mrt, skb, c); } @@ -2202,12 +2202,12 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc6_cache *c) + u32 portid, u32 seq, struct mfc6_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2260,7 +2260,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ip6mr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 5b087c31d87b..0f9bdc5ee9f3 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -86,28 +86,30 @@ static int mip6_mh_len(int type) static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { - struct ip6_mh *mh; + struct ip6_mh _hdr; + const struct ip6_mh *mh; - if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || - !pskb_may_pull(skb, (skb_transport_offset(skb) + - ((skb_transport_header(skb)[1] + 1) << 3)))) + mh = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!mh) return -1; - mh = (struct ip6_mh *)skb_transport_header(skb); + if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) + return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); - mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", mh->ip6mh_proto); - mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + + skb_network_header_len(skb)); return -1; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index db31561cc8df..429089cb073d 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -15,6 +15,7 @@ int ip6_route_me_harder(struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); const struct ipv6hdr *iph = ipv6_hdr(skb); + unsigned int hh_len; struct dst_entry *dst; struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, @@ -47,6 +48,13 @@ int ip6_route_me_harder(struct sk_buff *skb) } #endif + /* Change in oif may mean change in hh_len. */ + hh_len = skb_dst(skb)->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), + 0, GFP_ATOMIC)) + return -1; + return 0; } EXPORT_SYMBOL(ip6_route_me_harder); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 10135342799e..c72532a60d88 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -181,9 +181,44 @@ config IP6_NF_SECURITY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. - + If unsure, say N. +config NF_NAT_IPV6 + tristate "IPv6 NAT" + depends on NF_CONNTRACK_IPV6 + depends on NETFILTER_ADVANCED + select NF_NAT + help + The IPv6 NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in ip6tables, see the man page for ip6tables(8). + + To compile it as a module, choose M here. If unsure, say N. + +if NF_NAT_IPV6 + +config IP6_NF_TARGET_MASQUERADE + tristate "MASQUERADE target support" + help + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + To compile it as a module, choose M here. If unsure, say N. + +config IP6_NF_TARGET_NPT + tristate "NPT (Network Prefix translation) target support" + help + This option adds the `SNPT' and `DNPT' target, which perform + stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296. + + To compile it as a module, choose M here. If unsure, say N. + +endif # NF_NAT_IPV6 + endif # IP6_NF_IPTABLES endmenu diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 534d3f216f7b..2d11fcc2cf3c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o +obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o # objects for l3 independent conntrack nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o @@ -15,6 +16,9 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o +nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o +obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o + # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o @@ -30,4 +34,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets +obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o +obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c new file mode 100644 index 000000000000..60e9053bab05 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/ipv6.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/addrconf.h> +#include <net/ipv6.h> + +static unsigned int +masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + enum ip_conntrack_info ctinfo; + struct in6_addr src; + struct nf_conn *ct; + struct nf_nat_range newrange; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + if (ipv6_dev_get_saddr(dev_net(par->out), par->out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) + return NF_DROP; + + nfct_nat(ct)->masq_index = par->out->ifindex; + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = src; + newrange.max_addr.in6 = src; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); +} + +static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + return 0; +} + +static int device_cmp(struct nf_conn *ct, void *ifindex) +{ + const struct nf_conn_nat *nat = nfct_nat(ct); + + if (!nat) + return 0; + if (nf_ct_l3num(ct) != NFPROTO_IPV6) + return 0; + return nat->masq_index == (int)(long)ifindex; +} + +static int masq_device_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + const struct net_device *dev = ptr; + struct net *net = dev_net(dev); + + if (event == NETDEV_DOWN) + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex); + + return NOTIFY_DONE; +} + +static struct notifier_block masq_dev_notifier = { + .notifier_call = masq_device_event, +}; + +static int masq_inet_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = ptr; + + return masq_device_event(this, event, ifa->idev->dev); +} + +static struct notifier_block masq_inet_notifier = { + .notifier_call = masq_inet_event, +}; + +static struct xt_target masquerade_tg6_reg __read_mostly = { + .name = "MASQUERADE", + .family = NFPROTO_IPV6, + .checkentry = masquerade_tg6_checkentry, + .target = masquerade_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .me = THIS_MODULE, +}; + +static int __init masquerade_tg6_init(void) +{ + int err; + + err = xt_register_target(&masquerade_tg6_reg); + if (err == 0) { + register_netdevice_notifier(&masq_dev_notifier); + register_inet6addr_notifier(&masq_inet_notifier); + } + + return err; +} +static void __exit masquerade_tg6_exit(void) +{ + unregister_inet6addr_notifier(&masq_inet_notifier); + unregister_netdevice_notifier(&masq_dev_notifier); + xt_unregister_target(&masquerade_tg6_reg); +} + +module_init(masquerade_tg6_init); +module_exit(masquerade_tg6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("Xtables: automatic address SNAT"); diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c new file mode 100644 index 000000000000..e9486915eff6 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ipv6.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_ipv6/ip6t_NPT.h> +#include <linux/netfilter/x_tables.h> + +static __sum16 csum16_complement(__sum16 a) +{ + return (__force __sum16)(0xffff - (__force u16)a); +} + +static __sum16 csum16_add(__sum16 a, __sum16 b) +{ + u16 sum; + + sum = (__force u16)a + (__force u16)b; + sum += (__force u16)a < (__force u16)b; + return (__force __sum16)sum; +} + +static __sum16 csum16_sub(__sum16 a, __sum16 b) +{ + return csum16_add(a, csum16_complement(b)); +} + +static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) +{ + struct ip6t_npt_tginfo *npt = par->targinfo; + __sum16 src_sum = 0, dst_sum = 0; + unsigned int i; + + if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { + src_sum = csum16_add(src_sum, + (__force __sum16)npt->src_pfx.in6.s6_addr16[i]); + dst_sum = csum16_add(dst_sum, + (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]); + } + + npt->adjustment = csum16_sub(src_sum, dst_sum); + return 0; +} + +static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, + struct in6_addr *addr) +{ + unsigned int pfx_len; + unsigned int i, idx; + __be32 mask; + __sum16 sum; + + pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len); + for (i = 0; i < pfx_len; i += 32) { + if (pfx_len - i >= 32) + mask = 0; + else + mask = htonl(~((1 << (pfx_len - i)) - 1)); + + idx = i / 32; + addr->s6_addr32[idx] &= mask; + addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx]; + } + + if (pfx_len <= 48) + idx = 3; + else { + for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) { + if ((__force __sum16)addr->s6_addr16[idx] != + CSUM_MANGLED_0) + break; + } + if (idx == ARRAY_SIZE(addr->s6_addr16)) + return false; + } + + sum = csum16_add((__force __sum16)addr->s6_addr16[idx], + npt->adjustment); + if (sum == CSUM_MANGLED_0) + sum = 0; + *(__force __sum16 *)&addr->s6_addr16[idx] = sum; + + return true; +} + +static unsigned int +ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ip6t_npt_tginfo *npt = par->targinfo; + + if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) { + icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, + offsetof(struct ipv6hdr, saddr)); + return NF_DROP; + } + return XT_CONTINUE; +} + +static unsigned int +ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ip6t_npt_tginfo *npt = par->targinfo; + + if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) { + icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, + offsetof(struct ipv6hdr, daddr)); + return NF_DROP; + } + return XT_CONTINUE; +} + +static struct xt_target ip6t_npt_target_reg[] __read_mostly = { + { + .name = "SNPT", + .target = ip6t_snpt_tg, + .targetsize = sizeof(struct ip6t_npt_tginfo), + .checkentry = ip6t_npt_checkentry, + .family = NFPROTO_IPV6, + .hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_POST_ROUTING), + .me = THIS_MODULE, + }, + { + .name = "DNPT", + .target = ip6t_dnpt_tg, + .targetsize = sizeof(struct ip6t_npt_tginfo), + .checkentry = ip6t_npt_checkentry, + .family = NFPROTO_IPV6, + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, +}; + +static int __init ip6t_npt_init(void) +{ + return xt_register_targets(ip6t_npt_target_reg, + ARRAY_SIZE(ip6t_npt_target_reg)); +} + +static void __exit ip6t_npt_exit(void) +{ + xt_unregister_targets(ip6t_npt_target_reg, + ARRAY_SIZE(ip6t_npt_target_reg)); +} + +module_init(ip6t_npt_init); +module_exit(ip6t_npt_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS("ip6t_SNPT"); +MODULE_ALIAS("ip6t_DNPT"); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 325e59a0224f..beb5777d2043 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) net->ipv6.ip6table_filter = ip6t_register_table(net, &packet_filter, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_filter)) - return PTR_ERR(net->ipv6.ip6table_filter); - return 0; + return PTR_RET(net->ipv6.ip6table_filter); } static void __net_exit ip6table_filter_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 4d782405f125..7431121b87de 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net) net->ipv6.ip6table_mangle = ip6t_register_table(net, &packet_mangler, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_mangle)) - return PTR_ERR(net->ipv6.ip6table_mangle); - return 0; + return PTR_RET(net->ipv6.ip6table_mangle); } static void __net_exit ip6table_mangle_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c new file mode 100644 index 000000000000..e418bd6350a4 --- /dev/null +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT + * funded by Astaro. + */ + +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_l3proto.h> + +static const struct xt_table nf_nat_ipv6_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV6, +}; + +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + */ + struct nf_nat_range range; + + range.flags = 0; + pr_debug("Allocating NULL binding for %p (%pI6)\n", ct, + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 : + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6); + + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} + +static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + unsigned int ret; + + ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat); + if (ret == NF_ACCEPT) { + if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) + ret = alloc_null_binding(ct, hooknum); + } + return ret; +} + +static unsigned int +nf_nat_ipv6_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + __be16 frag_off; + int hdrlen; + u8 nexthdr; + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + /* Don't try to NAT if this packet is not conntracked */ + if (nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (!nat) { + /* NAT module was loaded late. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + hooknum, hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + if (ret != NF_ACCEPT) + return ret; + } else + pr_debug("Already setup manip %s for ct %p\n", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == IP_CT_ESTABLISHED_REPLY); + } + + return nf_nat_packet(ct, ctinfo, hooknum, skb); +} + +static unsigned int +nf_nat_ipv6_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int +nf_nat_ipv6_out(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET6) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int +nf_nat_ipv6_local_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + if (ip6_route_me_harder(skb)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET6)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_out, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_local_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_NAT_SRC, + }, +}; + +static int __net_init ip6table_nat_net_init(struct net *net) +{ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); + if (repl == NULL) + return -ENOMEM; + net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); + kfree(repl); + if (IS_ERR(net->ipv6.ip6table_nat)) + return PTR_ERR(net->ipv6.ip6table_nat); + return 0; +} + +static void __net_exit ip6table_nat_net_exit(struct net *net) +{ + ip6t_unregister_table(net, net->ipv6.ip6table_nat); +} + +static struct pernet_operations ip6table_nat_net_ops = { + .init = ip6table_nat_net_init, + .exit = ip6table_nat_net_exit, +}; + +static int __init ip6table_nat_init(void) +{ + int err; + + err = register_pernet_subsys(&ip6table_nat_net_ops); + if (err < 0) + goto err1; + + err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + if (err < 0) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&ip6table_nat_net_ops); +err1: + return err; +} + +static void __exit ip6table_nat_exit(void) +{ + nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + unregister_pernet_subsys(&ip6table_nat_net_ops); +} + +module_init(ip6table_nat_init); +module_exit(ip6table_nat_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5b9926a011bd..60d1bddff7a0 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net) net->ipv6.ip6table_raw = ip6t_register_table(net, &packet_raw, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_raw)) - return PTR_ERR(net->ipv6.ip6table_raw); - return 0; + return PTR_RET(net->ipv6.ip6table_raw); } static void __net_exit ip6table_raw_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 91aa2b4d83c9..db155351339c 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net) net->ipv6.ip6table_security = ip6t_register_table(net, &security_table, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_security)) - return PTR_ERR(net->ipv6.ip6table_security); - - return 0; + return PTR_RET(net->ipv6.ip6table_security); } static void __net_exit ip6table_security_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 4794f96cf2e0..8860d23e61cf 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -28,6 +28,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> +#include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/nf_log.h> @@ -64,82 +65,31 @@ static int ipv6_print_tuple(struct seq_file *s, tuple->src.u3.ip6, tuple->dst.u3.ip6); } -/* - * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c - * - * This function parses (probably truncated) exthdr set "hdr" - * of length "len". "nexthdrp" initially points to some place, - * where type of the first header can be found. - * - * It skips all well-known exthdrs, and returns pointer to the start - * of unparsable area i.e. the first header with unknown type. - * if success, *nexthdr is updated by type/protocol of this header. - * - * NOTES: - it may return pointer pointing beyond end of packet, - * if the last recognized header is truncated in the middle. - * - if packet is truncated, so that all parsed headers are skipped, - * it returns -1. - * - if packet is fragmented, return pointer of the fragment header. - * - ESP is unparsable for now and considered like - * normal payload protocol. - * - Note also special handling of AUTH header. Thanks to IPsec wizards. - */ - -static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start, - u8 *nexthdrp, int len) -{ - u8 nexthdr = *nexthdrp; - - while (ipv6_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr hdr; - int hdrlen; - - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return -1; - if (nexthdr == NEXTHDR_NONE) - break; - if (nexthdr == NEXTHDR_FRAGMENT) - break; - if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) - BUG(); - if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hdr.hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(&hdr); - - nexthdr = hdr.nexthdr; - len -= hdrlen; - start += hdrlen; - } - - *nexthdrp = nexthdr; - return start; -} - static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { unsigned int extoff = nhoff + sizeof(struct ipv6hdr); - unsigned char pnum; + __be16 frag_off; int protoff; + u8 nexthdr; if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), - &pnum, sizeof(pnum)) != 0) { + &nexthdr, sizeof(nexthdr)) != 0) { pr_debug("ip6_conntrack_core: can't get nexthdr\n"); return -NF_ACCEPT; } - protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); + protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); /* * (protoff == skb->len) mean that the packet doesn't have no data * except of IPv6 & ext headers. but it's tracked anyway. - YK */ - if ((protoff < 0) || (protoff > skb->len)) { + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); return -NF_ACCEPT; } *dataoff = protoff; - *protonum = pnum; + *protonum = nexthdr; return NF_ACCEPT; } @@ -153,10 +103,10 @@ static unsigned int ipv6_helper(unsigned int hooknum, const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; - unsigned int ret, protoff; - unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; - unsigned char pnum = ipv6_hdr(skb)->nexthdr; - + unsigned int ret; + __be16 frag_off; + int protoff; + u8 nexthdr; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); @@ -171,9 +121,10 @@ static unsigned int ipv6_helper(unsigned int hooknum, if (!helper) return NF_ACCEPT; - protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, - skb->len - extoff); - if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { + nexthdr = ipv6_hdr(skb)->nexthdr; + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("proto header not found\n"); return NF_ACCEPT; } @@ -192,6 +143,36 @@ static unsigned int ipv6_confirm(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned char pnum = ipv6_hdr(skb)->nexthdr; + int protoff; + __be16 frag_off; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + goto out; + + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { + pr_debug("proto header not found\n"); + goto out; + } + + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { + typeof(nf_nat_seq_adjust_hook) seq_adjust; + + seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); + if (!seq_adjust || + !seq_adjust(skb, ct, ctinfo, protoff)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); + return NF_DROP; + } + } +out: /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } @@ -199,9 +180,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum, static unsigned int __ipv6_conntrack_in(struct net *net, unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct sk_buff *reasm = skb->nfct_reasm; + const struct nf_conn_help *help; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; /* This packet is fragmented and has reassembled packet. */ if (reasm) { @@ -213,6 +199,25 @@ static unsigned int __ipv6_conntrack_in(struct net *net, if (ret != NF_ACCEPT) return ret; } + + /* Conntrack helpers need the entire reassembled packet in the + * POST_ROUTING hook. In case of unconfirmed connections NAT + * might reassign a helper, so the entire packet is also + * required. + */ + ct = nf_ct_get(reasm, &ctinfo); + if (ct != NULL && !nf_ct_is_untracked(ct)) { + help = nfct_help(ct); + if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { + nf_conntrack_get_reasm(skb); + NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, + (struct net_device *)in, + (struct net_device *)out, + okfn, NF_IP6_PRI_CONNTRACK + 1); + return NF_DROP_ERR(-ECANCELED); + } + } + nf_conntrack_get(reasm->nfct); skb->nfct = reasm->nfct; skb->nfctinfo = reasm->nfctinfo; @@ -228,7 +233,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); + return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, @@ -242,7 +247,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); + return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c9c78c2e666b..18bd9bbbd1c6 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -57,41 +57,27 @@ struct nf_ct_frag6_skb_cb #define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) -struct nf_ct_frag6_queue -{ - struct inet_frag_queue q; - - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - - unsigned int csum; - __u16 nhoffset; -}; - static struct inet_frags nf_frags; -static struct netns_frags nf_init_frags; #ifdef CONFIG_SYSCTL static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", - .data = &nf_init_frags.timeout, + .data = &init_net.nf_frag.frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_init_frags.low_thresh, + .data = &init_net.nf_frag.frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_init_frags.high_thresh, + .data = &init_net.nf_frag.frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, @@ -99,68 +85,86 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { { } }; -static struct ctl_table_header *nf_ct_frag6_sysctl_header; -#endif - -static unsigned int nf_hashfn(struct inet_frag_queue *q) +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) { - const struct nf_ct_frag6_queue *nq; + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = nf_ct_frag6_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table), + GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].data = &net->ipv6.frags.high_thresh; + table[1].data = &net->ipv6.frags.low_thresh; + table[2].data = &net->ipv6.frags.timeout; + } - nq = container_of(q, struct nf_ct_frag6_queue, q); - return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); + hdr = register_net_sysctl(net, "net/netfilter", table); + if (hdr == NULL) + goto err_reg; + + net->nf_frag.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; } -static void nf_skb_free(struct sk_buff *skb) +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) { - if (NFCT_FRAG6_CB(skb)->orig) - kfree_skb(NFCT_FRAG6_CB(skb)->orig); -} + struct ctl_table *table; -/* Destruction primitives. */ + table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} -static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) +#else +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) { - inet_frag_put(&fq->q, &nf_frags); + return 0; } +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) +{ +} +#endif -/* Kill fq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) +static unsigned int nf_hashfn(struct inet_frag_queue *q) { - inet_frag_kill(&fq->q, &nf_frags); + const struct frag_queue *nq; + + nq = container_of(q, struct frag_queue, q); + return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); } -static void nf_ct_frag6_evictor(void) +static void nf_skb_free(struct sk_buff *skb) { - local_bh_disable(); - inet_frag_evictor(&nf_init_frags, &nf_frags); - local_bh_enable(); + if (NFCT_FRAG6_CB(skb)->orig) + kfree_skb(NFCT_FRAG6_CB(skb)->orig); } static void nf_ct_frag6_expire(unsigned long data) { - struct nf_ct_frag6_queue *fq; - - fq = container_of((struct inet_frag_queue *)data, - struct nf_ct_frag6_queue, q); - - spin_lock(&fq->q.lock); + struct frag_queue *fq; + struct net *net; - if (fq->q.last_in & INET_FRAG_COMPLETE) - goto out; + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, nf_frag.frags); - fq_kill(fq); - -out: - spin_unlock(&fq->q.lock); - fq_put(fq); + ip6_expire_frag_queue(net, fq, &nf_frags); } /* Creation primitives. */ - -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) +static inline struct frag_queue *fq_find(struct net *net, __be32 id, + u32 user, struct in6_addr *src, + struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -174,22 +178,23 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) read_lock_bh(&nf_frags.lock); hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); - q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); if (q == NULL) goto oom; - return container_of(q, struct nf_ct_frag6_queue, q); + return container_of(q, struct frag_queue, q); oom: return NULL; } -static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, +static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; + unsigned int payload_len; int offset, end; if (fq->q.last_in & INET_FRAG_COMPLETE) { @@ -197,8 +202,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, goto err; } + payload_len = ntohs(ipv6_hdr(skb)->payload_len); + offset = ntohs(fhdr->frag_off) & ~0x7; - end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - + end = offset + (payload_len - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { @@ -307,7 +314,9 @@ found: skb->dev = NULL; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &nf_init_frags.mem); + if (payload_len > fq->q.max_size) + fq->q.max_size = payload_len; + atomic_add(skb->truesize, &fq->q.net->mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -317,12 +326,12 @@ found: fq->q.last_in |= INET_FRAG_FIRST_IN; } write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); + list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); write_unlock(&nf_frags.lock); return 0; discard_fq: - fq_kill(fq); + inet_frag_kill(&fq->q, &nf_frags); err: return -1; } @@ -337,12 +346,12 @@ err: * the last and the first frames arrived and all the bits are here. */ static struct sk_buff * -nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) +nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) { struct sk_buff *fp, *op, *head = fq->q.fragments; int payload_len; - fq_kill(fq); + inet_frag_kill(&fq->q, &nf_frags); WARN_ON(head == NULL); WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); @@ -386,7 +395,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_init_frags.mem); + atomic_add(clone->truesize, &fq->q.net->mem); } /* We have to remove fragment header from datagram and to relocate @@ -410,12 +419,14 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &nf_init_frags.mem); + atomic_sub(head->truesize, &fq->q.net->mem); + head->local_df = 1; head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) @@ -520,8 +531,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; + struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev) + : dev_net(skb->dev); struct frag_hdr *fhdr; - struct nf_ct_frag6_queue *fq; + struct frag_queue *fq; struct ipv6hdr *hdr; int fhoff, nhoff; u8 prevhdr; @@ -553,10 +566,11 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) - nf_ct_frag6_evictor(); + local_bh_disable(); + inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); + local_bh_enable(); - fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; @@ -567,7 +581,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); - fq_put(fq); + inet_frag_put(&fq->q, &nf_frags); goto ret_orig; } @@ -579,7 +593,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) } spin_unlock_bh(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, &nf_frags); return ret_skb; ret_orig: @@ -592,6 +606,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { struct sk_buff *s, *s2; + unsigned int ret = 0; for (s = NFCT_FRAG6_CB(skb)->orig; s;) { nf_conntrack_put_reasm(s->nfct_reasm); @@ -601,49 +616,62 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, s2 = s->next; s->next = NULL; - NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn, - NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + if (ret != -ECANCELED) + ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, + in, out, okfn, + NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + else + kfree_skb(s); + s = s2; } nf_conntrack_put_reasm(skb); } +static int nf_ct_net_init(struct net *net) +{ + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; + inet_frags_init_net(&net->nf_frag.frags); + + return nf_ct_frag6_sysctl_register(net); +} + +static void nf_ct_net_exit(struct net *net) +{ + nf_ct_frags6_sysctl_unregister(net); + inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); +} + +static struct pernet_operations nf_ct_net_ops = { + .init = nf_ct_net_init, + .exit = nf_ct_net_exit, +}; + int nf_ct_frag6_init(void) { + int ret = 0; + nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; nf_frags.skb_free = nf_skb_free; - nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; - nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; - nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH; - inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); -#ifdef CONFIG_SYSCTL - nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter", - nf_ct_frag6_sysctl_table); - if (!nf_ct_frag6_sysctl_header) { + ret = register_pernet_subsys(&nf_ct_net_ops); + if (ret) inet_frags_fini(&nf_frags); - return -ENOMEM; - } -#endif - return 0; + return ret; } void nf_ct_frag6_cleanup(void) { -#ifdef CONFIG_SYSCTL - unregister_net_sysctl_table(nf_ct_frag6_sysctl_header); - nf_ct_frag6_sysctl_header = NULL; -#endif + unregister_pernet_subsys(&nf_ct_net_ops); inet_frags_fini(&nf_frags); - - nf_init_frags.low_thresh = 0; - nf_ct_frag6_evictor(); } diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c new file mode 100644 index 000000000000..abfe75a2e316 --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of IPv6 NAT funded by Astaro. + */ +#include <linux/types.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ipv6.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <net/secure_seq.h> +#include <net/checksum.h> +#include <net/ip6_checksum.h> +#include <net/ip6_route.h> +#include <net/ipv6.h> + +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv6; + +#ifdef CONFIG_XFRM +static void nf_nat_ipv6_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi6 *fl6 = &fl->u.ip6; + + if (ct->status & statusbit) { + fl6->daddr = t->dst.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl6->saddr = t->src.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_sport = t->src.u.all; + } +} +#endif + +static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range) +{ + return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 && + ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0; +} + +static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t, + __be16 dport) +{ + return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport); +} + +static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct ipv6hdr *ipv6h; + __be16 frag_off; + int hdroff; + u8 nexthdr; + + if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h))) + return false; + + ipv6h = (void *)skb->data + iphdroff; + nexthdr = ipv6h->nexthdr; + hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), + &nexthdr, &frag_off); + if (hdroff < 0) + goto manip_addr; + + if ((frag_off & htons(~0x7)) == 0 && + !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, + target, maniptype)) + return false; +manip_addr: + if (maniptype == NF_NAT_MANIP_SRC) + ipv6h->saddr = target->src.u3.in6; + else + ipv6h->daddr = target->dst.u3.in6; + + return true; +} + +static void nf_nat_ipv6_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); + const struct in6_addr *oldip, *newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = &ipv6h->saddr; + newip = &t->src.u3.in6; + } else { + oldip = &ipv6h->daddr; + newip = &t->dst.u3.in6; + } + inet_proto_csum_replace16(check, skb, oldip->s6_addr32, + newip->s6_addr32, 1); +} + +static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt6i_flags & RTF_LOCAL) && + (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + (data - (void *)skb->data); + skb->csum_offset = (void *)check - data; + *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, 0); + } else { + *check = 0; + *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, + csum_partial(data, datalen, + 0)); + if (proto == IPPROTO_UDP && !*check) + *check = CSUM_MANGLED_0; + } + } else + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), 1); +} + +static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_NAT_V6_MINIP]) { + nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP], + sizeof(struct in6_addr)); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V6_MAXIP]) + nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP], + sizeof(struct in6_addr)); + else + range->max_addr = range->min_addr; + + return 0; +} + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { + .l3proto = NFPROTO_IPV6, + .secure_port = nf_nat_ipv6_secure_port, + .in_range = nf_nat_ipv6_in_range, + .manip_pkt = nf_nat_ipv6_manip_pkt, + .csum_update = nf_nat_ipv6_csum_update, + .csum_recalc = nf_nat_ipv6_csum_recalc, + .nlattr_to_range = nf_nat_ipv6_nlattr_to_range, +#ifdef CONFIG_XFRM + .decode_session = nf_nat_ipv6_decode_session, +#endif +}; + +int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + unsigned int hdrlen) +{ + struct { + struct icmp6hdr icmp6; + struct ipv6hdr ip6; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + unsigned long statusbit; + + NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr); + if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), + l4proto, &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + inside = (void *)skb->data + hdrlen; + inside->icmp6.icmp6_cksum = 0; + inside->icmp6.icmp6_cksum = + csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + skb->len - hdrlen, IPPROTO_ICMPV6, + csum_partial(&inside->icmp6, + skb->len - hdrlen, 0)); + } + + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6); + if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); + +static int __init nf_nat_l3proto_ipv6_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); + if (err < 0) + goto err1; + err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6); + if (err < 0) + goto err2; + return err; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); +err1: + return err; +} + +static void __exit nf_nat_l3proto_ipv6_exit(void) +{ + nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6); + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); +} + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-" __stringify(AF_INET6)); + +module_init(nf_nat_l3proto_ipv6_init); +module_exit(nf_nat_l3proto_ipv6_exit); diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c new file mode 100644 index 000000000000..5d6da784305b --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6 + * NAT funded by Astaro. + */ + +#include <linux/types.h> +#include <linux/init.h> +#include <linux/icmpv6.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> + +static bool +icmpv6_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); +} + +static void +icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u16 id; + unsigned int range_size; + unsigned int i; + + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) + range_size = 0xffff; + + for (i = 0; ; ++id) { + tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + + (id % range_size)); + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; + } +} + +static bool +icmpv6_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmp6hdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmp6hdr *)(skb->data + hdroff); + l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum, + tuple, maniptype); + if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST || + hdr->icmp6_code == ICMPV6_ECHO_REPLY) { + inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, + hdr->icmp6_identifier, + tuple->src.u.icmp.id, 0); + hdr->icmp6_identifier = tuple->src.u.icmp.id; + } + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = { + .l4proto = IPPROTO_ICMPV6, + .manip_pkt = icmpv6_manip_pkt, + .in_range = icmpv6_in_range, + .unique_tuple = icmpv6_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index da2e92d05c15..745a32042950 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net) goto proc_dev_snmp6_fail; return 0; +proc_dev_snmp6_fail: + proc_net_remove(net, "snmp6"); proc_snmp6_fail: proc_net_remove(net, "sockstat6"); -proc_dev_snmp6_fail: - proc_net_remove(net, "dev_snmp6"); return -ENOMEM; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ef0579d5bca6..d8e95c77db99 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -107,21 +107,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { - struct icmp6hdr *icmph; - struct raw6_sock *rp = raw6_sk(sk); - - if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { - __u32 *data = &rp->filter.data[0]; - int bit_nr; + struct icmp6hdr *_hdr; + const struct icmp6hdr *hdr; - icmph = (struct icmp6hdr *) skb->data; - bit_nr = icmph->icmp6_type; + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (hdr) { + const __u32 *data = &raw6_sk(sk)->filter.data[0]; + unsigned int type = hdr->icmp6_type; - return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; + return (data[type >> 5] & (1U << (type & 31))) != 0; } - return 0; + return 1; } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -1251,7 +1250,8 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, - sock_i_uid(sp), 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4ff9af628e72..da8a4e301b1b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -65,36 +65,8 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) -/* - * Equivalent of ipv4 struct ipq - */ - -struct frag_queue -{ - struct inet_frag_queue q; - - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - - int iif; - unsigned int csum; - __u16 nhoffset; -}; - static struct inet_frags ip6_frags; -int ip6_frag_nqueues(struct net *net) -{ - return net->ipv6.frags.nqueues; -} - -int ip6_frag_mem(struct net *net) -{ - return atomic_read(&net->ipv6.frags.mem); -} - static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev); @@ -159,46 +131,18 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_init); -/* Destruction primitives. */ - -static __inline__ void fq_put(struct frag_queue *fq) -{ - inet_frag_put(&fq->q, &ip6_frags); -} - -/* Kill fq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static __inline__ void fq_kill(struct frag_queue *fq) -{ - inet_frag_kill(&fq->q, &ip6_frags); -} - -static void ip6_evictor(struct net *net, struct inet6_dev *idev) +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, + struct inet_frags *frags) { - int evicted; - - evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); - if (evicted) - IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); -} - -static void ip6_frag_expire(unsigned long data) -{ - struct frag_queue *fq; struct net_device *dev = NULL; - struct net *net; - - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); spin_lock(&fq->q.lock); if (fq->q.last_in & INET_FRAG_COMPLETE) goto out; - fq_kill(fq); + inet_frag_kill(&fq->q, frags); - net = container_of(fq->q.net, struct net, ipv6.frags); rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) @@ -222,7 +166,19 @@ out_rcu_unlock: rcu_read_unlock(); out: spin_unlock(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, frags); +} +EXPORT_SYMBOL(ip6_expire_frag_queue); + +static void ip6_frag_expire(unsigned long data) +{ + struct frag_queue *fq; + struct net *net; + + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, ipv6.frags); + + ip6_expire_frag_queue(net, fq, &ip6_frags); } static __inline__ struct frag_queue * @@ -391,7 +347,7 @@ found: return -1; discard_fq: - fq_kill(fq); + inet_frag_kill(&fq->q, &ip6_frags); err: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); @@ -417,7 +373,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, unsigned int nhoff; int sum_truesize; - fq_kill(fq); + inet_frag_kill(&fq->q, &ip6_frags); /* Make the one we just received the head. */ if (prev) { @@ -550,6 +506,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); + int evicted; IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); @@ -574,8 +531,10 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) - ip6_evictor(net, ip6_dst_idev(skb_dst(skb))); + evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); + if (evicted) + IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS, evicted); fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); if (fq != NULL) { @@ -586,7 +545,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); spin_unlock(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, &ip6_frags); return ret; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cf02cb97bbdd..7c7e963260e1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -222,11 +222,11 @@ static const u32 ip6_template_metrics[RTAX_MAX] = { [RTAX_HOPLIMIT - 1] = 255, }; -static struct rt6_info ip6_null_entry_template = { +static const struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -ENETUNREACH, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, @@ -242,11 +242,11 @@ static struct rt6_info ip6_null_entry_template = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -static struct rt6_info ip6_prohibit_entry_template = { +static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EACCES, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, @@ -257,11 +257,11 @@ static struct rt6_info ip6_prohibit_entry_template = { .rt6i_ref = ATOMIC_INIT(1), }; -static struct rt6_info ip6_blk_hole_entry_template = { +static const struct rt6_info ip6_blk_hole_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, .output = dst_discard, @@ -281,13 +281,14 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 0, DST_OBSOLETE_NONE, flags); + 0, DST_OBSOLETE_FORCE_CHK, flags); if (rt) { struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); + rt->rt6i_genid = rt_genid(net); } return rt; } @@ -369,15 +370,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static bool rt6_check_expired(const struct rt6_info *rt) { - struct rt6_info *ort = NULL; - if (rt->rt6i_flags & RTF_EXPIRES) { if (time_after(jiffies, rt->dst.expires)) return true; } else if (rt->dst.from) { - ort = (struct rt6_info *) rt->dst.from; - return (ort->rt6i_flags & RTF_EXPIRES) && - time_after(jiffies, ort->dst.expires); + return rt6_check_expired((struct rt6_info *) rt->dst.from); } return false; } @@ -451,10 +448,9 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - rcu_read_lock(); neigh = rt ? rt->n : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - goto out; + return; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -470,8 +466,6 @@ static void rt6_probe(struct rt6_info *rt) } else { read_unlock_bh(&neigh->lock); } -out: - rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -498,7 +492,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt) struct neighbour *neigh; int m; - rcu_read_lock(); neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) @@ -516,7 +509,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; - rcu_read_unlock(); return m; } @@ -965,7 +957,7 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, { int flags = 0; - fl6->flowi6_iif = net->loopback_dev->ifindex; + fl6->flowi6_iif = LOOPBACK_IFINDEX; if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) flags |= RT6_LOOKUP_F_IFACE; @@ -1031,6 +1023,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; + /* All IPV6 dsts are created with ->obsolete set to the value + * DST_OBSOLETE_FORCE_CHK which forces validation calls down + * into this function always. + */ + if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) + return NULL; + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { if (rt->rt6i_peer_genid != rt6_peer_genid()) { if (!rt6_has_peer(rt)) @@ -1397,8 +1396,6 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt->dst.obsolete = -1; - if (cfg->fc_flags & RTF_EXPIRES) rt6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); @@ -1463,8 +1460,21 @@ int ip6_route_add(struct fib6_config *cfg) } rt->dst.output = ip6_pkt_discard_out; rt->dst.input = ip6_pkt_discard; - rt->dst.error = -ENETUNREACH; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; + switch (cfg->fc_type) { + case RTN_BLACKHOLE: + rt->dst.error = -EINVAL; + break; + case RTN_PROHIBIT: + rt->dst.error = -EACCES; + break; + case RTN_THROW: + rt->dst.error = -EAGAIN; + break; + default: + rt->dst.error = -ENETUNREACH; + break; + } goto install_route; } @@ -1583,17 +1593,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) struct fib6_table *table; struct net *net = dev_net(rt->dst.dev); - if (rt == net->ipv6.ip6_null_entry) - return -ENOENT; + if (rt == net->ipv6.ip6_null_entry) { + err = -ENOENT; + goto out; + } table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_del(rt, info); - dst_release(&rt->dst); - write_unlock_bh(&table->tb6_lock); +out: + dst_release(&rt->dst); return err; } @@ -1829,7 +1840,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, if (!table) return NULL; - write_lock_bh(&table->tb6_lock); + read_lock_bh(&table->tb6_lock); fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); if (!fn) goto out; @@ -1845,7 +1856,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, break; } out: - write_unlock_bh(&table->tb6_lock); + read_unlock_bh(&table->tb6_lock); return rt; } @@ -1861,7 +1872,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, }; @@ -1888,7 +1899,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev if (!table) return NULL; - write_lock_bh(&table->tb6_lock); + read_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { if (dev == rt->dst.dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && @@ -1897,7 +1908,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev } if (rt) dst_hold(&rt->dst); - write_unlock_bh(&table->tb6_lock); + read_unlock_bh(&table->tb6_lock); return rt; } @@ -1911,7 +1922,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = dev_net(dev), }; @@ -2080,7 +2091,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_idev = idev; - rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) @@ -2261,14 +2271,18 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_src_len = rtm->rtm_src_len; cfg->fc_flags = RTF_UP; cfg->fc_protocol = rtm->rtm_protocol; + cfg->fc_type = rtm->rtm_type; - if (rtm->rtm_type == RTN_UNREACHABLE) + if (rtm->rtm_type == RTN_UNREACHABLE || + rtm->rtm_type == RTN_BLACKHOLE || + rtm->rtm_type == RTN_PROHIBIT || + rtm->rtm_type == RTN_THROW) cfg->fc_flags |= RTF_REJECT; if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -2359,7 +2373,7 @@ static inline size_t rt6_nlmsg_size(void) static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, - int iif, int type, u32 pid, u32 seq, + int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { struct rtmsg *rtm; @@ -2375,7 +2389,7 @@ static int rt6_fill_node(struct net *net, } } - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -2391,8 +2405,22 @@ static int rt6_fill_node(struct net *net, rtm->rtm_table = table; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; - if (rt->rt6i_flags & RTF_REJECT) - rtm->rtm_type = RTN_UNREACHABLE; + if (rt->rt6i_flags & RTF_REJECT) { + switch (rt->dst.error) { + case -EINVAL: + rtm->rtm_type = RTN_BLACKHOLE; + break; + case -EACCES: + rtm->rtm_type = RTN_PROHIBIT; + break; + case -EAGAIN: + rtm->rtm_type = RTN_THROW; + break; + default: + rtm->rtm_type = RTN_UNREACHABLE; + break; + } + } else if (rt->rt6i_flags & RTF_LOCAL) rtm->rtm_type = RTN_LOCAL; else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) @@ -2465,27 +2493,19 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - rcu_read_lock(); n = rt->n; if (n) { - if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { - rcu_read_unlock(); + if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) goto nla_put_failure; - } } - rcu_read_unlock(); if (rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) goto nla_put_failure; if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) goto nla_put_failure; - if (!(rt->rt6i_flags & RTF_EXPIRES)) - expires = 0; - else if (rt->dst.expires - jiffies < INT_MAX) - expires = rt->dst.expires - jiffies; - else - expires = INT_MAX; + + expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; @@ -2510,7 +2530,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) return rt6_fill_node(arg->net, arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, - NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, + NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, prefix, 0, NLM_F_MULTI); } @@ -2590,14 +2610,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).pid, + RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { kfree_skb(skb); goto errout; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; } @@ -2617,14 +2637,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->pid, seq, 0, 0, 0); + event, info->portid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); return; errout: @@ -2679,14 +2699,12 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - rcu_read_lock(); n = rt->n; if (n) { seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } - rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3bd1bfc01f85..3ed54ffd8d50 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -545,7 +545,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = -ENOENT; - rcu_read_lock(); t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->daddr, @@ -579,7 +578,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - rcu_read_unlock(); return err; } @@ -599,7 +597,6 @@ static int ipip6_rcv(struct sk_buff *skb) iph = ip_hdr(skb); - rcu_read_lock(); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -615,7 +612,6 @@ static int ipip6_rcv(struct sk_buff *skb) if ((tunnel->dev->priv_flags & IFF_ISATAP) && !isatap_chksrc(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; - rcu_read_unlock(); kfree_skb(skb); return 0; } @@ -630,12 +626,10 @@ static int ipip6_rcv(struct sk_buff *skb) netif_rx(skb); - rcu_read_unlock(); return 0; } /* no tunnel matched, let upstream know, ipsec may handle it */ - rcu_read_unlock(); return 1; out: kfree_skb(skb); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bb46061c813a..182ab9a85d6c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -190,6 +190,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq6 = inet6_rsk(req); treq = tcp_rsk(req); + treq->listener = NULL; if (security_inet_conn_request(sk, skb, req)) goto out_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f49476e2d884..49c890386ce9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, } #endif +static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt = (const struct rt6_info *)dst; + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + if (rt->rt6i_node) + inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; +} + static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { @@ -391,8 +403,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); - else - set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); + else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, + &tp->tsq_flags)) + sock_hold(sk); goto out; } @@ -463,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, rvp); + skb = tcp_make_synack(sk, dst, req, rvp, NULL); if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -750,6 +763,8 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct ipv6hdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -758,11 +773,23 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, skb->ip_summed = CHECKSUM_UNNECESSARY; break; } - - /* fall through */ - case CHECKSUM_NONE: +flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; + + case CHECKSUM_NONE: + wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), + IPPROTO_TCP, 0)); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; } return tcp_gro_receive(head, skb); @@ -975,7 +1002,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, inet6_iif(skb)); if (req) - return tcp_check_req(sk, skb, req, prev); + return tcp_check_req(sk, skb, req, prev, false); nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, @@ -1156,7 +1183,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) } have_isn: tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; if (security_inet_conn_request(sk, skb, req)) goto drop_and_release; @@ -1167,6 +1193,8 @@ have_isn: want_cookie) goto drop_and_free; + tcp_rsk(req)->snt_synack = tcp_time_stamp; + tcp_rsk(req)->listener = NULL; inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; @@ -1270,6 +1298,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(newsk, dst, NULL, NULL); + inet6_sk_rx_dst_set(newsk, skb); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; @@ -1299,7 +1328,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; if (treq->pktopts != NULL) { - newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC); + newnp->pktoptions = skb_clone(treq->pktopts, + sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(treq->pktopts); treq->pktopts = NULL; if (newnp->pktoptions) @@ -1332,9 +1362,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); - if (tcp_rsk(req)->snt_synack) - tcp_valid_rtt_meas(newsk, - tcp_time_stamp - tcp_rsk(req)->snt_synack); + tcp_synack_rtt_meas(newsk, req); newtp->total_retrans = req->retrans; newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; @@ -1349,7 +1377,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, - AF_INET6, key->key, key->keylen, GFP_ATOMIC); + AF_INET6, key->key, key->keylen, + sk_gfp_atomic(sk, GFP_ATOMIC)); } #endif @@ -1442,10 +1471,20 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, GFP_ATOMIC); + opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + struct dst_entry *dst = sk->sk_rx_dst; + sock_rps_save_rxhash(sk, skb); + if (dst) { + if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + dst->ops->check(dst, np->rx_dst_cookie) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1674,6 +1713,43 @@ do_time_wait: goto discard_it; } +static void tcp_v6_early_demux(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) + return; + + hdr = ipv6_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + inet6_iif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { + struct dst_entry *dst = sk->sk_rx_dst; + struct inet_sock *icsk = inet_sk(sk); + if (dst) + dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + if (dst && + icsk->rx_dst_ifindex == skb->skb_iif) + skb_dst_set_noref(skb, dst); + } + } +} + static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1684,6 +1760,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, + .sk_rx_dst_set = inet6_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), @@ -1715,6 +1792,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct iphdr), @@ -1764,7 +1842,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - const struct sock *sk, struct request_sock *req, int i, int uid) + const struct sock *sk, struct request_sock *req, int i, kuid_t uid) { int ttd = req->expires - jiffies; const struct in6_addr *src = &inet6_rsk(req)->loc_addr; @@ -1788,7 +1866,7 @@ static void get_openreq6(struct seq_file *seq, 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->retrans, - uid, + from_kuid_munged(seq_user_ns(seq), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, req); @@ -1836,9 +1914,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) tp->write_seq-tp->snd_una, (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), timer_active, - jiffies_to_clock_t(timer_expires - jiffies), + jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - sock_i_uid(sp), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, @@ -1856,10 +1934,7 @@ static void get_timewait6_sock(struct seq_file *seq, const struct in6_addr *dest, *src; __u16 destp, srcp; const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); - int ttd = tw->tw_ttd - jiffies; - - if (ttd < 0) - ttd = 0; + long delta = tw->tw_ttd - jiffies; dest = &tw6->tw_v6_daddr; src = &tw6->tw_v6_rcv_saddr; @@ -1875,7 +1950,7 @@ static void get_timewait6_sock(struct seq_file *seq, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, tw->tw_substate, 0, 0, - 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, atomic_read(&tw->tw_refcnt), tw); } @@ -1978,12 +2053,13 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .proto_cgroup = tcp_proto_cgroup, #endif }; static const struct inet6_protocol tcpv6_protocol = { + .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .gso_send_check = tcp_v6_gso_send_check, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 99d0077b56b8..fc9997260a6b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -394,6 +394,17 @@ try_again: } if (unlikely(err)) { trace_kfree_skb(skb, udpv6_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + } goto out_free; } if (!peeked) { @@ -1458,7 +1469,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, - sock_i_uid(sp), 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef39812107b1..f8c4c08ffb60 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } +static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) +{ + struct rt6_info *rt = (struct rt6_info *)xdst; + + rt6_init_peer(rt, net->ipv6.peers); +} + static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, + .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index f8ba30dfecae..02ff7f2f60d4 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -217,7 +217,8 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v) seq_printf(seq, "%08X %08X %02X %03d\n", sk_wmem_alloc_get(s), sk_rmem_alloc_get(s), - s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); + s->sk_state, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s))); out: return 0; } diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index bb738c9f9146..b833677d83d6 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -468,7 +468,7 @@ static int irda_open_tsap(struct irda_sock *self, __u8 tsap_sel, char *name) notify_t notify; if (self->tsap) { - IRDA_WARNING("%s: busy!\n", __func__); + IRDA_DEBUG(0, "%s: busy!\n", __func__); return -EBUSY; } diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c index 8b915f3ac3b9..308939128359 100644 --- a/net/irda/ircomm/ircomm_param.c +++ b/net/irda/ircomm/ircomm_param.c @@ -99,7 +99,6 @@ pi_param_info_t ircomm_param_info = { pi_major_call_table, 3, 0x0f, 4 }; */ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush) { - struct tty_struct *tty; unsigned long flags; struct sk_buff *skb; int count; @@ -109,10 +108,6 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush) IRDA_ASSERT(self != NULL, return -1;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - tty = self->tty; - if (!tty) - return 0; - /* Make sure we don't send parameters for raw mode */ if (self->service_type == IRCOMM_3_WIRE_RAW) return 0; diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 6b9d5a0e42f9..95a3a7a336ba 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -52,6 +52,8 @@ #include <net/irda/ircomm_tty_attach.h> #include <net/irda/ircomm_tty.h> +static int ircomm_tty_install(struct tty_driver *driver, + struct tty_struct *tty); static int ircomm_tty_open(struct tty_struct *tty, struct file *filp); static void ircomm_tty_close(struct tty_struct * tty, struct file *filp); static int ircomm_tty_write(struct tty_struct * tty, @@ -82,6 +84,7 @@ static struct tty_driver *driver; static hashbin_t *ircomm_tty = NULL; static const struct tty_operations ops = { + .install = ircomm_tty_install, .open = ircomm_tty_open, .close = ircomm_tty_close, .write = ircomm_tty_write, @@ -104,6 +107,35 @@ static const struct tty_operations ops = { #endif /* CONFIG_PROC_FS */ }; +static void ircomm_port_raise_dtr_rts(struct tty_port *port, int raise) +{ + struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb, + port); + /* + * Here, we use to lock those two guys, but as ircomm_param_request() + * does it itself, I don't see the point (and I see the deadlock). + * Jean II + */ + if (raise) + self->settings.dte |= IRCOMM_RTS | IRCOMM_DTR; + else + self->settings.dte &= ~(IRCOMM_RTS | IRCOMM_DTR); + + ircomm_param_request(self, IRCOMM_DTE, TRUE); +} + +static int ircomm_port_carrier_raised(struct tty_port *port) +{ + struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb, + port); + return self->settings.dce & IRCOMM_CD; +} + +static const struct tty_port_operations ircomm_port_ops = { + .dtr_rts = ircomm_port_raise_dtr_rts, + .carrier_raised = ircomm_port_carrier_raised, +}; + /* * Function ircomm_tty_init() * @@ -194,7 +226,7 @@ static int ircomm_tty_startup(struct ircomm_tty_cb *self) IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); /* Check if already open */ - if (test_and_set_bit(ASYNC_B_INITIALIZED, &self->flags)) { + if (test_and_set_bit(ASYNCB_INITIALIZED, &self->port.flags)) { IRDA_DEBUG(2, "%s(), already open so break out!\n", __func__ ); return 0; } @@ -231,7 +263,7 @@ static int ircomm_tty_startup(struct ircomm_tty_cb *self) return 0; err: - clear_bit(ASYNC_B_INITIALIZED, &self->flags); + clear_bit(ASYNCB_INITIALIZED, &self->port.flags); return ret; } @@ -242,72 +274,62 @@ err: * */ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, - struct file *filp) + struct tty_struct *tty, struct file *filp) { + struct tty_port *port = &self->port; DECLARE_WAITQUEUE(wait, current); int retval; int do_clocal = 0, extra_count = 0; unsigned long flags; - struct tty_struct *tty; IRDA_DEBUG(2, "%s()\n", __func__ ); - tty = self->tty; - /* * If non-blocking mode is set, or the port is not enabled, * then make the check up front and then exit. */ if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ /* nonblock mode is set or port is not enabled */ - self->flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; IRDA_DEBUG(1, "%s(), O_NONBLOCK requested!\n", __func__ ); return 0; } - if (tty->termios->c_cflag & CLOCAL) { + if (tty->termios.c_cflag & CLOCAL) { IRDA_DEBUG(1, "%s(), doing CLOCAL!\n", __func__ ); do_clocal = 1; } /* Wait for carrier detect and the line to become * free (i.e., not in use by the callout). While we are in - * this loop, self->open_count is dropped by one, so that + * this loop, port->count is dropped by one, so that * mgsl_close() knows when to free things. We restore it upon * exit, either normal or abnormal. */ retval = 0; - add_wait_queue(&self->open_wait, &wait); + add_wait_queue(&port->open_wait, &wait); IRDA_DEBUG(2, "%s(%d):block_til_ready before block on %s open_count=%d\n", - __FILE__,__LINE__, tty->driver->name, self->open_count ); + __FILE__, __LINE__, tty->driver->name, port->count); - /* As far as I can see, we protect open_count - Jean II */ - spin_lock_irqsave(&self->spinlock, flags); + spin_lock_irqsave(&port->lock, flags); if (!tty_hung_up_p(filp)) { extra_count = 1; - self->open_count--; + port->count--; } - spin_unlock_irqrestore(&self->spinlock, flags); - self->blocked_open++; + spin_unlock_irqrestore(&port->lock, flags); + port->blocked_open++; while (1) { - if (tty->termios->c_cflag & CBAUD) { - /* Here, we use to lock those two guys, but - * as ircomm_param_request() does it itself, - * I don't see the point (and I see the deadlock). - * Jean II */ - self->settings.dte |= IRCOMM_RTS + IRCOMM_DTR; - - ircomm_param_request(self, IRCOMM_DTE, TRUE); - } + if (tty->termios.c_cflag & CBAUD) + tty_port_raise_dtr_rts(port); current->state = TASK_INTERRUPTIBLE; if (tty_hung_up_p(filp) || - !test_bit(ASYNC_B_INITIALIZED, &self->flags)) { - retval = (self->flags & ASYNC_HUP_NOTIFY) ? + !test_bit(ASYNCB_INITIALIZED, &port->flags)) { + retval = (port->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS; break; } @@ -317,8 +339,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, * specified, we cannot return before the IrCOMM link is * ready */ - if (!test_bit(ASYNC_B_CLOSING, &self->flags) && - (do_clocal || (self->settings.dce & IRCOMM_CD)) && + if (!test_bit(ASYNCB_CLOSING, &port->flags) && + (do_clocal || tty_port_carrier_raised(port)) && self->state == IRCOMM_TTY_READY) { break; @@ -330,46 +352,36 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, } IRDA_DEBUG(1, "%s(%d):block_til_ready blocking on %s open_count=%d\n", - __FILE__,__LINE__, tty->driver->name, self->open_count ); + __FILE__, __LINE__, tty->driver->name, port->count); schedule(); } __set_current_state(TASK_RUNNING); - remove_wait_queue(&self->open_wait, &wait); + remove_wait_queue(&port->open_wait, &wait); if (extra_count) { /* ++ is not atomic, so this should be protected - Jean II */ - spin_lock_irqsave(&self->spinlock, flags); - self->open_count++; - spin_unlock_irqrestore(&self->spinlock, flags); + spin_lock_irqsave(&port->lock, flags); + port->count++; + spin_unlock_irqrestore(&port->lock, flags); } - self->blocked_open--; + port->blocked_open--; IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n", - __FILE__,__LINE__, tty->driver->name, self->open_count); + __FILE__, __LINE__, tty->driver->name, port->count); if (!retval) - self->flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return retval; } -/* - * Function ircomm_tty_open (tty, filp) - * - * This routine is called when a particular tty device is opened. This - * routine is mandatory; if this routine is not filled in, the attempted - * open will fail with ENODEV. - */ -static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) + +static int ircomm_tty_install(struct tty_driver *driver, struct tty_struct *tty) { struct ircomm_tty_cb *self; unsigned int line = tty->index; - unsigned long flags; - int ret; - - IRDA_DEBUG(2, "%s()\n", __func__ ); /* Check if instance already exists */ self = hashbin_lock_find(ircomm_tty, line, NULL); @@ -381,6 +393,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) return -ENOMEM; } + tty_port_init(&self->port); + self->port.ops = &ircomm_port_ops; self->magic = IRCOMM_TTY_MAGIC; self->flow = FLOW_STOP; @@ -388,13 +402,9 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) INIT_WORK(&self->tqueue, ircomm_tty_do_softint); self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED; self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED; - self->close_delay = 5*HZ/10; - self->closing_wait = 30*HZ; /* Init some important stuff */ init_timer(&self->watchdog_timer); - init_waitqueue_head(&self->open_wait); - init_waitqueue_head(&self->close_wait); spin_lock_init(&self->spinlock); /* @@ -404,31 +414,48 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) * * Note this is completely usafe and doesn't work properly */ - tty->termios->c_iflag = 0; - tty->termios->c_oflag = 0; + tty->termios.c_iflag = 0; + tty->termios.c_oflag = 0; /* Insert into hash */ hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL); } - /* ++ is not atomic, so this should be protected - Jean II */ - spin_lock_irqsave(&self->spinlock, flags); - self->open_count++; - tty->driver_data = self; - self->tty = tty; - spin_unlock_irqrestore(&self->spinlock, flags); + return tty_port_install(&self->port, driver, tty); +} + +/* + * Function ircomm_tty_open (tty, filp) + * + * This routine is called when a particular tty device is opened. This + * routine is mandatory; if this routine is not filled in, the attempted + * open will fail with ENODEV. + */ +static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) +{ + struct ircomm_tty_cb *self = tty->driver_data; + unsigned long flags; + int ret; + + IRDA_DEBUG(2, "%s()\n", __func__ ); + + /* ++ is not atomic, so this should be protected - Jean II */ + spin_lock_irqsave(&self->port.lock, flags); + self->port.count++; + spin_unlock_irqrestore(&self->port.lock, flags); + tty_port_tty_set(&self->port, tty); IRDA_DEBUG(1, "%s(), %s%d, count = %d\n", __func__ , tty->driver->name, - self->line, self->open_count); + self->line, self->port.count); /* Not really used by us, but lets do it anyway */ - self->tty->low_latency = (self->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + tty->low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0; /* * If the port is the middle of closing, bail out now */ if (tty_hung_up_p(filp) || - test_bit(ASYNC_B_CLOSING, &self->flags)) { + test_bit(ASYNCB_CLOSING, &self->port.flags)) { /* Hm, why are we blocking on ASYNC_CLOSING if we * do return -EAGAIN/-ERESTARTSYS below anyway? @@ -438,14 +465,15 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) * probably better sleep uninterruptible? */ - if (wait_event_interruptible(self->close_wait, !test_bit(ASYNC_B_CLOSING, &self->flags))) { + if (wait_event_interruptible(self->port.close_wait, + !test_bit(ASYNCB_CLOSING, &self->port.flags))) { IRDA_WARNING("%s - got signal while blocking on ASYNC_CLOSING!\n", __func__); return -ERESTARTSYS; } #ifdef SERIAL_DO_RESTART - return (self->flags & ASYNC_HUP_NOTIFY) ? + return (self->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS; #else return -EAGAIN; @@ -453,7 +481,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) } /* Check if this is a "normal" ircomm device, or an irlpt device */ - if (line < 0x10) { + if (self->line < 0x10) { self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE; self->settings.service_type = IRCOMM_9_WIRE; /* 9 wire as default */ /* Jan Kiszka -> add DSR/RI -> Conform to IrCOMM spec */ @@ -469,7 +497,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) if (ret) return ret; - ret = ircomm_tty_block_til_ready(self, filp); + ret = ircomm_tty_block_til_ready(self, tty, filp); if (ret) { IRDA_DEBUG(2, "%s(), returning after block_til_ready with %d\n", __func__ , @@ -489,81 +517,22 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - unsigned long flags; + struct tty_port *port = &self->port; IRDA_DEBUG(0, "%s()\n", __func__ ); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - spin_lock_irqsave(&self->spinlock, flags); - - if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&self->spinlock, flags); - - IRDA_DEBUG(0, "%s(), returning 1\n", __func__ ); - return; - } - - if ((tty->count == 1) && (self->open_count != 1)) { - /* - * Uh, oh. tty->count is 1, which means that the tty - * structure will be freed. state->count should always - * be one in these conditions. If it's greater than - * one, we've got real problems, since it means the - * serial port won't be shutdown. - */ - IRDA_DEBUG(0, "%s(), bad serial port count; " - "tty->count is 1, state->count is %d\n", __func__ , - self->open_count); - self->open_count = 1; - } - - if (--self->open_count < 0) { - IRDA_ERROR("%s(), bad serial port count for ttys%d: %d\n", - __func__, self->line, self->open_count); - self->open_count = 0; - } - if (self->open_count) { - spin_unlock_irqrestore(&self->spinlock, flags); - - IRDA_DEBUG(0, "%s(), open count > 0\n", __func__ ); + if (tty_port_close_start(port, tty, filp) == 0) return; - } - - /* Hum... Should be test_and_set_bit ??? - Jean II */ - set_bit(ASYNC_B_CLOSING, &self->flags); - - /* We need to unlock here (we were unlocking at the end of this - * function), because tty_wait_until_sent() may schedule. - * I don't know if the rest should be protected somehow, - * so someone should check. - Jean II */ - spin_unlock_irqrestore(&self->spinlock, flags); - - /* - * Now we wait for the transmit buffer to clear; and we notify - * the line discipline to only process XON/XOFF characters. - */ - tty->closing = 1; - if (self->closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent_from_close(tty, self->closing_wait); ircomm_tty_shutdown(self); tty_driver_flush_buffer(tty); - tty_ldisc_flush(tty); - - tty->closing = 0; - self->tty = NULL; - if (self->blocked_open) { - if (self->close_delay) - schedule_timeout_interruptible(self->close_delay); - wake_up_interruptible(&self->open_wait); - } - - self->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - wake_up_interruptible(&self->close_wait); + tty_port_close_end(port, tty); + tty_port_tty_set(port, NULL); } /* @@ -606,7 +575,7 @@ static void ircomm_tty_do_softint(struct work_struct *work) if (!self || self->magic != IRCOMM_TTY_MAGIC) return; - tty = self->tty; + tty = tty_port_tty_get(&self->port); if (!tty) return; @@ -627,7 +596,7 @@ static void ircomm_tty_do_softint(struct work_struct *work) } if (tty->hw_stopped) - return; + goto put; /* Unlink transmit buffer */ spin_lock_irqsave(&self->spinlock, flags); @@ -646,6 +615,8 @@ static void ircomm_tty_do_softint(struct work_struct *work) /* Check if user (still) wants to be waken up */ tty_wakeup(tty); +put: + tty_kref_put(tty); } /* @@ -880,7 +851,7 @@ static void ircomm_tty_throttle(struct tty_struct *tty) ircomm_tty_send_xchar(tty, STOP_CHAR(tty)); /* Hardware flow control? */ - if (tty->termios->c_cflag & CRTSCTS) { + if (tty->termios.c_cflag & CRTSCTS) { self->settings.dte &= ~IRCOMM_RTS; self->settings.dte |= IRCOMM_DELTA_RTS; @@ -912,7 +883,7 @@ static void ircomm_tty_unthrottle(struct tty_struct *tty) } /* Using hardware flow control? */ - if (tty->termios->c_cflag & CRTSCTS) { + if (tty->termios.c_cflag & CRTSCTS) { self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS); ircomm_param_request(self, IRCOMM_DTE, TRUE); @@ -955,7 +926,7 @@ static void ircomm_tty_shutdown(struct ircomm_tty_cb *self) IRDA_DEBUG(0, "%s()\n", __func__ ); - if (!test_and_clear_bit(ASYNC_B_INITIALIZED, &self->flags)) + if (!test_and_clear_bit(ASYNCB_INITIALIZED, &self->port.flags)) return; ircomm_tty_detach_cable(self); @@ -994,6 +965,7 @@ static void ircomm_tty_shutdown(struct ircomm_tty_cb *self) static void ircomm_tty_hangup(struct tty_struct *tty) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; + struct tty_port *port = &self->port; unsigned long flags; IRDA_DEBUG(0, "%s()\n", __func__ ); @@ -1004,14 +976,17 @@ static void ircomm_tty_hangup(struct tty_struct *tty) /* ircomm_tty_flush_buffer(tty); */ ircomm_tty_shutdown(self); - /* I guess we need to lock here - Jean II */ - spin_lock_irqsave(&self->spinlock, flags); - self->flags &= ~ASYNC_NORMAL_ACTIVE; - self->tty = NULL; - self->open_count = 0; - spin_unlock_irqrestore(&self->spinlock, flags); + spin_lock_irqsave(&port->lock, flags); + port->flags &= ~ASYNC_NORMAL_ACTIVE; + if (port->tty) { + set_bit(TTY_IO_ERROR, &port->tty->flags); + tty_kref_put(port->tty); + } + port->tty = NULL; + port->count = 0; + spin_unlock_irqrestore(&port->lock, flags); - wake_up_interruptible(&self->open_wait); + wake_up_interruptible(&port->open_wait); } /* @@ -1071,20 +1046,20 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - tty = self->tty; + tty = tty_port_tty_get(&self->port); status = self->settings.dce; if (status & IRCOMM_DCE_DELTA_ANY) { /*wake_up_interruptible(&self->delta_msr_wait);*/ } - if ((self->flags & ASYNC_CHECK_CD) && (status & IRCOMM_DELTA_CD)) { + if ((self->port.flags & ASYNC_CHECK_CD) && (status & IRCOMM_DELTA_CD)) { IRDA_DEBUG(2, "%s(), ircomm%d CD now %s...\n", __func__ , self->line, (status & IRCOMM_CD) ? "on" : "off"); if (status & IRCOMM_CD) { - wake_up_interruptible(&self->open_wait); + wake_up_interruptible(&self->port.open_wait); } else { IRDA_DEBUG(2, "%s(), Doing serial hangup..\n", __func__ ); @@ -1092,10 +1067,10 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) tty_hangup(tty); /* Hangup will remote the tty, so better break out */ - return; + goto put; } } - if (self->flags & ASYNC_CTS_FLOW) { + if (tty && tty_port_cts_enabled(&self->port)) { if (tty->hw_stopped) { if (status & IRCOMM_CTS) { IRDA_DEBUG(2, @@ -1103,10 +1078,10 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) tty->hw_stopped = 0; /* Wake up processes blocked on open */ - wake_up_interruptible(&self->open_wait); + wake_up_interruptible(&self->port.open_wait); schedule_work(&self->tqueue); - return; + goto put; } } else { if (!(status & IRCOMM_CTS)) { @@ -1116,6 +1091,8 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) } } } +put: + tty_kref_put(tty); } /* @@ -1128,6 +1105,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap, struct sk_buff *skb) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; + struct tty_struct *tty; IRDA_DEBUG(2, "%s()\n", __func__ ); @@ -1135,7 +1113,8 @@ static int ircomm_tty_data_indication(void *instance, void *sap, IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); IRDA_ASSERT(skb != NULL, return -1;); - if (!self->tty) { + tty = tty_port_tty_get(&self->port); + if (!tty) { IRDA_DEBUG(0, "%s(), no tty!\n", __func__ ); return 0; } @@ -1146,7 +1125,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap, * Devices like WinCE can do this, and since they don't send any * params, we can just as well declare the hardware for running. */ - if (self->tty->hw_stopped && (self->flow == FLOW_START)) { + if (tty->hw_stopped && (self->flow == FLOW_START)) { IRDA_DEBUG(0, "%s(), polling for line settings!\n", __func__ ); ircomm_param_request(self, IRCOMM_POLL, TRUE); @@ -1159,8 +1138,9 @@ static int ircomm_tty_data_indication(void *instance, void *sap, * Use flip buffer functions since the code may be called from interrupt * context */ - tty_insert_flip_string(self->tty, skb->data, skb->len); - tty_flip_buffer_push(self->tty); + tty_insert_flip_string(tty, skb->data, skb->len); + tty_flip_buffer_push(tty); + tty_kref_put(tty); /* No need to kfree_skb - see ircomm_ttp_data_indication() */ @@ -1211,12 +1191,13 @@ static void ircomm_tty_flow_indication(void *instance, void *sap, IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - tty = self->tty; + tty = tty_port_tty_get(&self->port); switch (cmd) { case FLOW_START: IRDA_DEBUG(2, "%s(), hw start!\n", __func__ ); - tty->hw_stopped = 0; + if (tty) + tty->hw_stopped = 0; /* ircomm_tty_do_softint will take care of the rest */ schedule_work(&self->tqueue); @@ -1224,15 +1205,19 @@ static void ircomm_tty_flow_indication(void *instance, void *sap, default: /* If we get here, something is very wrong, better stop */ case FLOW_STOP: IRDA_DEBUG(2, "%s(), hw stopped!\n", __func__ ); - tty->hw_stopped = 1; + if (tty) + tty->hw_stopped = 1; break; } + + tty_kref_put(tty); self->flow = cmd; } #ifdef CONFIG_PROC_FS static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) { + struct tty_struct *tty; char sep; seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]); @@ -1328,40 +1313,43 @@ static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) seq_puts(m, "Flags:"); sep = ' '; - if (self->flags & ASYNC_CTS_FLOW) { + if (tty_port_cts_enabled(&self->port)) { seq_printf(m, "%cASYNC_CTS_FLOW", sep); sep = '|'; } - if (self->flags & ASYNC_CHECK_CD) { + if (self->port.flags & ASYNC_CHECK_CD) { seq_printf(m, "%cASYNC_CHECK_CD", sep); sep = '|'; } - if (self->flags & ASYNC_INITIALIZED) { + if (self->port.flags & ASYNC_INITIALIZED) { seq_printf(m, "%cASYNC_INITIALIZED", sep); sep = '|'; } - if (self->flags & ASYNC_LOW_LATENCY) { + if (self->port.flags & ASYNC_LOW_LATENCY) { seq_printf(m, "%cASYNC_LOW_LATENCY", sep); sep = '|'; } - if (self->flags & ASYNC_CLOSING) { + if (self->port.flags & ASYNC_CLOSING) { seq_printf(m, "%cASYNC_CLOSING", sep); sep = '|'; } - if (self->flags & ASYNC_NORMAL_ACTIVE) { + if (self->port.flags & ASYNC_NORMAL_ACTIVE) { seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep); sep = '|'; } seq_putc(m, '\n'); seq_printf(m, "Role: %s\n", self->client ? "client" : "server"); - seq_printf(m, "Open count: %d\n", self->open_count); + seq_printf(m, "Open count: %d\n", self->port.count); seq_printf(m, "Max data size: %d\n", self->max_data_size); seq_printf(m, "Max header size: %d\n", self->max_header_size); - if (self->tty) + tty = tty_port_tty_get(&self->port); + if (tty) { seq_printf(m, "Hardware: %s\n", - self->tty->hw_stopped ? "Stopped" : "Running"); + tty->hw_stopped ? "Stopped" : "Running"); + tty_kref_put(tty); + } } static int ircomm_tty_proc_show(struct seq_file *m, void *v) diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index b65d66e0d817..edab393e0c82 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -130,6 +130,8 @@ static int (*state[])(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, */ int ircomm_tty_attach_cable(struct ircomm_tty_cb *self) { + struct tty_struct *tty; + IRDA_DEBUG(0, "%s()\n", __func__ ); IRDA_ASSERT(self != NULL, return -1;); @@ -142,7 +144,11 @@ int ircomm_tty_attach_cable(struct ircomm_tty_cb *self) } /* Make sure nobody tries to write before the link is up */ - self->tty->hw_stopped = 1; + tty = tty_port_tty_get(&self->port); + if (tty) { + tty->hw_stopped = 1; + tty_kref_put(tty); + } ircomm_tty_ias_register(self); @@ -398,23 +404,26 @@ void ircomm_tty_disconnect_indication(void *instance, void *sap, struct sk_buff *skb) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; + struct tty_struct *tty; IRDA_DEBUG(2, "%s()\n", __func__ ); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - if (!self->tty) + tty = tty_port_tty_get(&self->port); + if (!tty) return; /* This will stop control data transfers */ self->flow = FLOW_STOP; /* Stop data transfers */ - self->tty->hw_stopped = 1; + tty->hw_stopped = 1; ircomm_tty_do_event(self, IRCOMM_TTY_DISCONNECT_INDICATION, NULL, NULL); + tty_kref_put(tty); } /* @@ -550,12 +559,15 @@ void ircomm_tty_connect_indication(void *instance, void *sap, */ void ircomm_tty_link_established(struct ircomm_tty_cb *self) { + struct tty_struct *tty; + IRDA_DEBUG(2, "%s()\n", __func__ ); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - if (!self->tty) + tty = tty_port_tty_get(&self->port); + if (!tty) return; del_timer(&self->watchdog_timer); @@ -566,19 +578,22 @@ void ircomm_tty_link_established(struct ircomm_tty_cb *self) * will have to wait for the peer device (DCE) to raise the CTS * line. */ - if ((self->flags & ASYNC_CTS_FLOW) && ((self->settings.dce & IRCOMM_CTS) == 0)) { + if (tty_port_cts_enabled(&self->port) && + ((self->settings.dce & IRCOMM_CTS) == 0)) { IRDA_DEBUG(0, "%s(), waiting for CTS ...\n", __func__ ); - return; + goto put; } else { IRDA_DEBUG(1, "%s(), starting hardware!\n", __func__ ); - self->tty->hw_stopped = 0; + tty->hw_stopped = 0; /* Wake up processes blocked on open */ - wake_up_interruptible(&self->open_wait); + wake_up_interruptible(&self->port.open_wait); } schedule_work(&self->tqueue); +put: + tty_kref_put(tty); } /* @@ -977,14 +992,17 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH); ircomm_tty_start_watchdog_timer(self, 3*HZ); - if (self->flags & ASYNC_CHECK_CD) { + if (self->port.flags & ASYNC_CHECK_CD) { /* Drop carrier */ self->settings.dce = IRCOMM_DELTA_CD; ircomm_tty_check_modem_status(self); } else { + struct tty_struct *tty = tty_port_tty_get(&self->port); IRDA_DEBUG(0, "%s(), hanging up!\n", __func__ ); - if (self->tty) - tty_hangup(self->tty); + if (tty) { + tty_hangup(tty); + tty_kref_put(tty); + } } break; default: diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index d0667d68351d..b343f50dc8d7 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -52,17 +52,18 @@ * Change speed of the driver. If the remote device is a DCE, then this * should make it change the speed of its serial port */ -static void ircomm_tty_change_speed(struct ircomm_tty_cb *self) +static void ircomm_tty_change_speed(struct ircomm_tty_cb *self, + struct tty_struct *tty) { unsigned int cflag, cval; int baud; IRDA_DEBUG(2, "%s()\n", __func__ ); - if (!self->tty || !self->tty->termios || !self->ircomm) + if (!self->ircomm) return; - cflag = self->tty->termios->c_cflag; + cflag = tty->termios.c_cflag; /* byte size and parity */ switch (cflag & CSIZE) { @@ -81,7 +82,7 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self) cval |= IRCOMM_PARITY_EVEN; /* Determine divisor based on baud rate */ - baud = tty_get_baud_rate(self->tty); + baud = tty_get_baud_rate(tty); if (!baud) baud = 9600; /* B0 transition handled in rs_set_termios */ @@ -90,19 +91,19 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self) /* CTS flow control flag and modem status interrupts */ if (cflag & CRTSCTS) { - self->flags |= ASYNC_CTS_FLOW; + self->port.flags |= ASYNC_CTS_FLOW; self->settings.flow_control |= IRCOMM_RTS_CTS_IN; /* This got me. Bummer. Jean II */ if (self->service_type == IRCOMM_3_WIRE_RAW) IRDA_WARNING("%s(), enabling RTS/CTS on link that doesn't support it (3-wire-raw)\n", __func__); } else { - self->flags &= ~ASYNC_CTS_FLOW; + self->port.flags &= ~ASYNC_CTS_FLOW; self->settings.flow_control &= ~IRCOMM_RTS_CTS_IN; } if (cflag & CLOCAL) - self->flags &= ~ASYNC_CHECK_CD; + self->port.flags &= ~ASYNC_CHECK_CD; else - self->flags |= ASYNC_CHECK_CD; + self->port.flags |= ASYNC_CHECK_CD; #if 0 /* * Set up parity check flag @@ -148,18 +149,18 @@ void ircomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - unsigned int cflag = tty->termios->c_cflag; + unsigned int cflag = tty->termios.c_cflag; IRDA_DEBUG(2, "%s()\n", __func__ ); if ((cflag == old_termios->c_cflag) && - (RELEVANT_IFLAG(tty->termios->c_iflag) == + (RELEVANT_IFLAG(tty->termios.c_iflag) == RELEVANT_IFLAG(old_termios->c_iflag))) { return; } - ircomm_tty_change_speed(self); + ircomm_tty_change_speed(self, tty); /* Handle transition to B0 status */ if ((old_termios->c_cflag & CBAUD) && @@ -172,7 +173,7 @@ void ircomm_tty_set_termios(struct tty_struct *tty, if (!(old_termios->c_cflag & CBAUD) && (cflag & CBAUD)) { self->settings.dte |= IRCOMM_DTR; - if (!(tty->termios->c_cflag & CRTSCTS) || + if (!(tty->termios.c_cflag & CRTSCTS) || !test_bit(TTY_THROTTLED, &tty->flags)) { self->settings.dte |= IRCOMM_RTS; } @@ -181,7 +182,7 @@ void ircomm_tty_set_termios(struct tty_struct *tty, /* Handle turning off CRTSCTS */ if ((old_termios->c_cflag & CRTSCTS) && - !(tty->termios->c_cflag & CRTSCTS)) + !(tty->termios.c_cflag & CRTSCTS)) { tty->hw_stopped = 0; ircomm_tty_start(tty); @@ -270,10 +271,10 @@ static int ircomm_tty_get_serial_info(struct ircomm_tty_cb *self, memset(&info, 0, sizeof(info)); info.line = self->line; - info.flags = self->flags; + info.flags = self->port.flags; info.baud_base = self->settings.data_rate; - info.close_delay = self->close_delay; - info.closing_wait = self->closing_wait; + info.close_delay = self->port.close_delay; + info.closing_wait = self->port.closing_wait; /* For compatibility */ info.type = PORT_16550A; diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 6c7c4b92e4f8..c32971269280 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -100,7 +100,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) goto err_out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); if (hdr == NULL) { ret = -EMSGSIZE; diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 5c93f2952b08..1002e3396f72 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -440,7 +440,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) */ lsap = irlmp_open_lsap(stsap_sel, &ttp_notify, 0); if (lsap == NULL) { - IRDA_WARNING("%s: unable to allocate LSAP!!\n", __func__); + IRDA_DEBUG(0, "%s: unable to allocate LSAP!!\n", __func__); return NULL; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 34e418508a67..08897a3c7ec7 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -54,7 +54,7 @@ struct pfkey_sock { struct { uint8_t msg_version; - uint32_t msg_pid; + uint32_t msg_portid; int (*dump)(struct pfkey_sock *sk); void (*done)(struct pfkey_sock *sk); union { @@ -1447,7 +1447,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); @@ -1486,7 +1486,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg else c.event = XFRM_MSG_UPDSA; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_state_notify(x, &c); out: xfrm_state_put(x); @@ -1523,7 +1523,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_ goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: @@ -1701,7 +1701,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -1736,7 +1736,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m c.data.proto = proto; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; c.net = net; km_state_notify(NULL, &c); @@ -1764,7 +1764,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -1798,7 +1798,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms return -EINVAL; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto); @@ -1923,6 +1923,9 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) int len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy); struct sadb_x_ipsecrequest *rq = (void*)(pol+1); + if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy)) + return -EINVAL; + while (len >= sizeof(struct sadb_x_ipsecrequest)) { if ((err = parse_ipsecrequest(xp, rq)) < 0) return err; @@ -2157,7 +2160,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_type = event2poltype(c->event); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; - out_hdr->sadb_msg_pid = c->pid; + out_hdr->sadb_msg_pid = c->portid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; @@ -2272,7 +2275,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ c.event = XFRM_MSG_NEWPOLICY; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); xfrm_pol_put(xp); @@ -2351,7 +2354,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); @@ -2597,7 +2600,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (err) goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, dir, &c); @@ -2634,7 +2637,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -2663,7 +2666,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb return -EBUSY; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); @@ -2682,7 +2685,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -2711,7 +2714,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; c.net = net; km_policy_notify(NULL, 0, &c); @@ -3024,7 +3027,7 @@ static u32 get_acqseq(void) return res; } -static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir) +static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -3105,7 +3108,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; - pol->sadb_x_policy_dir = dir+1; + pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; pol->sadb_x_policy_id = xp->index; /* Set sadb_comb's. */ @@ -3661,7 +3664,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) atomic_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), - sock_i_uid(s), + from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), sock_i_ino(s) ); return 0; diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index 4b1e71751e10..147a8fd47a17 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -4,6 +4,7 @@ menuconfig L2TP tristate "Layer Two Tunneling Protocol (L2TP)" + depends on (IPV6 || IPV6=n) depends on INET ---help--- Layer Two Tunneling Protocol diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 393355d37b47..1a9f3723c13c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1347,11 +1347,10 @@ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) /* Remove from tunnel list */ spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_del_rcu(&tunnel->list); + kfree_rcu(tunnel, rcu); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - synchronize_rcu(); atomic_dec(&l2tp_tunnel_count); - kfree(tunnel); } /* Create a socket for the tunnel, if one isn't set up by @@ -1502,6 +1501,8 @@ out: return err; } +static struct lock_class_key l2tp_socket_class; + int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { struct l2tp_tunnel *tunnel = NULL; @@ -1606,6 +1607,8 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->old_sk_destruct = sk->sk_destruct; sk->sk_destruct = &l2tp_tunnel_destruct; tunnel->sock = sk; + lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); + sk->sk_allocation = GFP_ATOMIC; /* Add tunnel to our list */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a38ec6cdeee1..56d583e083a7 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -163,6 +163,7 @@ struct l2tp_tunnel_cfg { struct l2tp_tunnel { int magic; /* Should be L2TP_TUNNEL_MAGIC */ + struct rcu_head rcu; rwlock_t hlist_lock; /* protect session_hlist */ struct hlist_head session_hlist[L2TP_HASH_SIZE]; /* hashed list of sessions, diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index f9ee74deeac2..37b8b8ba31f7 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -67,6 +67,7 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net) return net_generic(net, l2tp_eth_net_id); } +static struct lock_class_key l2tp_eth_tx_busylock; static int l2tp_eth_dev_init(struct net_device *dev) { struct l2tp_eth *priv = netdev_priv(dev); @@ -74,7 +75,7 @@ static int l2tp_eth_dev_init(struct net_device *dev) priv->dev = dev; eth_hw_addr_random(dev); memset(&dev->broadcast[0], 0xff, 6); - + dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock; return 0; } @@ -153,7 +154,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, skb->data, length); } - if (!pskb_may_pull(skb, sizeof(ETH_HLEN))) + if (!pskb_may_pull(skb, ETH_HLEN)) goto error; secpath_reset(skb); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 35e1e4bde587..927547171bc7 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -410,6 +410,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, lsa->l2tp_family = AF_INET6; lsa->l2tp_flowinfo = 0; lsa->l2tp_scope_id = 0; + lsa->l2tp_unused = 0; if (peer) { if (!lsk->peer_conn_id) return -ENOTCONN; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd9229a47..6c4cc12c7414 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -78,16 +78,16 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (!hdr) { + ret = -EMSGSIZE; goto err_out; } genlmsg_end(msg, hdr); - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -235,7 +235,7 @@ out: return ret; } -static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_tunnel *tunnel) { void *hdr; @@ -248,10 +248,10 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, struct l2tp_stats stats; unsigned int start; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) || nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || @@ -359,12 +359,12 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel); if (ret < 0) goto err_out; - return genlmsg_unicast(net, msg, info->snd_pid); + return genlmsg_unicast(net, msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -384,7 +384,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (tunnel == NULL) goto out; - if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, tunnel) <= 0) goto out; @@ -604,7 +604,7 @@ out: return ret; } -static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_session *session) { void *hdr; @@ -616,9 +616,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) || @@ -705,12 +705,12 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session); if (ret < 0) goto err_out; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -742,7 +742,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback continue; } - if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, session) <= 0) break; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f6fe4d400502..c2190005a114 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -969,14 +969,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_llc sllc; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - int rc = 0; + int rc = -EBADF; memset(&sllc, 0, sizeof(sllc)); lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; *uaddrlen = sizeof(sllc); - memset(uaddr, 0, *uaddrlen); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) @@ -1206,7 +1205,7 @@ static int __init llc2_init(void) rc = llc_proc_init(); if (rc != 0) { printk(llc_proc_err_msg); - goto out_unregister_llc_proto; + goto out_station; } rc = llc_sysctl_init(); if (rc) { @@ -1226,7 +1225,8 @@ out_sysctl: llc_sysctl_exit(); out_proc: llc_proc_exit(); -out_unregister_llc_proto: +out_station: + llc_station_exit(); proto_unregister(&llc_proto); goto out; } diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index e32cab44ea95..dd3e83328ad5 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -42,6 +42,7 @@ static void (*llc_type_handlers[2])(struct llc_sap *sap, void llc_add_pack(int type, void (*handler)(struct llc_sap *sap, struct sk_buff *skb)) { + smp_wmb(); /* ensure initialisation is complete before it's called */ if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) llc_type_handlers[type - 1] = handler; } @@ -50,11 +51,19 @@ void llc_remove_pack(int type) { if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) llc_type_handlers[type - 1] = NULL; + synchronize_net(); } void llc_set_station_handler(void (*handler)(struct sk_buff *skb)) { + /* Ensure initialisation is complete before it's called */ + if (handler) + smp_wmb(); + llc_station_handler = handler; + + if (!handler) + synchronize_net(); } /** @@ -150,6 +159,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, int dest; int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); + void (*sta_handler)(struct sk_buff *skb); + void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb); if (!net_eq(dev_net(dev), &init_net)) goto drop; @@ -182,7 +193,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ rcv = rcu_dereference(sap->rcv_func); dest = llc_pdu_type(skb); - if (unlikely(!dest || !llc_type_handlers[dest - 1])) { + sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL; + if (unlikely(!sap_handler)) { if (rcv) rcv(skb, dev, pt, orig_dev); else @@ -193,7 +205,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, if (cskb) rcv(cskb, dev, pt, orig_dev); } - llc_type_handlers[dest - 1](sap, skb); + sap_handler(sap, skb); } llc_sap_put(sap); out: @@ -202,9 +214,10 @@ drop: kfree_skb(skb); goto out; handle_station: - if (!llc_station_handler) + sta_handler = ACCESS_ONCE(llc_station_handler); + if (!sta_handler) goto drop; - llc_station_handler(skb); + sta_handler(skb); goto out; } diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index a1839c004357..7b4799cfbf8d 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -151,7 +151,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk) - llc->copied_seq, sk->sk_state, - sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), llc->link); out: return 0; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 39a8d8924b9c..204a8351efff 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -25,253 +25,26 @@ #include <net/llc_s_st.h> #include <net/llc_pdu.h> -/** - * struct llc_station - LLC station component - * - * SAP and connection resource manager, one per adapter. - * - * @state: state of station - * @xid_r_count: XID response PDU counter - * @mac_sa: MAC source address - * @sap_list: list of related SAPs - * @ev_q: events entering state mach. - * @mac_pdu_q: PDUs ready to send to MAC - */ -struct llc_station { - u8 state; - u8 xid_r_count; - struct timer_list ack_timer; - u8 retry_count; - u8 maximum_retry; - struct { - struct sk_buff_head list; - spinlock_t lock; - } ev_q; - struct sk_buff_head mac_pdu_q; -}; - -#define LLC_STATION_ACK_TIME (3 * HZ) - -int sysctl_llc_station_ack_timeout = LLC_STATION_ACK_TIME; - -/* Types of events (possible values in 'ev->type') */ -#define LLC_STATION_EV_TYPE_SIMPLE 1 -#define LLC_STATION_EV_TYPE_CONDITION 2 -#define LLC_STATION_EV_TYPE_PRIM 3 -#define LLC_STATION_EV_TYPE_PDU 4 /* command/response PDU */ -#define LLC_STATION_EV_TYPE_ACK_TMR 5 -#define LLC_STATION_EV_TYPE_RPT_STATUS 6 - -/* Events */ -#define LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK 1 -#define LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK 2 -#define LLC_STATION_EV_ACK_TMR_EXP_LT_RETRY_CNT_MAX_RETRY 3 -#define LLC_STATION_EV_ACK_TMR_EXP_EQ_RETRY_CNT_MAX_RETRY 4 -#define LLC_STATION_EV_RX_NULL_DSAP_XID_C 5 -#define LLC_STATION_EV_RX_NULL_DSAP_0_XID_R_XID_R_CNT_EQ 6 -#define LLC_STATION_EV_RX_NULL_DSAP_1_XID_R_XID_R_CNT_EQ 7 -#define LLC_STATION_EV_RX_NULL_DSAP_TEST_C 8 -#define LLC_STATION_EV_DISABLE_REQ 9 - -struct llc_station_state_ev { - u8 type; - u8 prim; - u8 prim_type; - u8 reason; - struct list_head node; /* node in station->ev_q.list */ -}; - -static __inline__ struct llc_station_state_ev * - llc_station_ev(struct sk_buff *skb) -{ - return (struct llc_station_state_ev *)skb->cb; -} - -typedef int (*llc_station_ev_t)(struct sk_buff *skb); - -#define LLC_STATION_STATE_DOWN 1 /* initial state */ -#define LLC_STATION_STATE_DUP_ADDR_CHK 2 -#define LLC_STATION_STATE_UP 3 - -#define LLC_NBR_STATION_STATES 3 /* size of state table */ - -typedef int (*llc_station_action_t)(struct sk_buff *skb); - -/* Station component state table structure */ -struct llc_station_state_trans { - llc_station_ev_t ev; - u8 next_state; - llc_station_action_t *ev_actions; -}; - -struct llc_station_state { - u8 curr_state; - struct llc_station_state_trans **transitions; -}; - -static struct llc_station llc_main_station; - -static int llc_stat_ev_enable_with_dup_addr_check(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_SIMPLE && - ev->prim_type == - LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK ? 0 : 1; -} - -static int llc_stat_ev_enable_without_dup_addr_check(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_SIMPLE && - ev->prim_type == - LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK ? 0 : 1; -} - -static int llc_stat_ev_ack_tmr_exp_lt_retry_cnt_max_retry(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_ACK_TMR && - llc_main_station.retry_count < - llc_main_station.maximum_retry ? 0 : 1; -} - -static int llc_stat_ev_ack_tmr_exp_eq_retry_cnt_max_retry(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_ACK_TMR && - llc_main_station.retry_count == - llc_main_station.maximum_retry ? 0 : 1; -} - static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb) { - struct llc_station_state_ev *ev = llc_station_ev(skb); struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); - return ev->type == LLC_STATION_EV_TYPE_PDU && - LLC_PDU_IS_CMD(pdu) && /* command PDU */ + return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID && !pdu->dsap ? 0 : 1; /* NULL DSAP value */ } -static int llc_stat_ev_rx_null_dsap_0_xid_r_xid_r_cnt_eq(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); - - return ev->type == LLC_STATION_EV_TYPE_PDU && - LLC_PDU_IS_RSP(pdu) && /* response PDU */ - LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ - LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_XID && - !pdu->dsap && /* NULL DSAP value */ - !llc_main_station.xid_r_count ? 0 : 1; -} - -static int llc_stat_ev_rx_null_dsap_1_xid_r_xid_r_cnt_eq(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); - - return ev->type == LLC_STATION_EV_TYPE_PDU && - LLC_PDU_IS_RSP(pdu) && /* response PDU */ - LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ - LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_XID && - !pdu->dsap && /* NULL DSAP value */ - llc_main_station.xid_r_count == 1 ? 0 : 1; -} - static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) { - struct llc_station_state_ev *ev = llc_station_ev(skb); struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); - return ev->type == LLC_STATION_EV_TYPE_PDU && - LLC_PDU_IS_CMD(pdu) && /* command PDU */ + return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST && !pdu->dsap ? 0 : 1; /* NULL DSAP */ } -static int llc_stat_ev_disable_req(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - return ev->type == LLC_STATION_EV_TYPE_PRIM && - ev->prim == LLC_DISABLE_PRIM && - ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; -} - -/** - * llc_station_send_pdu - queues PDU to send - * @skb: Address of the PDU - * - * Queues a PDU to send to the MAC layer. - */ -static void llc_station_send_pdu(struct sk_buff *skb) -{ - skb_queue_tail(&llc_main_station.mac_pdu_q, skb); - while ((skb = skb_dequeue(&llc_main_station.mac_pdu_q)) != NULL) - if (dev_queue_xmit(skb)) - break; -} - -static int llc_station_ac_start_ack_timer(struct sk_buff *skb) -{ - mod_timer(&llc_main_station.ack_timer, - jiffies + sysctl_llc_station_ack_timeout); - return 0; -} - -static int llc_station_ac_set_retry_cnt_0(struct sk_buff *skb) -{ - llc_main_station.retry_count = 0; - return 0; -} - -static int llc_station_ac_inc_retry_cnt_by_1(struct sk_buff *skb) -{ - llc_main_station.retry_count++; - return 0; -} - -static int llc_station_ac_set_xid_r_cnt_0(struct sk_buff *skb) -{ - llc_main_station.xid_r_count = 0; - return 0; -} - -static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb) -{ - llc_main_station.xid_r_count++; - return 0; -} - -static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) -{ - int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, - sizeof(struct llc_xid_info)); - - if (!nskb) - goto out; - llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, 0, LLC_PDU_CMD); - llc_pdu_init_as_xid_cmd(nskb, LLC_XID_NULL_CLASS_2, 127); - rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, skb->dev->dev_addr); - if (unlikely(rc)) - goto free; - llc_station_send_pdu(nskb); -out: - return rc; -free: - kfree_skb(skb); - goto out; -} - static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; @@ -289,11 +62,11 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; - llc_station_send_pdu(nskb); + dev_queue_xmit(nskb); out: return rc; free: - kfree_skb(skb); + kfree_skb(nskb); goto out; } @@ -318,360 +91,14 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; - llc_station_send_pdu(nskb); + dev_queue_xmit(nskb); out: return rc; free: - kfree_skb(skb); + kfree_skb(nskb); goto out; } -static int llc_station_ac_report_status(struct sk_buff *skb) -{ - return 0; -} - -/* COMMON STATION STATE transitions */ - -/* dummy last-transition indicator; common to all state transition groups - * last entry for this state - * all members are zeros, .bss zeroes it - */ -static struct llc_station_state_trans llc_stat_state_trans_end; - -/* DOWN STATE transitions */ - -/* state transition for LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK event */ -static llc_station_action_t llc_stat_down_state_actions_1[] = { - [0] = llc_station_ac_start_ack_timer, - [1] = llc_station_ac_set_retry_cnt_0, - [2] = llc_station_ac_set_xid_r_cnt_0, - [3] = llc_station_ac_send_null_dsap_xid_c, - [4] = NULL, -}; - -static struct llc_station_state_trans llc_stat_down_state_trans_1 = { - .ev = llc_stat_ev_enable_with_dup_addr_check, - .next_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .ev_actions = llc_stat_down_state_actions_1, -}; - -/* state transition for LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK event */ -static llc_station_action_t llc_stat_down_state_actions_2[] = { - [0] = llc_station_ac_report_status, /* STATION UP */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_down_state_trans_2 = { - .ev = llc_stat_ev_enable_without_dup_addr_check, - .next_state = LLC_STATION_STATE_UP, - .ev_actions = llc_stat_down_state_actions_2, -}; - -/* array of pointers; one to each transition */ -static struct llc_station_state_trans *llc_stat_dwn_state_trans[] = { - [0] = &llc_stat_down_state_trans_1, - [1] = &llc_stat_down_state_trans_2, - [2] = &llc_stat_state_trans_end, -}; - -/* UP STATE transitions */ -/* state transition for LLC_STATION_EV_DISABLE_REQ event */ -static llc_station_action_t llc_stat_up_state_actions_1[] = { - [0] = llc_station_ac_report_status, /* STATION DOWN */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_up_state_trans_1 = { - .ev = llc_stat_ev_disable_req, - .next_state = LLC_STATION_STATE_DOWN, - .ev_actions = llc_stat_up_state_actions_1, -}; - -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */ -static llc_station_action_t llc_stat_up_state_actions_2[] = { - [0] = llc_station_ac_send_xid_r, - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_up_state_trans_2 = { - .ev = llc_stat_ev_rx_null_dsap_xid_c, - .next_state = LLC_STATION_STATE_UP, - .ev_actions = llc_stat_up_state_actions_2, -}; - -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_TEST_C event */ -static llc_station_action_t llc_stat_up_state_actions_3[] = { - [0] = llc_station_ac_send_test_r, - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_up_state_trans_3 = { - .ev = llc_stat_ev_rx_null_dsap_test_c, - .next_state = LLC_STATION_STATE_UP, - .ev_actions = llc_stat_up_state_actions_3, -}; - -/* array of pointers; one to each transition */ -static struct llc_station_state_trans *llc_stat_up_state_trans [] = { - [0] = &llc_stat_up_state_trans_1, - [1] = &llc_stat_up_state_trans_2, - [2] = &llc_stat_up_state_trans_3, - [3] = &llc_stat_state_trans_end, -}; - -/* DUP ADDR CHK STATE transitions */ -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_0_XID_R_XID_R_CNT_EQ - * event - */ -static llc_station_action_t llc_stat_dupaddr_state_actions_1[] = { - [0] = llc_station_ac_inc_xid_r_cnt_by_1, - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_1 = { - .ev = llc_stat_ev_rx_null_dsap_0_xid_r_xid_r_cnt_eq, - .next_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .ev_actions = llc_stat_dupaddr_state_actions_1, -}; - -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_1_XID_R_XID_R_CNT_EQ - * event - */ -static llc_station_action_t llc_stat_dupaddr_state_actions_2[] = { - [0] = llc_station_ac_report_status, /* DUPLICATE ADDRESS FOUND */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_2 = { - .ev = llc_stat_ev_rx_null_dsap_1_xid_r_xid_r_cnt_eq, - .next_state = LLC_STATION_STATE_DOWN, - .ev_actions = llc_stat_dupaddr_state_actions_2, -}; - -/* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */ -static llc_station_action_t llc_stat_dupaddr_state_actions_3[] = { - [0] = llc_station_ac_send_xid_r, - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_3 = { - .ev = llc_stat_ev_rx_null_dsap_xid_c, - .next_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .ev_actions = llc_stat_dupaddr_state_actions_3, -}; - -/* state transition for LLC_STATION_EV_ACK_TMR_EXP_LT_RETRY_CNT_MAX_RETRY - * event - */ -static llc_station_action_t llc_stat_dupaddr_state_actions_4[] = { - [0] = llc_station_ac_start_ack_timer, - [1] = llc_station_ac_inc_retry_cnt_by_1, - [2] = llc_station_ac_set_xid_r_cnt_0, - [3] = llc_station_ac_send_null_dsap_xid_c, - [4] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_4 = { - .ev = llc_stat_ev_ack_tmr_exp_lt_retry_cnt_max_retry, - .next_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .ev_actions = llc_stat_dupaddr_state_actions_4, -}; - -/* state transition for LLC_STATION_EV_ACK_TMR_EXP_EQ_RETRY_CNT_MAX_RETRY - * event - */ -static llc_station_action_t llc_stat_dupaddr_state_actions_5[] = { - [0] = llc_station_ac_report_status, /* STATION UP */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_5 = { - .ev = llc_stat_ev_ack_tmr_exp_eq_retry_cnt_max_retry, - .next_state = LLC_STATION_STATE_UP, - .ev_actions = llc_stat_dupaddr_state_actions_5, -}; - -/* state transition for LLC_STATION_EV_DISABLE_REQ event */ -static llc_station_action_t llc_stat_dupaddr_state_actions_6[] = { - [0] = llc_station_ac_report_status, /* STATION DOWN */ - [1] = NULL, -}; - -static struct llc_station_state_trans llc_stat_dupaddr_state_trans_6 = { - .ev = llc_stat_ev_disable_req, - .next_state = LLC_STATION_STATE_DOWN, - .ev_actions = llc_stat_dupaddr_state_actions_6, -}; - -/* array of pointers; one to each transition */ -static struct llc_station_state_trans *llc_stat_dupaddr_state_trans[] = { - [0] = &llc_stat_dupaddr_state_trans_6, /* Request */ - [1] = &llc_stat_dupaddr_state_trans_4, /* Timer */ - [2] = &llc_stat_dupaddr_state_trans_5, - [3] = &llc_stat_dupaddr_state_trans_1, /* Receive frame */ - [4] = &llc_stat_dupaddr_state_trans_2, - [5] = &llc_stat_dupaddr_state_trans_3, - [6] = &llc_stat_state_trans_end, -}; - -static struct llc_station_state - llc_station_state_table[LLC_NBR_STATION_STATES] = { - [LLC_STATION_STATE_DOWN - 1] = { - .curr_state = LLC_STATION_STATE_DOWN, - .transitions = llc_stat_dwn_state_trans, - }, - [LLC_STATION_STATE_DUP_ADDR_CHK - 1] = { - .curr_state = LLC_STATION_STATE_DUP_ADDR_CHK, - .transitions = llc_stat_dupaddr_state_trans, - }, - [LLC_STATION_STATE_UP - 1] = { - .curr_state = LLC_STATION_STATE_UP, - .transitions = llc_stat_up_state_trans, - }, -}; - -/** - * llc_exec_station_trans_actions - executes actions for transition - * @trans: Address of the transition - * @skb: Address of the event that caused the transition - * - * Executes actions of a transition of the station state machine. Returns - * 0 if all actions complete successfully, nonzero otherwise. - */ -static u16 llc_exec_station_trans_actions(struct llc_station_state_trans *trans, - struct sk_buff *skb) -{ - u16 rc = 0; - llc_station_action_t *next_action = trans->ev_actions; - - for (; next_action && *next_action; next_action++) - if ((*next_action)(skb)) - rc = 1; - return rc; -} - -/** - * llc_find_station_trans - finds transition for this event - * @skb: Address of the event - * - * Search thru events of the current state of the station until list - * exhausted or it's obvious that the event is not valid for the current - * state. Returns the address of the transition if cound, %NULL otherwise. - */ -static struct llc_station_state_trans * - llc_find_station_trans(struct sk_buff *skb) -{ - int i = 0; - struct llc_station_state_trans *rc = NULL; - struct llc_station_state_trans **next_trans; - struct llc_station_state *curr_state = - &llc_station_state_table[llc_main_station.state - 1]; - - for (next_trans = curr_state->transitions; next_trans[i]->ev; i++) - if (!next_trans[i]->ev(skb)) { - rc = next_trans[i]; - break; - } - return rc; -} - -/** - * llc_station_free_ev - frees an event - * @skb: Address of the event - * - * Frees an event. - */ -static void llc_station_free_ev(struct sk_buff *skb) -{ - struct llc_station_state_ev *ev = llc_station_ev(skb); - - if (ev->type == LLC_STATION_EV_TYPE_PDU) - kfree_skb(skb); -} - -/** - * llc_station_next_state - processes event and goes to the next state - * @skb: Address of the event - * - * Processes an event, executes any transitions related to that event and - * updates the state of the station. - */ -static u16 llc_station_next_state(struct sk_buff *skb) -{ - u16 rc = 1; - struct llc_station_state_trans *trans; - - if (llc_main_station.state > LLC_NBR_STATION_STATES) - goto out; - trans = llc_find_station_trans(skb); - if (trans) { - /* got the state to which we next transition; perform the - * actions associated with this transition before actually - * transitioning to the next state - */ - rc = llc_exec_station_trans_actions(trans, skb); - if (!rc) - /* transition station to next state if all actions - * execute successfully; done; wait for next event - */ - llc_main_station.state = trans->next_state; - } else - /* event not recognized in current state; re-queue it for - * processing again at a later time; return failure - */ - rc = 0; -out: - llc_station_free_ev(skb); - return rc; -} - -/** - * llc_station_service_events - service events in the queue - * - * Get an event from the station event queue (if any); attempt to service - * the event; if event serviced, get the next event (if any) on the event - * queue; if event not service, re-queue the event on the event queue and - * attempt to service the next event; when serviced all events in queue, - * finished; if don't transition to different state, just service all - * events once; if transition to new state, service all events again. - * Caller must hold llc_main_station.ev_q.lock. - */ -static void llc_station_service_events(void) -{ - struct sk_buff *skb; - - while ((skb = skb_dequeue(&llc_main_station.ev_q.list)) != NULL) - llc_station_next_state(skb); -} - -/** - * llc_station_state_process - queue event and try to process queue. - * @skb: Address of the event - * - * Queues an event (on the station event queue) for handling by the - * station state machine and attempts to process any queued-up events. - */ -static void llc_station_state_process(struct sk_buff *skb) -{ - spin_lock_bh(&llc_main_station.ev_q.lock); - skb_queue_tail(&llc_main_station.ev_q.list, skb); - llc_station_service_events(); - spin_unlock_bh(&llc_main_station.ev_q.lock); -} - -static void llc_station_ack_tmr_cb(unsigned long timeout_data) -{ - struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); - - if (skb) { - struct llc_station_state_ev *ev = llc_station_ev(skb); - - ev->type = LLC_STATION_EV_TYPE_ACK_TMR; - llc_station_state_process(skb); - } -} - /** * llc_station_rcv - send received pdu to the station state machine * @skb: received frame. @@ -680,43 +107,19 @@ static void llc_station_ack_tmr_cb(unsigned long timeout_data) */ static void llc_station_rcv(struct sk_buff *skb) { - struct llc_station_state_ev *ev = llc_station_ev(skb); - - ev->type = LLC_STATION_EV_TYPE_PDU; - ev->reason = 0; - llc_station_state_process(skb); + if (llc_stat_ev_rx_null_dsap_xid_c(skb)) + llc_station_ac_send_xid_r(skb); + else if (llc_stat_ev_rx_null_dsap_test_c(skb)) + llc_station_ac_send_test_r(skb); + kfree_skb(skb); } -int __init llc_station_init(void) +void __init llc_station_init(void) { - int rc = -ENOBUFS; - struct sk_buff *skb; - struct llc_station_state_ev *ev; - - skb_queue_head_init(&llc_main_station.mac_pdu_q); - skb_queue_head_init(&llc_main_station.ev_q.list); - spin_lock_init(&llc_main_station.ev_q.lock); - setup_timer(&llc_main_station.ack_timer, llc_station_ack_tmr_cb, - (unsigned long)&llc_main_station); - llc_main_station.ack_timer.expires = jiffies + - sysctl_llc_station_ack_timeout; - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) - goto out; - rc = 0; llc_set_station_handler(llc_station_rcv); - ev = llc_station_ev(skb); - memset(ev, 0, sizeof(*ev)); - llc_main_station.maximum_retry = 1; - llc_main_station.state = LLC_STATION_STATE_DOWN; - ev->type = LLC_STATION_EV_TYPE_SIMPLE; - ev->prim_type = LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK; - rc = llc_station_next_state(skb); -out: - return rc; } -void __exit llc_station_exit(void) +void llc_station_exit(void) { llc_set_station_handler(NULL); } diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index d75306b9c2f3..612a5ddaf93b 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -47,13 +47,6 @@ static struct ctl_table llc2_timeout_table[] = { }; static struct ctl_table llc_station_table[] = { - { - .procname = "ack_timeout", - .data = &sysctl_llc_station_ack_timeout, - .maxlen = sizeof(long), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { }, }; diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 8dfd70d8fcfb..a04752e91023 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -38,14 +38,10 @@ static void gf_mulx(u8 *pad) static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, const u8 *addr[], const size_t *len, u8 *mac) { - u8 scratch[2 * AES_BLOCK_SIZE]; - u8 *cbc, *pad; + u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; const u8 *pos, *end; size_t i, e, left, total_len; - cbc = scratch; - pad = scratch + AES_BLOCK_SIZE; - memset(cbc, 0, AES_BLOCK_SIZE); total_len = 0; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index d0deb3edae21..3195a6307f50 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -869,7 +869,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } else { ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR, - true); + false); } out: diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a58c0b649ba1..05f3a313db88 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -20,7 +20,8 @@ #include "rate.h" #include "mesh.h" -static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, char *name, +static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, + const char *name, enum nl80211_iftype type, u32 *flags, struct vif_params *params) @@ -102,6 +103,18 @@ static int ieee80211_change_iface(struct wiphy *wiphy, return 0; } +static int ieee80211_start_p2p_device(struct wiphy *wiphy, + struct wireless_dev *wdev) +{ + return ieee80211_do_open(wdev, true); +} + +static void ieee80211_stop_p2p_device(struct wiphy *wiphy, + struct wireless_dev *wdev) +{ + ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -158,6 +171,38 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, } } + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) + key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* Keys without a station are used for TX only */ + if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP)) + key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; + break; + case NL80211_IFTYPE_ADHOC: + /* no MFP (yet) */ + break; + case NL80211_IFTYPE_MESH_POINT: +#ifdef CONFIG_MAC80211_MESH + if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) + key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; + break; +#endif + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: + /* shouldn't happen */ + WARN_ON_ONCE(1); + break; + } + err = ieee80211_key_link(key, sdata, sta); if (err) ieee80211_key_free(sdata->local, key); @@ -330,7 +375,7 @@ static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, in if (!(rate->flags & RATE_INFO_FLAGS_MCS)) { struct ieee80211_supported_band *sband; sband = sta->local->hw.wiphy->bands[ - sta->local->hw.conf.channel->band]; + sta->local->oper_channel->band]; rate->legacy = sband->bitrates[idx].bitrate; } else rate->mcs = idx; @@ -725,25 +770,23 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, const u8 *resp, size_t resp_len) { - struct sk_buff *new, *old; + struct probe_resp *new, *old; if (!resp || !resp_len) return 1; old = rtnl_dereference(sdata->u.ap.probe_resp); - new = dev_alloc_skb(resp_len); + new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL); if (!new) return -ENOMEM; - memcpy(skb_put(new, resp_len), resp, resp_len); + new->len = resp_len; + memcpy(new->data, resp, resp_len); rcu_assign_pointer(sdata->u.ap.probe_resp, new); - if (old) { - /* TODO: use call_rcu() */ - synchronize_rcu(); - dev_kfree_skb(old); - } + if (old) + kfree_rcu(old, rcu_head); return 0; } @@ -950,7 +993,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID) * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ - memset(msg->da, 0xff, ETH_ALEN); + eth_broadcast_addr(msg->da); memcpy(msg->sa, sta->sta.addr, ETH_ALEN); msg->len = htons(6); msg->dsap = 0; @@ -1285,9 +1328,10 @@ static int ieee80211_change_station(struct wiphy *wiphy, mutex_unlock(&local->sta_mtx); if (sdata->vif.type == NL80211_IFTYPE_STATION && - params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) + params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { ieee80211_recalc_ps(local, -1); - + ieee80211_recalc_ps_vif(sdata); + } return 0; } @@ -1660,7 +1704,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (!sdata->vif.bss_conf.use_short_slot && - sdata->local->hw.conf.channel->band == IEEE80211_BAND_5GHZ) { + sdata->local->oper_channel->band == IEEE80211_BAND_5GHZ) { sdata->vif.bss_conf.use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } @@ -1774,6 +1818,7 @@ static int ieee80211_scan(struct wiphy *wiphy, case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_P2P_GO: if (sdata->local->ops->hw_scan) @@ -1926,7 +1971,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); - struct ieee80211_channel *chan = local->hw.conf.channel; + struct ieee80211_channel *chan = local->oper_channel; u32 changes = 0; switch (type) { @@ -2026,9 +2071,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, */ if (!sdata->u.mgd.associated || sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) { - mutex_lock(&sdata->local->iflist_mtx); ieee80211_recalc_smps(sdata->local); - mutex_unlock(&sdata->local->iflist_mtx); return 0; } @@ -2078,6 +2121,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps_vif(sdata); return 0; } @@ -2460,6 +2504,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, if (!sdata->u.mgd.associated) need_offchan = true; break; + case NL80211_IFTYPE_P2P_DEVICE: + need_offchan = true; + break; default: return -EOPNOTSUPP; } @@ -2652,6 +2699,7 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, u16 status_code, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct ieee80211_tdls_data *tf; tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); @@ -2671,8 +2719,10 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, tf->u.setup_req.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(sdata, skb, false); - ieee80211_add_ext_srates_ie(sdata, skb, false); + ieee80211_add_srates_ie(sdata, skb, false, + local->oper_channel->band); + ieee80211_add_ext_srates_ie(sdata, skb, false, + local->oper_channel->band); ieee80211_tdls_add_ext_capab(skb); break; case WLAN_TDLS_SETUP_RESPONSE: @@ -2685,8 +2735,10 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, tf->u.setup_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(sdata, skb, false); - ieee80211_add_ext_srates_ie(sdata, skb, false); + ieee80211_add_srates_ie(sdata, skb, false, + local->oper_channel->band); + ieee80211_add_ext_srates_ie(sdata, skb, false, + local->oper_channel->band); ieee80211_tdls_add_ext_capab(skb); break; case WLAN_TDLS_SETUP_CONFIRM: @@ -2724,6 +2776,7 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, u16 status_code, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; mgmt = (void *)skb_put(skb, 24); @@ -2746,8 +2799,10 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, mgmt->u.action.u.tdls_discover_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(sdata, skb, false); - ieee80211_add_ext_srates_ie(sdata, skb, false); + ieee80211_add_srates_ie(sdata, skb, false, + local->oper_channel->band); + ieee80211_add_ext_srates_ie(sdata, skb, false, + local->oper_channel->band); ieee80211_tdls_add_ext_capab(skb); break; default: @@ -3004,6 +3059,8 @@ struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, .change_virtual_intf = ieee80211_change_iface, + .start_p2p_device = ieee80211_start_p2p_device, + .stop_p2p_device = ieee80211_stop_p2p_device, .add_key = ieee80211_add_key, .del_key = ieee80211_del_key, .get_key = ieee80211_get_key, diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index f0f87e5a1d35..0bfc914ddd15 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -68,16 +68,14 @@ ieee80211_get_channel_mode(struct ieee80211_local *local, return mode; } -bool ieee80211_set_channel_type(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - enum nl80211_channel_type chantype) +static enum nl80211_channel_type +ieee80211_get_superchan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *tmp; enum nl80211_channel_type superchan = NL80211_CHAN_NO_HT; - bool result; + struct ieee80211_sub_if_data *tmp; mutex_lock(&local->iflist_mtx); - list_for_each_entry(tmp, &local->interfaces, list) { if (tmp == sdata) continue; @@ -103,39 +101,70 @@ bool ieee80211_set_channel_type(struct ieee80211_local *local, break; } } + mutex_unlock(&local->iflist_mtx); - switch (superchan) { + return superchan; +} + +static bool +ieee80211_channel_types_are_compatible(enum nl80211_channel_type chantype1, + enum nl80211_channel_type chantype2, + enum nl80211_channel_type *compat) +{ + /* + * start out with chantype1 being the result, + * overwriting later if needed + */ + if (compat) + *compat = chantype1; + + switch (chantype1) { case NL80211_CHAN_NO_HT: + if (compat) + *compat = chantype2; + break; case NL80211_CHAN_HT20: /* * allow any change that doesn't go to no-HT * (if it already is no-HT no change is needed) */ - if (chantype == NL80211_CHAN_NO_HT) + if (chantype2 == NL80211_CHAN_NO_HT) break; - superchan = chantype; + if (compat) + *compat = chantype2; break; case NL80211_CHAN_HT40PLUS: case NL80211_CHAN_HT40MINUS: /* allow smaller bandwidth and same */ - if (chantype == NL80211_CHAN_NO_HT) + if (chantype2 == NL80211_CHAN_NO_HT) break; - if (chantype == NL80211_CHAN_HT20) + if (chantype2 == NL80211_CHAN_HT20) break; - if (superchan == chantype) + if (chantype2 == chantype1) break; - result = false; - goto out; + return false; } - local->_oper_channel_type = superchan; + return true; +} + +bool ieee80211_set_channel_type(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_channel_type chantype) +{ + enum nl80211_channel_type superchan; + enum nl80211_channel_type compatchan; + + superchan = ieee80211_get_superchan(local, sdata); + if (!ieee80211_channel_types_are_compatible(superchan, chantype, + &compatchan)) + return false; + + local->_oper_channel_type = compatchan; if (sdata) sdata->vif.bss_conf.channel_type = chantype; - result = true; - out: - mutex_unlock(&local->iflist_mtx); + return true; - return result; } diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index b8dfb440c8ef..466f4b45dd94 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -63,8 +63,6 @@ DEBUGFS_READONLY_FILE(user_power, "%d", local->user_power_level); DEBUGFS_READONLY_FILE(power, "%d", local->hw.conf.power_level); -DEBUGFS_READONLY_FILE(frequency, "%d", - local->hw.conf.channel->center_freq); DEBUGFS_READONLY_FILE(total_ps_buffered, "%d", local->total_ps_buffered); DEBUGFS_READONLY_FILE(wep_iv, "%#08x", @@ -72,6 +70,7 @@ DEBUGFS_READONLY_FILE(wep_iv, "%#08x", DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); +#ifdef CONFIG_PM static ssize_t reset_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { @@ -90,33 +89,7 @@ static const struct file_operations reset_ops = { .open = simple_open, .llseek = noop_llseek, }; - -static ssize_t channel_type_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - const char *buf; - - switch (local->hw.conf.channel_type) { - case NL80211_CHAN_NO_HT: - buf = "no ht\n"; - break; - case NL80211_CHAN_HT20: - buf = "ht20\n"; - break; - case NL80211_CHAN_HT40MINUS: - buf = "ht40-\n"; - break; - case NL80211_CHAN_HT40PLUS: - buf = "ht40+\n"; - break; - default: - buf = "???"; - break; - } - - return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); -} +#endif static ssize_t hwflags_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) @@ -205,7 +178,6 @@ static ssize_t queues_read(struct file *file, char __user *user_buf, } DEBUGFS_READONLY_FILE_OPS(hwflags); -DEBUGFS_READONLY_FILE_OPS(channel_type); DEBUGFS_READONLY_FILE_OPS(queues); /* statistics stuff */ @@ -272,12 +244,12 @@ void debugfs_hw_add(struct ieee80211_local *local) local->debugfs.keys = debugfs_create_dir("keys", phyd); - DEBUGFS_ADD(frequency); DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); DEBUGFS_ADD(queues); +#ifdef CONFIG_PM DEBUGFS_ADD_MODE(reset, 0200); - DEBUGFS_ADD(channel_type); +#endif DEBUGFS_ADD(hwflags); DEBUGFS_ADD(user_power); DEBUGFS_ADD(power); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index df9203199102..da9003b20004 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -9,7 +9,7 @@ static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) { WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", - sdata->dev->name, sdata->flags); + sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); } static inline struct ieee80211_sub_if_data * @@ -22,9 +22,11 @@ get_bss_sdata(struct ieee80211_sub_if_data *sdata) return sdata; } -static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb) +static inline void drv_tx(struct ieee80211_local *local, + struct ieee80211_tx_control *control, + struct sk_buff *skb) { - local->ops->tx(&local->hw, skb); + local->ops->tx(&local->hw, control, skb); } static inline void drv_get_et_strings(struct ieee80211_sub_if_data *sdata, @@ -526,6 +528,9 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local, sdata = get_bss_sdata(sdata); check_sdata_in_driver(sdata); + WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && + sdata->vif.type != NL80211_IFTYPE_ADHOC); + trace_drv_sta_rc_update(local, sdata, sta, changed); if (local->ops->sta_rc_update) local->ops->sta_rc_update(&local->hw, &sdata->vif, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 5746d62faba1..5f3620f0bc0a 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -109,7 +109,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); - memset(mgmt->da, 0xff, ETH_ALEN); + eth_broadcast_addr(mgmt->da); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(beacon_int); @@ -205,7 +205,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); - bss = cfg80211_inform_bss_frame(local->hw.wiphy, local->hw.conf.channel, + bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan, mgmt, skb->len, 0, GFP_KERNEL); cfg80211_put_bss(bss); netif_carrier_on(sdata->dev); @@ -278,7 +278,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, if (auth && !sdata->u.ibss.auth_frame_registrations) { ibss_dbg(sdata, "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", - sdata->vif.addr, sdata->u.ibss.bssid, addr); + sdata->vif.addr, addr, sdata->u.ibss.bssid); ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, NULL, 0, addr, sdata->u.ibss.bssid, NULL, 0, 0); } @@ -294,7 +294,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta; - int band = local->hw.conf.channel->band; + int band = local->oper_channel->band; /* * XXX: Consider removing the least recently used entry and @@ -332,11 +332,27 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, return ieee80211_ibss_finish_sta(sta, auth); } +static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + u16 reason = le16_to_cpu(mgmt->u.deauth.reason_code); + + if (len < IEEE80211_DEAUTH_FRAME_LEN) + return; + + ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM BSSID=%pM (reason: %d)\n", + mgmt->sa, mgmt->da, mgmt->bssid, reason); + sta_info_destroy_addr(sdata, mgmt->sa); +} + static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { u16 auth_alg, auth_transaction; + struct sta_info *sta; + u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; lockdep_assert_held(&sdata->u.ibss.mtx); @@ -352,10 +368,22 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction); sta_info_destroy_addr(sdata, mgmt->sa); - ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); + sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); rcu_read_unlock(); /* + * if we have any problem in allocating the new station, we reply with a + * DEAUTH frame to tell the other end that we had a problem + */ + if (!sta) { + ieee80211_send_deauth_disassoc(sdata, sdata->u.ibss.bssid, + IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, true, + deauth_frame_buf); + return; + } + + /* * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not seem to use it. * However, try to reply to authentication attempts if someone @@ -459,8 +487,11 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } } - if (sta && rates_updated) + if (sta && rates_updated) { + drv_sta_rc_update(local, sdata, &sta->sta, + IEEE80211_RC_SUPP_RATES_CHANGED); rate_control_rate_init(sta); + } rcu_read_unlock(); } @@ -561,7 +592,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta; - int band = local->hw.conf.channel->band; + int band = local->oper_channel->band; /* * XXX: Consider removing the least recently used entry and @@ -759,7 +790,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) return; } sdata_info(sdata, "IBSS not allowed on %d MHz\n", - local->hw.conf.channel->center_freq); + local->oper_channel->center_freq); /* No IBSS found - decrease scan interval and continue * scanning. */ @@ -899,6 +930,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_AUTH: ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); break; + case IEEE80211_STYPE_DEAUTH: + ieee80211_rx_mgmt_deauth_ibss(sdata, mgmt, skb->len); + break; } mgmt_out: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index bb61f7718c4c..8c804550465b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -68,6 +68,8 @@ struct ieee80211_local; #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL +#define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) + struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; @@ -193,8 +195,6 @@ struct ieee80211_tx_data { struct sta_info *sta; struct ieee80211_key *key; - struct ieee80211_channel *channel; - unsigned int flags; }; @@ -274,9 +274,15 @@ struct beacon_data { struct rcu_head rcu_head; }; +struct probe_resp { + struct rcu_head rcu_head; + int len; + u8 data[0]; +}; + struct ieee80211_if_ap { struct beacon_data __rcu *beacon; - struct sk_buff __rcu *probe_resp; + struct probe_resp __rcu *probe_resp; struct list_head vlans; @@ -359,6 +365,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_NULLFUNC_ACKED = BIT(8), IEEE80211_STA_RESET_SIGNAL_AVE = BIT(9), IEEE80211_STA_DISABLE_40MHZ = BIT(10), + IEEE80211_STA_DISABLE_VHT = BIT(11), }; struct ieee80211_mgd_auth_data { @@ -406,6 +413,7 @@ struct ieee80211_if_managed { struct work_struct monitor_work; struct work_struct chswitch_work; struct work_struct beacon_connection_loss_work; + struct work_struct csa_connection_drop_work; unsigned long beacon_timeout; unsigned long probe_timeout; @@ -965,7 +973,6 @@ struct ieee80211_local { int scan_channel_idx; int scan_ies_len; - struct ieee80211_sched_scan_ies sched_scan_ies; struct work_struct sched_scan_stopped_work; struct ieee80211_sub_if_data __rcu *sched_scan_sdata; @@ -1052,7 +1059,7 @@ struct ieee80211_local { bool disable_dynamic_ps; int user_power_level; /* in dBm */ - int power_constr_level; /* in dBm */ + int ap_power_level; /* in dBm */ enum ieee80211_smps_mode smps_mode; @@ -1075,6 +1082,8 @@ struct ieee80211_local { struct idr ack_status_frames; spinlock_t ack_status_lock; + struct ieee80211_sub_if_data __rcu *p2p_sdata; + /* dummy netdev for use w/ NAPI */ struct net_device napi_dev; @@ -1131,7 +1140,7 @@ struct ieee802_11_elems { u8 *prep; u8 *perr; struct ieee80211_rann_ie *rann; - u8 *ch_switch_elem; + struct ieee80211_channel_sw_ie *ch_switch_ie; u8 *country_elem; u8 *pwr_constr_elem; u8 *quiet_elem; /* first quite element */ @@ -1157,9 +1166,7 @@ struct ieee802_11_elems { u8 preq_len; u8 prep_len; u8 perr_len; - u8 ch_switch_elem_len; u8 country_elem_len; - u8 pwr_constr_elem_len; u8 quiet_elem_len; u8 num_of_quiet_elem; /* can be more the one */ u8 timeout_int_len; @@ -1202,6 +1209,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); +void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); @@ -1291,6 +1299,8 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local); void ieee80211_recalc_idle(struct ieee80211_local *local); void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset); +int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up); +void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { @@ -1358,7 +1368,6 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, int ieee80211_reconfig(struct ieee80211_local *local); void ieee80211_stop_device(struct ieee80211_local *local); -#ifdef CONFIG_PM int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan); @@ -1372,18 +1381,6 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) return ieee80211_reconfig(hw_to_local(hw)); } -#else -static inline int __ieee80211_suspend(struct ieee80211_hw *hw, - struct cfg80211_wowlan *wowlan) -{ - return 0; -} - -static inline int __ieee80211_resume(struct ieee80211_hw *hw) -{ - return 0; -} -#endif /* utility functions/constants */ extern void *mac80211_wiphy_privid; /* for wiphy privid */ @@ -1425,7 +1422,6 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr); void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack); -void ieee80211_beacon_connection_loss_work(struct work_struct *work); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason); @@ -1451,19 +1447,24 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u8 *extra, size_t extra_len, const u8 *bssid, const u8 *da, const u8 *key, u8 key_len, u8 key_idx); +void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, + const u8 *bssid, u16 stype, u16 reason, + bool send_frame, u8 *frame_buf); int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, const u8 *ie, size_t ie_len, enum ieee80211_band band, u32 rate_mask, u8 channel); struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, u32 ratemask, + struct ieee80211_channel *chan, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, bool directed); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck); + u32 ratemask, bool directed, bool no_cck, + struct ieee80211_channel *channel); void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, const size_t supp_rates_len, @@ -1487,9 +1488,11 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u32 cap); int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic); + struct sk_buff *skb, bool need_basic, + enum ieee80211_band band); int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic); + struct sk_buff *skb, bool need_basic, + enum ieee80211_band band); /* channel management */ enum ieee80211_chan_mode { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index bfb57dcc1538..6f8a73c64fb3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -100,6 +100,10 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local) sdata->vif.bss_conf.idle = true; continue; } + + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + continue; + /* count everything else */ sdata->vif.bss_conf.idle = false; count++; @@ -121,7 +125,8 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local) list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) continue; if (sdata->old_idle == sdata->vif.bss_conf.idle) continue; @@ -204,6 +209,8 @@ static inline int identical_mac_addr_allowed(int type1, int type2) { return type1 == NL80211_IFTYPE_MONITOR || type2 == NL80211_IFTYPE_MONITOR || + type1 == NL80211_IFTYPE_P2P_DEVICE || + type2 == NL80211_IFTYPE_P2P_DEVICE || (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) || (type1 == NL80211_IFTYPE_WDS && (type2 == NL80211_IFTYPE_WDS || @@ -271,13 +278,15 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata) int n_queues = sdata->local->hw.queues; int i; - for (i = 0; i < IEEE80211_NUM_ACS; i++) { - if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == - IEEE80211_INVAL_HW_QUEUE)) - return -EINVAL; - if (WARN_ON_ONCE(sdata->vif.hw_queue[i] >= - n_queues)) - return -EINVAL; + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) { + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == + IEEE80211_INVAL_HW_QUEUE)) + return -EINVAL; + if (WARN_ON_ONCE(sdata->vif.hw_queue[i] >= + n_queues)) + return -EINVAL; + } } if ((sdata->vif.type != NL80211_IFTYPE_AP) || @@ -406,9 +415,10 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) * an error on interface type changes that have been pre-checked, so most * checks should be in ieee80211_check_concurrent_iface. */ -static int ieee80211_do_open(struct net_device *dev, bool coming_up) +int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct net_device *dev = wdev->netdev; struct ieee80211_local *local = sdata->local; struct sta_info *sta; u32 changed = 0; @@ -443,6 +453,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_P2P_DEVICE: /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: @@ -471,7 +482,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) * Copy the hopefully now-present MAC address to * this interface, if it has the special null one. */ - if (is_zero_ether_addr(dev->dev_addr)) { + if (dev && is_zero_ether_addr(dev->dev_addr)) { memcpy(dev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); @@ -536,15 +547,23 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) local->fif_probe_req++; } - changed |= ieee80211_reset_erp_info(sdata); + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) + changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_AP) + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_MESH_POINT: netif_carrier_off(dev); - else + break; + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_P2P_DEVICE: + break; + default: netif_carrier_on(dev); + } /* * set default queue parameters so drivers don't @@ -576,6 +595,9 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) } rate_control_rate_init(sta); + netif_carrier_on(dev); + } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + rcu_assign_pointer(local->p2p_sdata, sdata); } /* @@ -601,7 +623,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) ieee80211_recalc_ps(local, -1); - netif_tx_start_all_queues(dev); + if (dev) + netif_tx_start_all_queues(dev); return 0; err_del_interface: @@ -631,7 +654,7 @@ static int ieee80211_open(struct net_device *dev) if (err) return err; - return ieee80211_do_open(dev, true); + return ieee80211_do_open(&sdata->wdev, true); } static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, @@ -652,7 +675,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* * Stop TX on this interface first. */ - netif_tx_stop_all_queues(sdata->dev); + if (sdata->dev) + netif_tx_stop_all_queues(sdata->dev); ieee80211_roc_purge(sdata); @@ -691,14 +715,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, local->fif_probe_req--; } - netif_addr_lock_bh(sdata->dev); - spin_lock_bh(&local->filter_lock); - __hw_addr_unsync(&local->mc_list, &sdata->dev->mc, - sdata->dev->addr_len); - spin_unlock_bh(&local->filter_lock); - netif_addr_unlock_bh(sdata->dev); + if (sdata->dev) { + netif_addr_lock_bh(sdata->dev); + spin_lock_bh(&local->filter_lock); + __hw_addr_unsync(&local->mc_list, &sdata->dev->mc, + sdata->dev->addr_len); + spin_unlock_bh(&local->filter_lock); + netif_addr_unlock_bh(sdata->dev); - ieee80211_configure_filter(local); + ieee80211_configure_filter(local); + } del_timer_sync(&local->dynamic_ps_timer); cancel_work_sync(&local->dynamic_ps_enable_work); @@ -708,7 +734,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct ieee80211_sub_if_data *vlan, *tmpsdata; struct beacon_data *old_beacon = rtnl_dereference(sdata->u.ap.beacon); - struct sk_buff *old_probe_resp = + struct probe_resp *old_probe_resp = rtnl_dereference(sdata->u.ap.probe_resp); /* sdata_running will return false, so this will disable */ @@ -720,7 +746,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, RCU_INIT_POINTER(sdata->u.ap.probe_resp, NULL); synchronize_rcu(); kfree(old_beacon); - kfree_skb(old_probe_resp); + kfree(old_probe_resp); /* down all dependent devices, that is VLANs */ list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, @@ -759,24 +785,29 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_adjust_monitor_flags(sdata, -1); ieee80211_configure_filter(local); break; + case NL80211_IFTYPE_P2P_DEVICE: + /* relies on synchronize_rcu() below */ + rcu_assign_pointer(local->p2p_sdata, NULL); + /* fall through */ default: flush_work(&sdata->work); /* * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path + * Call rcu_barrier() to wait both for the RX path * should it be using the interface and enqueuing - * frames at this very time on another CPU. + * frames at this very time on another CPU, and + * for the sta free call_rcu callbacks. */ - synchronize_rcu(); - skb_queue_purge(&sdata->skb_queue); + rcu_barrier(); /* - * Disable beaconing here for mesh only, AP and IBSS - * are already taken care of. + * free_sta_rcu() enqueues a work for the actual + * sta cleanup, so we need to flush it while + * sdata is still valid. */ - if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_BEACON_ENABLED); + flush_workqueue(local->workqueue); + + skb_queue_purge(&sdata->skb_queue); /* * Free all remaining keys, there shouldn't be any, @@ -877,9 +908,8 @@ static void ieee80211_set_multicast_list(struct net_device *dev) * Called when the netdev is removed or, by the code below, before * the interface type changes. */ -static void ieee80211_teardown_sdata(struct net_device *dev) +static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; int flushed; int i; @@ -900,6 +930,11 @@ static void ieee80211_teardown_sdata(struct net_device *dev) WARN_ON(flushed); } +static void ieee80211_uninit(struct net_device *dev) +{ + ieee80211_teardown_sdata(IEEE80211_DEV_TO_SUB_IF(dev)); +} + static u16 ieee80211_netdev_select_queue(struct net_device *dev, struct sk_buff *skb) { @@ -909,7 +944,7 @@ static u16 ieee80211_netdev_select_queue(struct net_device *dev, static const struct net_device_ops ieee80211_dataif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, - .ndo_uninit = ieee80211_teardown_sdata, + .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_subif_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, @@ -940,7 +975,7 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, - .ndo_uninit = ieee80211_teardown_sdata, + .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_monitor_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, @@ -1099,7 +1134,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* and set some type-dependent values */ sdata->vif.type = type; sdata->vif.p2p = false; - sdata->dev->netdev_ops = &ieee80211_dataif_ops; sdata->wdev.iftype = type; sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE); @@ -1107,8 +1141,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->noack_map = 0; - /* only monitor differs */ - sdata->dev->type = ARPHRD_ETHER; + /* only monitor/p2p-device differ */ + if (sdata->dev) { + sdata->dev->netdev_ops = &ieee80211_dataif_ops; + sdata->dev->type = ARPHRD_ETHER; + } skb_queue_head_init(&sdata->skb_queue); INIT_WORK(&sdata->work, ieee80211_iface_work); @@ -1146,6 +1183,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -1156,18 +1194,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, ieee80211_debugfs_add_netdev(sdata); } -static void ieee80211_clean_sdata(struct ieee80211_sub_if_data *sdata) -{ - switch (sdata->vif.type) { - case NL80211_IFTYPE_MESH_POINT: - mesh_path_flush_by_iface(sdata); - break; - - default: - break; - } -} - static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type) { @@ -1225,7 +1251,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, ieee80211_do_stop(sdata, false); - ieee80211_teardown_sdata(sdata->dev); + ieee80211_teardown_sdata(sdata); ret = drv_change_interface(local, sdata, internal_type, p2p); if (ret) @@ -1240,7 +1266,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, ieee80211_setup_sdata(sdata, type); - err = ieee80211_do_open(sdata->dev, false); + err = ieee80211_do_open(&sdata->wdev, false); WARN(err, "type change: do_open returned %d", err); return ret; @@ -1267,14 +1293,14 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, return ret; } else { /* Purge and reset type-dependent state. */ - ieee80211_teardown_sdata(sdata->dev); + ieee80211_teardown_sdata(sdata); ieee80211_setup_sdata(sdata, type); } /* reset some values that shouldn't be kept across type changes */ sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sdata->local, - sdata->local->hw.conf.channel->band); + sdata->local->oper_channel->band); sdata->drop_unencrypted = 0; if (type == NL80211_IFTYPE_STATION) sdata->u.mgd.use_4addr = false; @@ -1283,8 +1309,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, } static void ieee80211_assign_perm_addr(struct ieee80211_local *local, - struct net_device *dev, - enum nl80211_iftype type) + u8 *perm_addr, enum nl80211_iftype type) { struct ieee80211_sub_if_data *sdata; u64 mask, start, addr, val, inc; @@ -1293,13 +1318,12 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, int i; /* default ... something at least */ - memcpy(dev->perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN); + memcpy(perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN); if (is_zero_ether_addr(local->hw.wiphy->addr_mask) && local->hw.wiphy->n_addresses <= 1) return; - mutex_lock(&local->iflist_mtx); switch (type) { @@ -1312,11 +1336,24 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_AP) continue; - memcpy(dev->perm_addr, sdata->vif.addr, ETH_ALEN); + memcpy(perm_addr, sdata->vif.addr, ETH_ALEN); break; } /* keep default if no AP interface present */ break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: + if (local->hw.flags & IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF) { + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) + continue; + if (!ieee80211_sdata_running(sdata)) + continue; + memcpy(perm_addr, sdata->vif.addr, ETH_ALEN); + goto out_unlock; + } + } + /* otherwise fall through */ default: /* assign a new address if possible -- try n_addresses first */ for (i = 0; i < local->hw.wiphy->n_addresses; i++) { @@ -1331,7 +1368,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, } if (!used) { - memcpy(dev->perm_addr, + memcpy(perm_addr, local->hw.wiphy->addresses[i].addr, ETH_ALEN); break; @@ -1382,7 +1419,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, } if (!used) { - memcpy(dev->perm_addr, tmp_addr, ETH_ALEN); + memcpy(perm_addr, tmp_addr, ETH_ALEN); break; } addr = (start & ~mask) | (val & mask); @@ -1391,6 +1428,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, break; } + out_unlock: mutex_unlock(&local->iflist_mtx); } @@ -1398,49 +1436,68 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) { - struct net_device *ndev; + struct net_device *ndev = NULL; struct ieee80211_sub_if_data *sdata = NULL; int ret, i; int txqs = 1; ASSERT_RTNL(); - if (local->hw.queues >= IEEE80211_NUM_ACS) - txqs = IEEE80211_NUM_ACS; - - ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size, - name, ieee80211_if_setup, txqs, 1); - if (!ndev) - return -ENOMEM; - dev_net_set(ndev, wiphy_net(local->hw.wiphy)); - - ndev->needed_headroom = local->tx_headroom + - 4*6 /* four MAC addresses */ - + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ - + 6 /* mesh */ - + 8 /* rfc1042/bridge tunnel */ - - ETH_HLEN /* ethernet hard_header_len */ - + IEEE80211_ENCRYPT_HEADROOM; - ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; - - ret = dev_alloc_name(ndev, ndev->name); - if (ret < 0) - goto fail; - - ieee80211_assign_perm_addr(local, ndev, type); - memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN); - SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); - - /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ - sdata = netdev_priv(ndev); - ndev->ieee80211_ptr = &sdata->wdev; - memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN); - memcpy(sdata->name, ndev->name, IFNAMSIZ); + if (type == NL80211_IFTYPE_P2P_DEVICE) { + struct wireless_dev *wdev; + + sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, + GFP_KERNEL); + if (!sdata) + return -ENOMEM; + wdev = &sdata->wdev; + + sdata->dev = NULL; + strlcpy(sdata->name, name, IFNAMSIZ); + ieee80211_assign_perm_addr(local, wdev->address, type); + memcpy(sdata->vif.addr, wdev->address, ETH_ALEN); + } else { + if (local->hw.queues >= IEEE80211_NUM_ACS) + txqs = IEEE80211_NUM_ACS; + + ndev = alloc_netdev_mqs(sizeof(*sdata) + + local->hw.vif_data_size, + name, ieee80211_if_setup, txqs, 1); + if (!ndev) + return -ENOMEM; + dev_net_set(ndev, wiphy_net(local->hw.wiphy)); + + ndev->needed_headroom = local->tx_headroom + + 4*6 /* four MAC addresses */ + + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ + + 6 /* mesh */ + + 8 /* rfc1042/bridge tunnel */ + - ETH_HLEN /* ethernet hard_header_len */ + + IEEE80211_ENCRYPT_HEADROOM; + ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; + + ret = dev_alloc_name(ndev, ndev->name); + if (ret < 0) { + free_netdev(ndev); + return ret; + } + + ieee80211_assign_perm_addr(local, ndev->perm_addr, type); + memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN); + SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); + + /* don't use IEEE80211_DEV_TO_SUB_IF -- it checks too much */ + sdata = netdev_priv(ndev); + ndev->ieee80211_ptr = &sdata->wdev; + memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN); + memcpy(sdata->name, ndev->name, IFNAMSIZ); + + sdata->dev = ndev; + } /* initialise type-independent data */ sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; - sdata->dev = ndev; #ifdef CONFIG_INET sdata->arp_filter_state = true; #endif @@ -1469,17 +1526,21 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); - if (params) { - ndev->ieee80211_ptr->use_4addr = params->use_4addr; - if (type == NL80211_IFTYPE_STATION) - sdata->u.mgd.use_4addr = params->use_4addr; - } + if (ndev) { + if (params) { + ndev->ieee80211_ptr->use_4addr = params->use_4addr; + if (type == NL80211_IFTYPE_STATION) + sdata->u.mgd.use_4addr = params->use_4addr; + } - ndev->features |= local->hw.netdev_features; + ndev->features |= local->hw.netdev_features; - ret = register_netdevice(ndev); - if (ret) - goto fail; + ret = register_netdevice(ndev); + if (ret) { + free_netdev(ndev); + return ret; + } + } mutex_lock(&local->iflist_mtx); list_add_tail_rcu(&sdata->list, &local->interfaces); @@ -1489,10 +1550,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, *new_wdev = &sdata->wdev; return 0; - - fail: - free_netdev(ndev); - return ret; } void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) @@ -1503,11 +1560,22 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); - /* clean up type-dependent data */ - ieee80211_clean_sdata(sdata); - synchronize_rcu(); - unregister_netdevice(sdata->dev); + + if (sdata->dev) { + unregister_netdevice(sdata->dev); + } else { + cfg80211_unregister_wdev(&sdata->wdev); + kfree(sdata); + } +} + +void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata) +{ + if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state))) + return; + ieee80211_do_stop(sdata, true); + ieee80211_teardown_sdata(sdata); } /* @@ -1518,6 +1586,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata, *tmp; LIST_HEAD(unreg_list); + LIST_HEAD(wdev_list); ASSERT_RTNL(); @@ -1525,13 +1594,20 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); - ieee80211_clean_sdata(sdata); - - unregister_netdevice_queue(sdata->dev, &unreg_list); + if (sdata->dev) + unregister_netdevice_queue(sdata->dev, &unreg_list); + else + list_add(&sdata->list, &wdev_list); } mutex_unlock(&local->iflist_mtx); unregister_netdevice_many(&unreg_list); list_del(&unreg_list); + + list_for_each_entry_safe(sdata, tmp, &wdev_list, list) { + list_del(&sdata->list); + cfg80211_unregister_wdev(&sdata->wdev); + kfree(sdata); + } } static int netdev_notify(struct notifier_block *nb, diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 7ae678ba5d67..d27e61aaa71b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -402,7 +402,7 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) * Synchronize so the TX path can no longer be using * this key before we free/remove it. */ - synchronize_rcu(); + synchronize_net(); if (key->local) ieee80211_key_disable_hw_accel(key); diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 1bf7903496f8..bcffa6903129 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -276,7 +276,7 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) read_lock(&tpt_trig->trig.leddev_list_lock); list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list) - led_brightness_set(led_cdev, LED_OFF); + led_set_brightness(led_cdev, LED_OFF); read_unlock(&tpt_trig->trig.leddev_list_lock); } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c26e231c733a..c80c4490351c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -150,13 +150,11 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) if (test_bit(SCAN_SW_SCANNING, &local->scanning) || test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) || - test_bit(SCAN_HW_SCANNING, &local->scanning)) + test_bit(SCAN_HW_SCANNING, &local->scanning) || + !local->ap_power_level) power = chan->max_power; else - power = local->power_constr_level ? - min(chan->max_power, - (chan->max_reg_power - local->power_constr_level)) : - chan->max_power; + power = min(chan->max_power, local->ap_power_level); if (local->user_power_level >= 0) power = min(power, local->user_power_level); @@ -207,6 +205,10 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.bssid = NULL; else if (ieee80211_vif_is_mesh(&sdata->vif)) { sdata->vif.bss_conf.bssid = zero; + } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + sdata->vif.bss_conf.bssid = sdata->vif.addr; + WARN_ONCE(changed & ~(BSS_CHANGED_IDLE), + "P2P Device BSS changed %#x", changed); } else { WARN_ON(1); return; @@ -362,9 +364,7 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, recalc_smps); - mutex_lock(&local->iflist_mtx); ieee80211_recalc_smps(local); - mutex_unlock(&local->iflist_mtx); } #ifdef CONFIG_INET @@ -514,6 +514,11 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { BIT(IEEE80211_STYPE_AUTH >> 4) | BIT(IEEE80211_STYPE_DEAUTH >> 4), }, + [NL80211_IFTYPE_P2P_DEVICE] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4), + }, }; static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { @@ -536,6 +541,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, int priv_size, i; struct wiphy *wiphy; + if (WARN_ON(!ops->tx || !ops->start || !ops->stop || !ops->config || + !ops->add_interface || !ops->remove_interface || + !ops->configure_filter)) + return NULL; + if (WARN_ON(ops->sta_state && (ops->sta_add || ops->sta_remove))) return NULL; @@ -588,13 +598,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); - BUG_ON(!ops->tx); - BUG_ON(!ops->start); - BUG_ON(!ops->stop); - BUG_ON(!ops->config); - BUG_ON(!ops->add_interface); - BUG_ON(!ops->remove_interface); - BUG_ON(!ops->configure_filter); local->ops = ops; /* set up some defaults */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 85572353a7e3..ff0296c7bab8 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -109,11 +109,11 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, /* Disallow HT40+/- mismatch */ if (ie->ht_operation && - (local->_oper_channel_type == NL80211_CHAN_HT40MINUS || - local->_oper_channel_type == NL80211_CHAN_HT40PLUS) && + (sdata->vif.bss_conf.channel_type == NL80211_CHAN_HT40MINUS || + sdata->vif.bss_conf.channel_type == NL80211_CHAN_HT40PLUS) && (sta_channel_type == NL80211_CHAN_HT40MINUS || sta_channel_type == NL80211_CHAN_HT40PLUS) && - local->_oper_channel_type != sta_channel_type) + sdata->vif.bss_conf.channel_type != sta_channel_type) goto mismatch; return true; @@ -136,10 +136,13 @@ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) * mesh_accept_plinks_update - update accepting_plink in local mesh beacons * * @sdata: mesh interface in which mesh beacons are going to be updated + * + * Returns: beacon changed flag if the beacon content changed. */ -void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) +u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) { bool free_plinks; + u32 changed = 0; /* In case mesh_plink_free_count > 0 and mesh_plinktbl_capacity == 0, * the mesh interface might be able to establish plinks with peers that @@ -149,8 +152,12 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) */ free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.mesh.accepting_plinks) - ieee80211_mesh_housekeeping_timer((unsigned long) sdata); + if (free_plinks != sdata->u.mesh.accepting_plinks) { + sdata->u.mesh.accepting_plinks = free_plinks; + changed = BSS_CHANGED_BEACON; + } + + return changed; } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -262,7 +269,6 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) neighbors = (neighbors > 15) ? 15 : neighbors; *pos++ = neighbors << 1; /* Mesh capability */ - ifmsh->accepting_plinks = mesh_plink_availables(sdata); *pos = MESHCONF_CAPAB_FORWARDING; *pos |= ifmsh->accepting_plinks ? MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; @@ -349,17 +355,18 @@ int mesh_add_ds_params_ie(struct sk_buff *skb, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan = local->oper_channel; u8 *pos; if (skb_tailroom(skb) < 3) return -ENOMEM; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[chan->band]; if (sband->band == IEEE80211_BAND_2GHZ) { pos = skb_put(skb, 2 + 1); *pos++ = WLAN_EID_DS_PARAMS; *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel(local->hw.conf.channel->center_freq); + *pos++ = ieee80211_frequency_to_channel(chan->center_freq); } return 0; @@ -374,7 +381,7 @@ int mesh_add_ht_cap_ie(struct sk_buff *skb, sband = local->hw.wiphy->bands[local->oper_channel->band]; if (!sband->ht_cap.ht_supported || - local->_oper_channel_type == NL80211_CHAN_NO_HT) + sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap)) @@ -391,7 +398,8 @@ int mesh_add_ht_oper_ie(struct sk_buff *skb, { struct ieee80211_local *local = sdata->local; struct ieee80211_channel *channel = local->oper_channel; - enum nl80211_channel_type channel_type = local->_oper_channel_type; + enum nl80211_channel_type channel_type = + sdata->vif.bss_conf.channel_type; struct ieee80211_supported_band *sband = local->hw.wiphy->bands[channel->band]; struct ieee80211_sta_ht_cap *ht_cap = &sband->ht_cap; @@ -521,14 +529,13 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh) { - bool free_plinks; + u32 changed; ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); mesh_path_expire(sdata); - free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.mesh.accepting_plinks) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + changed = mesh_accept_plinks_update(sdata); + ieee80211_bss_info_change_notify(sdata, changed); mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); @@ -603,12 +610,14 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sdata->local, - sdata->local->hw.conf.channel->band); + sdata->local->oper_channel->band); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT); + + netif_carrier_on(sdata->dev); } void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) @@ -616,9 +625,15 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + netif_carrier_off(sdata->dev); + + /* stop the beacon */ ifmsh->mesh_id_len = 0; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - sta_info_flush(local, NULL); + + /* flush STAs and mpaths on this iface */ + sta_info_flush(sdata->local, sdata); + mesh_path_flush_by_iface(sdata); del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index faaa39bcfd10..25d0f17dec71 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -215,6 +215,9 @@ struct mesh_rmc { /* Maximum number of paths per interface */ #define MESH_MAX_MPATHS 1024 +/* Number of frames buffered per destination for unresolved destinations */ +#define MESH_FRAME_QUEUE_LEN 10 + /* Public interfaces */ /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, @@ -282,7 +285,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *ie); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); -void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); +u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_broken(struct sta_info *sta); void mesh_plink_deactivate(struct sta_info *sta); int mesh_plink_open(struct sta_info *sta); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 494bc39f61a4..47aeee2d8db1 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -17,8 +17,6 @@ #define MAX_METRIC 0xffffffff #define ARITH_SHIFT 8 -/* Number of frames buffered per destination for unresolved destinations */ -#define MESH_FRAME_QUEUE_LEN 10 #define MAX_PREQ_QUEUE_LEN 64 /* Destination only */ diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 075bc535c601..aa749818860e 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -203,23 +203,17 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) { struct sk_buff *skb; struct ieee80211_hdr *hdr; - struct sk_buff_head tmpq; unsigned long flags; rcu_assign_pointer(mpath->next_hop, sta); - __skb_queue_head_init(&tmpq); - spin_lock_irqsave(&mpath->frame_queue.lock, flags); - - while ((skb = __skb_dequeue(&mpath->frame_queue)) != NULL) { + skb_queue_walk(&mpath->frame_queue, skb) { hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN); - __skb_queue_tail(&tmpq, skb); } - skb_queue_splice(&tmpq, &mpath->frame_queue); spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); } @@ -285,40 +279,42 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, struct mesh_path *from_mpath, bool copy) { - struct sk_buff *skb, *cp_skb = NULL; - struct sk_buff_head gateq, failq; + struct sk_buff *skb, *fskb, *tmp; + struct sk_buff_head failq; unsigned long flags; - int num_skbs; BUG_ON(gate_mpath == from_mpath); BUG_ON(!gate_mpath->next_hop); - __skb_queue_head_init(&gateq); __skb_queue_head_init(&failq); spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); skb_queue_splice_init(&from_mpath->frame_queue, &failq); spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); - num_skbs = skb_queue_len(&failq); - - while (num_skbs--) { - skb = __skb_dequeue(&failq); - if (copy) { - cp_skb = skb_copy(skb, GFP_ATOMIC); - if (cp_skb) - __skb_queue_tail(&failq, cp_skb); + skb_queue_walk_safe(&failq, fskb, tmp) { + if (skb_queue_len(&gate_mpath->frame_queue) >= + MESH_FRAME_QUEUE_LEN) { + mpath_dbg(gate_mpath->sdata, "mpath queue full!\n"); + break; } + skb = skb_copy(fskb, GFP_ATOMIC); + if (WARN_ON(!skb)) + break; + prepare_for_gate(skb, gate_mpath->dst, gate_mpath); - __skb_queue_tail(&gateq, skb); + skb_queue_tail(&gate_mpath->frame_queue, skb); + + if (copy) + continue; + + __skb_unlink(fskb, &failq); + kfree_skb(fskb); } - spin_lock_irqsave(&gate_mpath->frame_queue.lock, flags); - skb_queue_splice(&gateq, &gate_mpath->frame_queue); mpath_dbg(gate_mpath->sdata, "Mpath queue for gate %pM has %d frames\n", gate_mpath->dst, skb_queue_len(&gate_mpath->frame_queue)); - spin_unlock_irqrestore(&gate_mpath->frame_queue.lock, flags); if (!copy) return; @@ -531,7 +527,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) read_lock_bh(&pathtbl_resize_lock); memcpy(new_mpath->dst, dst, ETH_ALEN); - memset(new_mpath->rann_snd_addr, 0xff, ETH_ALEN); + eth_broadcast_addr(new_mpath->rann_snd_addr); new_mpath->is_root = false; new_mpath->sdata = sdata; new_mpath->flags = 0; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index af671b984df3..3ab34d816897 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -48,17 +48,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, u8 *da, __le16 llid, __le16 plid, __le16 reason); static inline -void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) +u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.mshstats.estab_plinks); - mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata); } static inline -void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) +u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.mshstats.estab_plinks); - mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata); } /** @@ -117,7 +117,7 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) u16 ht_opmode; bool non_ht_sta = false, ht20_sta = false; - if (local->_oper_channel_type == NL80211_CHAN_NO_HT) + if (sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) return 0; rcu_read_lock(); @@ -147,7 +147,8 @@ out: if (non_ht_sta) ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED; - else if (ht20_sta && local->_oper_channel_type > NL80211_CHAN_HT20) + else if (ht20_sta && + sdata->vif.bss_conf.channel_type > NL80211_CHAN_HT20) ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_20MHZ; else ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONE; @@ -170,22 +171,21 @@ out: * @sta: mesh peer link to deactivate * * All mesh paths with this peer as next hop will be flushed + * Returns beacon changed flag if the beacon content changed. * * Locking: the caller must hold sta->lock */ -static bool __mesh_plink_deactivate(struct sta_info *sta) +static u32 __mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - bool deactivated = false; + u32 changed = 0; - if (sta->plink_state == NL80211_PLINK_ESTAB) { - mesh_plink_dec_estab_count(sdata); - deactivated = true; - } + if (sta->plink_state == NL80211_PLINK_ESTAB) + changed = mesh_plink_dec_estab_count(sdata); sta->plink_state = NL80211_PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); - return deactivated; + return changed; } /** @@ -198,18 +198,17 @@ static bool __mesh_plink_deactivate(struct sta_info *sta) void mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - bool deactivated; + u32 changed; spin_lock_bh(&sta->lock); - deactivated = __mesh_plink_deactivate(sta); + changed = __mesh_plink_deactivate(sta); sta->reason = cpu_to_le16(WLAN_REASON_MESH_PEER_CANCELED); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, sta->llid, sta->plid, sta->reason); spin_unlock_bh(&sta->lock); - if (deactivated) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + ieee80211_bss_info_change_notify(sdata, changed); } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, @@ -217,12 +216,14 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, u8 *da, __le16 llid, __le16 plid, __le16 reason) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; + struct ieee80211_tx_info *info; struct ieee80211_mgmt *mgmt; bool include_plid = false; u16 peering_proto = 0; u8 *pos, ie_len = 4; int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) + sizeof(mgmt->u.action.u.self_prot); + int err = -ENOMEM; skb = dev_alloc_skb(local->tx_headroom + hdr_len + @@ -238,6 +239,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.ie_len); if (!skb) return -1; + info = IEEE80211_SKB_CB(skb); skb_reserve(skb, local->tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); memset(mgmt, 0, hdr_len); @@ -258,15 +260,18 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, 2); memcpy(pos + 2, &plid, 2); } - if (ieee80211_add_srates_ie(sdata, skb, true) || - ieee80211_add_ext_srates_ie(sdata, skb, true) || + if (ieee80211_add_srates_ie(sdata, skb, true, + local->oper_channel->band) || + ieee80211_add_ext_srates_ie(sdata, skb, true, + local->oper_channel->band) || mesh_add_rsn_ie(skb, sdata) || mesh_add_meshid_ie(skb, sdata) || mesh_add_meshconf_ie(skb, sdata)) - return -1; + goto free; } else { /* WLAN_SP_MESH_PEERING_CLOSE */ + info->flags |= IEEE80211_TX_CTL_NO_ACK; if (mesh_add_meshid_ie(skb, sdata)) - return -1; + goto free; } /* Add Mesh Peering Management element */ @@ -285,11 +290,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, ie_len += 2; /* reason code */ break; default: - return -EINVAL; + err = -EINVAL; + goto free; } if (WARN_ON(skb_tailroom(skb) < 2 + ie_len)) - return -ENOMEM; + goto free; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PEER_MGMT; @@ -310,14 +316,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action != WLAN_SP_MESH_PEERING_CLOSE) { if (mesh_add_ht_cap_ie(skb, sdata) || mesh_add_ht_oper_ie(skb, sdata)) - return -1; + goto free; } if (mesh_add_vendor_ies(skb, sdata)) - return -1; + goto free; ieee80211_tx_skb(sdata, skb); return 0; +free: + kfree_skb(skb); + return err; } /** @@ -362,9 +371,14 @@ static struct sta_info *mesh_peer_init(struct ieee80211_sub_if_data *sdata, spin_lock_bh(&sta->lock); sta->last_rx = jiffies; + if (sta->plink_state == NL80211_PLINK_ESTAB) { + spin_unlock_bh(&sta->lock); + return sta; + } + sta->sta.supp_rates[band] = rates; if (elems->ht_cap_elem && - sdata->local->_oper_channel_type != NL80211_CHAN_NO_HT) + sdata->vif.bss_conf.channel_type != NL80211_CHAN_NO_HT) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, elems->ht_cap_elem, &sta->sta.ht_cap); @@ -523,7 +537,8 @@ int mesh_plink_open(struct sta_info *sta) spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); sta->llid = llid; - if (sta->plink_state != NL80211_PLINK_LISTEN) { + if (sta->plink_state != NL80211_PLINK_LISTEN && + sta->plink_state != NL80211_PLINK_BLOCKED) { spin_unlock_bh(&sta->lock); return -EBUSY; } @@ -541,15 +556,14 @@ int mesh_plink_open(struct sta_info *sta) void mesh_plink_block(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - bool deactivated; + u32 changed; spin_lock_bh(&sta->lock); - deactivated = __mesh_plink_deactivate(sta); + changed = __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->lock); - if (deactivated) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + ieee80211_bss_info_change_notify(sdata, changed); } @@ -852,9 +866,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m del_timer(&sta->plink_timer); sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); - mesh_plink_inc_estab_count(sdata); + changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); - changed |= BSS_CHANGED_BEACON; mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); break; @@ -888,9 +901,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m del_timer(&sta->plink_timer); sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); - mesh_plink_inc_estab_count(sdata); + changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); - changed |= BSS_CHANGED_BEACON; mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, @@ -908,13 +920,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case CLS_ACPT: reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; - __mesh_plink_deactivate(sta); + changed |= __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_HOLDING; llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); changed |= mesh_set_ht_prot_mode(sdata); - changed |= BSS_CHANGED_BEACON; mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, llid, plid, reason); break; diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index accfa00ffcdf..a16b7b4b1e02 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -56,7 +56,6 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) u64 tsfdelta; spin_lock_bh(&ifmsh->sync_offset_lock); - if (ifmsh->sync_offset_clockdrift_max < beacon_int_fraction) { msync_dbg(sdata, "TBTT : max clockdrift=%lld; adjusting\n", (long long) ifmsh->sync_offset_clockdrift_max); @@ -69,11 +68,11 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) tsfdelta = -beacon_int_fraction; ifmsh->sync_offset_clockdrift_max -= beacon_int_fraction; } + spin_unlock_bh(&ifmsh->sync_offset_lock); tsf = drv_get_tsf(local, sdata); if (tsf != -1ULL) drv_set_tsf(local, sdata, tsf + tsfdelta); - spin_unlock_bh(&ifmsh->sync_offset_lock); } static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f76b83341cf9..e714ed8bb198 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -88,8 +88,6 @@ MODULE_PARM_DESC(probe_wait_ms, #define TMR_RUNNING_TIMER 0 #define TMR_RUNNING_CHANSW 1 -#define DEAUTH_DISASSOC_LEN (24 /* hdr */ + 2 /* reason */) - /* * All cfg80211 functions have to be called outside a locked * section so that they can acquire a lock themselves... This @@ -146,6 +144,9 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata) if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER) return; + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + return; + mod_timer(&sdata->u.mgd.bcn_mon_timer, round_jiffies_up(jiffies + sdata->u.mgd.beacon_timeout)); } @@ -182,15 +183,15 @@ static u32 ieee80211_config_ht_tx(struct ieee80211_sub_if_data *sdata, u16 ht_opmode; bool disable_40 = false; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[local->oper_channel->band]; switch (sdata->vif.bss_conf.channel_type) { case NL80211_CHAN_HT40PLUS: - if (local->hw.conf.channel->flags & IEEE80211_CHAN_NO_HT40PLUS) + if (local->oper_channel->flags & IEEE80211_CHAN_NO_HT40PLUS) disable_40 = true; break; case NL80211_CHAN_HT40MINUS: - if (local->hw.conf.channel->flags & IEEE80211_CHAN_NO_HT40MINUS) + if (local->oper_channel->flags & IEEE80211_CHAN_NO_HT40MINUS) disable_40 = true; break; default: @@ -326,6 +327,26 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, ieee80211_ie_build_ht_cap(pos, &ht_cap, cap); } +static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct ieee80211_supported_band *sband) +{ + u8 *pos; + u32 cap; + struct ieee80211_sta_vht_cap vht_cap; + + BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap)); + + memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); + + /* determine capability flags */ + cap = vht_cap.cap; + + /* reserve and fill IE */ + pos = skb_put(skb, sizeof(struct ieee80211_vht_capabilities) + 2); + ieee80211_ie_build_vht_cap(pos, &vht_cap, cap); +} + static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -371,6 +392,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) 4 + /* power capability */ 2 + 2 * sband->n_channels + /* supported channels */ 2 + sizeof(struct ieee80211_ht_cap) + /* HT */ + 2 + sizeof(struct ieee80211_vht_capabilities) + /* VHT */ assoc_data->ie_len + /* extra IEs */ 9, /* WMM */ GFP_KERNEL); @@ -503,6 +525,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) ieee80211_add_ht_ie(sdata, skb, assoc_data->ap_ht_param, sband, local->oper_channel, ifmgd->ap_smps); + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) + ieee80211_add_vht_ie(sdata, skb, sband); + /* if present, add any custom non-vendor IEs that go after HT */ if (assoc_data->ie_len && assoc_data->ie) { noffset = ieee80211_ie_split_vendor(assoc_data->ie, @@ -547,48 +572,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) ieee80211_tx_skb(sdata, skb); } -static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, u16 stype, - u16 reason, bool send_frame, - u8 *frame_buf) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt = (void *)frame_buf; - - /* build frame */ - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); - mgmt->duration = 0; /* initialize only */ - mgmt->seq_ctrl = 0; /* initialize only */ - memcpy(mgmt->da, bssid, ETH_ALEN); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, bssid, ETH_ALEN); - /* u.deauth.reason_code == u.disassoc.reason_code */ - mgmt->u.deauth.reason_code = cpu_to_le16(reason); - - if (send_frame) { - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - DEAUTH_DISASSOC_LEN); - if (!skb) - return; - - skb_reserve(skb, local->hw.extra_tx_headroom); - - /* copy in frame */ - memcpy(skb_put(skb, DEAUTH_DISASSOC_LEN), - mgmt, DEAUTH_DISASSOC_LEN); - - if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) - IEEE80211_SKB_CB(skb)->flags |= - IEEE80211_TX_INTFL_DONT_ENCRYPT; - - drv_mgd_prepare_tx(local, sdata); - - ieee80211_tx_skb(sdata, skb); - } -} - void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { @@ -687,6 +670,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* XXX: shouldn't really modify cfg80211-owned data! */ ifmgd->associated->channel = sdata->local->oper_channel; + /* XXX: wait for a beacon first? */ ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_QUEUE_STOP_REASON_CSA); out: @@ -704,16 +688,13 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) trace_api_chswitch_done(sdata, success); if (!success) { - /* - * If the channel switch was not successful, stay - * around on the old channel. We currently lack - * good handling of this situation, possibly we - * should just drop the association. - */ - sdata->local->csa_channel = sdata->local->oper_channel; + sdata_info(sdata, + "driver channel switch failed, disconnecting\n"); + ieee80211_queue_work(&sdata->local->hw, + &ifmgd->csa_connection_drop_work); + } else { + ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } - - ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } EXPORT_SYMBOL(ieee80211_chswitch_done); @@ -758,61 +739,111 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); - if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) + if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) { + sdata_info(sdata, + "AP %pM switches to unsupported channel (%d MHz), disconnecting\n", + ifmgd->associated->bssid, new_freq); + ieee80211_queue_work(&sdata->local->hw, + &ifmgd->csa_connection_drop_work); return; + } sdata->local->csa_channel = new_ch; + ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; + + if (sw_elem->mode) + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + if (sdata->local->ops->channel_switch) { /* use driver's channel switch callback */ - struct ieee80211_channel_switch ch_switch; - memset(&ch_switch, 0, sizeof(ch_switch)); - ch_switch.timestamp = timestamp; - if (sw_elem->mode) { - ch_switch.block_tx = true; - ieee80211_stop_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - } - ch_switch.channel = new_ch; - ch_switch.count = sw_elem->count; - ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; + struct ieee80211_channel_switch ch_switch = { + .timestamp = timestamp, + .block_tx = sw_elem->mode, + .channel = new_ch, + .count = sw_elem->count, + }; + drv_channel_switch(sdata->local, &ch_switch); return; } /* channel switch handled in software */ - if (sw_elem->count <= 1) { + if (sw_elem->count <= 1) ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); - } else { - if (sw_elem->mode) - ieee80211_stop_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; + else mod_timer(&ifmgd->chswitch_timer, - jiffies + - msecs_to_jiffies(sw_elem->count * - cbss->beacon_interval)); - } + TU_TO_EXP_TIME(sw_elem->count * + cbss->beacon_interval)); } static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, - u16 capab_info, u8 *pwr_constr_elem, - u8 pwr_constr_elem_len) + struct ieee80211_channel *channel, + const u8 *country_ie, u8 country_ie_len, + const u8 *pwr_constr_elem) { - struct ieee80211_conf *conf = &sdata->local->hw.conf; + struct ieee80211_country_ie_triplet *triplet; + int chan = ieee80211_frequency_to_channel(channel->center_freq); + int i, chan_pwr, chan_increment, new_ap_level; + bool have_chan_pwr = false; - if (!(capab_info & WLAN_CAPABILITY_SPECTRUM_MGMT)) + /* Invalid IE */ + if (country_ie_len % 2 || country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) return; - /* Power constraint IE length should be 1 octet */ - if (pwr_constr_elem_len != 1) - return; + triplet = (void *)(country_ie + 3); + country_ie_len -= 3; + + switch (channel->band) { + default: + WARN_ON_ONCE(1); + /* fall through */ + case IEEE80211_BAND_2GHZ: + case IEEE80211_BAND_60GHZ: + chan_increment = 1; + break; + case IEEE80211_BAND_5GHZ: + chan_increment = 4; + break; + } + + /* find channel */ + while (country_ie_len >= 3) { + u8 first_channel = triplet->chans.first_channel; - if ((*pwr_constr_elem <= conf->channel->max_reg_power) && - (*pwr_constr_elem != sdata->local->power_constr_level)) { - sdata->local->power_constr_level = *pwr_constr_elem; - ieee80211_hw_config(sdata->local, 0); + if (first_channel >= IEEE80211_COUNTRY_EXTENSION_ID) + goto next; + + for (i = 0; i < triplet->chans.num_channels; i++) { + if (first_channel + i * chan_increment == chan) { + have_chan_pwr = true; + chan_pwr = triplet->chans.max_power; + break; + } + } + if (have_chan_pwr) + break; + + next: + triplet++; + country_ie_len -= 3; } + + if (!have_chan_pwr) + return; + + new_ap_level = max_t(int, 0, chan_pwr - *pwr_constr_elem); + + if (sdata->local->ap_power_level == new_ap_level) + return; + + sdata_info(sdata, + "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", + new_ap_level, chan_pwr, *pwr_constr_elem, + sdata->u.mgd.bssid); + sdata->local->ap_power_level = new_ap_level; + ieee80211_hw_config(sdata->local, 0); } void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif) @@ -1007,6 +1038,16 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) ieee80211_change_ps(local); } +void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata) +{ + bool ps_allowed = ieee80211_powersave_allowed(sdata); + + if (sdata->vif.bss_conf.ps != ps_allowed) { + sdata->vif.bss_conf.ps = ps_allowed; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_PS); + } +} + void ieee80211_dynamic_ps_disable_work(struct work_struct *work) { struct ieee80211_local *local = @@ -1239,7 +1280,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, } use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME); - if (sdata->local->hw.conf.channel->band == IEEE80211_BAND_5GHZ) + if (sdata->local->oper_channel->band == IEEE80211_BAND_5GHZ) use_short_slot = true; if (use_protection != bss_conf->use_cts_prot) { @@ -1307,9 +1348,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local, -1); - ieee80211_recalc_smps(local); mutex_unlock(&local->iflist_mtx); + ieee80211_recalc_smps(local); + ieee80211_recalc_ps_vif(sdata); + netif_tx_start_all_queues(sdata->dev); netif_carrier_on(sdata->dev); } @@ -1356,7 +1399,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(sdata, ifmgd->bssid); if (sta) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, tx); + ieee80211_sta_tear_down_BA_sessions(sta, false); } mutex_unlock(&local->sta_mtx); @@ -1371,6 +1414,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, } local->ps_sdata = NULL; + /* disable per-vif ps */ + ieee80211_recalc_ps_vif(sdata); + /* flush out any pending frame (e.g. DELBA) before deauth/disassoc */ if (tx) drv_flush(local, false); @@ -1401,7 +1447,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa)); memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); - local->power_constr_level = 0; + local->ap_power_level = 0; del_timer_sync(&local->dynamic_ps_timer); cancel_work_sync(&local->dynamic_ps_enable_work); @@ -1542,7 +1588,8 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ssid_len = ssid[1]; ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid_len, NULL, - 0, (u32) -1, true, false); + 0, (u32) -1, true, false, + ifmgd->associated->channel); } ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); @@ -1645,19 +1692,21 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, ssid_len = ssid[1]; skb = ieee80211_build_probe_req(sdata, cbss->bssid, - (u32) -1, ssid + 2, ssid_len, + (u32) -1, + sdata->local->oper_channel, + ssid + 2, ssid_len, NULL, 0, true); return skb; } EXPORT_SYMBOL(ieee80211_ap_probereq_get); -static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) +static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, + bool transmit_frame) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - u8 bssid[ETH_ALEN]; - u8 frame_buf[DEAUTH_DISASSOC_LEN]; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; mutex_lock(&ifmgd->mtx); if (!ifmgd->associated) { @@ -1665,27 +1714,24 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) return; } - memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - - sdata_info(sdata, "Connection to AP %pM lost\n", bssid); - ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - false, frame_buf); + transmit_frame, frame_buf); + ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; mutex_unlock(&ifmgd->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. */ - cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN); + cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); mutex_lock(&local->mtx); ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); } -void ieee80211_beacon_connection_loss_work(struct work_struct *work) +static void ieee80211_beacon_connection_loss_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, @@ -1701,10 +1747,24 @@ void ieee80211_beacon_connection_loss_work(struct work_struct *work) rcu_read_unlock(); } - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) - __ieee80211_connection_loss(sdata); - else + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) { + sdata_info(sdata, "Connection to AP %pM lost\n", + ifmgd->bssid); + __ieee80211_disconnect(sdata, false); + } else { ieee80211_mgd_probe_ap(sdata, true); + } +} + +static void ieee80211_csa_connection_drop_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.csa_connection_drop_work); + + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + __ieee80211_disconnect(sdata, true); } void ieee80211_beacon_loss(struct ieee80211_vif *vif) @@ -2232,14 +2292,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->iflist_mtx); } - if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && - (memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, - ETH_ALEN) == 0)) { - struct ieee80211_channel_sw_ie *sw_elem = - (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem; - ieee80211_sta_process_chanswitch(sdata, sw_elem, + if (elems->ch_switch_ie && + memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0) + ieee80211_sta_process_chanswitch(sdata, elems->ch_switch_ie, bss, rx_status->mactime); - } } @@ -2326,7 +2382,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - if (rx_status->freq != local->hw.conf.channel->center_freq) + if (rx_status->freq != local->oper_channel->center_freq) return; if (ifmgd->assoc_data && !ifmgd->assoc_data->have_beacon && @@ -2490,21 +2546,19 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, !(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) { struct ieee80211_supported_band *sband; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[local->oper_channel->band]; changed |= ieee80211_config_ht_tx(sdata, elems.ht_operation, bssid, true); } - /* Note: country IE parsing is done for us by cfg80211 */ - if (elems.country_elem) { - /* TODO: IBSS also needs this */ - if (elems.pwr_constr_elem) - ieee80211_handle_pwr_constr(sdata, - le16_to_cpu(mgmt->u.probe_resp.capab_info), - elems.pwr_constr_elem, - elems.pwr_constr_elem_len); - } + if (elems.country_elem && elems.pwr_constr_elem && + mgmt->u.probe_resp.capab_info & + cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT)) + ieee80211_handle_pwr_constr(sdata, local->oper_channel, + elems.country_elem, + elems.country_elem_len, + elems.pwr_constr_elem); ieee80211_bss_info_change_notify(sdata, changed); } @@ -2601,7 +2655,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u8 frame_buf[DEAUTH_DISASSOC_LEN]; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, false, frame_buf); @@ -2611,7 +2665,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, * must be outside lock due to cfg80211, * but that's not a problem. */ - cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN); + cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); mutex_lock(&local->mtx); ieee80211_recalc_idle(local); @@ -2673,7 +2727,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) * will not answer to direct packet in unassociated state. */ ieee80211_send_probe_req(sdata, NULL, ssidie + 2, ssidie[1], - NULL, 0, (u32) -1, true, false); + NULL, 0, (u32) -1, true, false, + auth_data->bss->channel); } auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; @@ -2894,6 +2949,7 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->monitor_work); cancel_work_sync(&ifmgd->beacon_connection_loss_work); + cancel_work_sync(&ifmgd->csa_connection_drop_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -2950,6 +3006,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, ieee80211_beacon_connection_loss_work); + INIT_WORK(&ifmgd->csa_connection_drop_work, + ieee80211_csa_connection_drop_work); INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work); setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); @@ -3000,41 +3058,17 @@ int ieee80211_max_network_latency(struct notifier_block *nb, return 0; } -static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, - struct cfg80211_bss *cbss, bool assoc) +static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, + struct cfg80211_bss *cbss) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_bss *bss = (void *)cbss->priv; - struct sta_info *sta = NULL; - bool have_sta = false; - int err; int ht_cfreq; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; const u8 *ht_oper_ie; const struct ieee80211_ht_operation *ht_oper = NULL; struct ieee80211_supported_band *sband; - if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) - return -EINVAL; - - if (assoc) { - rcu_read_lock(); - have_sta = sta_info_get(sdata, cbss->bssid); - rcu_read_unlock(); - } - - if (!have_sta) { - sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL); - if (!sta) - return -ENOMEM; - } - - mutex_lock(&local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&local->mtx); - - /* switch to the right channel */ sband = local->hw.wiphy->bands[cbss->channel->band]; ifmgd->flags &= ~IEEE80211_STA_DISABLE_40MHZ; @@ -3097,10 +3131,51 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, local->oper_channel = cbss->channel; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - if (sta) { + return 0; +} + +static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, + struct cfg80211_bss *cbss, bool assoc) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_bss *bss = (void *)cbss->priv; + struct sta_info *new_sta = NULL; + bool have_sta = false; + int err; + + if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) + return -EINVAL; + + if (assoc) { + rcu_read_lock(); + have_sta = sta_info_get(sdata, cbss->bssid); + rcu_read_unlock(); + } + + if (!have_sta) { + new_sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL); + if (!new_sta) + return -ENOMEM; + } + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(sdata->local); + mutex_unlock(&local->mtx); + + if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[cbss->channel->band]; + + err = ieee80211_prep_channel(sdata, cbss); + if (err) { + sta_info_free(local, new_sta); + return err; + } ieee80211_get_rates(sband, bss->supp_rates, bss->supp_rates_len, @@ -3122,7 +3197,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, basic_rates = BIT(min_rate_index); } - sta->sta.supp_rates[cbss->channel->band] = rates; + new_sta->sta.supp_rates[cbss->channel->band] = rates; sdata->vif.bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ @@ -3145,10 +3220,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_BEACON_INT); if (assoc) - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(new_sta, IEEE80211_STA_AUTH); - err = sta_info_insert(sta); - sta = NULL; + err = sta_info_insert(new_sta); + new_sta = NULL; if (err) { sdata_info(sdata, "failed to insert STA entry for the AP (error %d)\n", @@ -3302,9 +3377,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } /* prepare assoc data */ - - ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; - ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; + + /* + * keep only the 40 MHz disable bit set as it might have + * been set during authentication already, all other bits + * should be reset for a new connection + */ + ifmgd->flags &= IEEE80211_STA_DISABLE_40MHZ; ifmgd->beacon_crc_valid = false; @@ -3320,21 +3399,34 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP || req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) { ifmgd->flags |= IEEE80211_STA_DISABLE_11N; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; netdev_info(sdata->dev, - "disabling HT due to WEP/TKIP use\n"); + "disabling HT/VHT due to WEP/TKIP use\n"); } } - if (req->flags & ASSOC_REQ_DISABLE_HT) + if (req->flags & ASSOC_REQ_DISABLE_HT) { ifmgd->flags |= IEEE80211_STA_DISABLE_11N; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + } /* Also disable HT if we don't support it or the AP doesn't use WMM */ sband = local->hw.wiphy->bands[req->bss->channel->band]; if (!sband->ht_cap.ht_supported || local->hw.queues < IEEE80211_NUM_ACS || !bss->wmm_used) { ifmgd->flags |= IEEE80211_STA_DISABLE_11N; - netdev_info(sdata->dev, - "disabling HT as WMM/QoS is not supported\n"); + if (!bss->wmm_used) + netdev_info(sdata->dev, + "disabling HT as WMM/QoS is not supported by the AP\n"); + } + + /* disable VHT if we don't support it or the AP doesn't use WMM */ + if (!sband->vht_cap.vht_supported || + local->hw.queues < IEEE80211_NUM_ACS || !bss->wmm_used) { + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + if (!bss->wmm_used) + netdev_info(sdata->dev, + "disabling VHT as WMM/QoS is not supported by the AP\n"); } memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa)); @@ -3456,7 +3548,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct cfg80211_deauth_request *req) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u8 frame_buf[DEAUTH_DISASSOC_LEN]; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; mutex_lock(&ifmgd->mtx); @@ -3471,17 +3563,21 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->bssid, req->reason_code); if (ifmgd->associated && - ether_addr_equal(ifmgd->associated->bssid, req->bssid)) + ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, true, frame_buf); - else + } else { + drv_mgd_prepare_tx(sdata->local, sdata); ieee80211_send_deauth_disassoc(sdata, req->bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, true, frame_buf); + } + mutex_unlock(&ifmgd->mtx); - __cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN); + __cfg80211_send_deauth(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); @@ -3495,7 +3591,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 bssid[ETH_ALEN]; - u8 frame_buf[DEAUTH_DISASSOC_LEN]; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; mutex_lock(&ifmgd->mtx); @@ -3520,7 +3616,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, frame_buf); mutex_unlock(&ifmgd->mtx); - __cfg80211_send_disassoc(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN); + __cfg80211_send_disassoc(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 635c3250c668..83608ac16780 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -116,6 +116,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, if (!ieee80211_sdata_running(sdata)) continue; + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + continue; + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); @@ -144,6 +147,9 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + continue; + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); @@ -227,8 +233,7 @@ static void ieee80211_hw_roc_start(struct work_struct *work) u32 dur = dep->duration; dep->duration = dur - roc->duration; roc->duration = dur; - list_del(&dep->list); - list_add(&dep->list, &roc->list); + list_move(&dep->list, &roc->list); } } out_unlock: diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 6e4fd32c6617..10de668eb9f6 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -56,7 +56,7 @@ static inline void rate_control_rate_init(struct sta_info *sta) if (!ref) return; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[local->oper_channel->band]; ref->ops->rate_init(ref->priv, sband, ista, priv_sta); set_sta_flag(sta, WLAN_STA_RATE_CONTROL); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0cb4edee6af5..61c621e9273f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -60,7 +60,9 @@ static inline int should_drop_frame(struct sk_buff *skb, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) + if (status->flag & (RX_FLAG_FAILED_FCS_CRC | + RX_FLAG_FAILED_PLCP_CRC | + RX_FLAG_AMPDU_IS_ZEROLEN)) return 1; if (unlikely(skb->len < 16 + present_fcs_len)) return 1; @@ -91,10 +93,17 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, if (status->flag & RX_FLAG_HT) /* HT info */ len += 3; + if (status->flag & RX_FLAG_AMPDU_DETAILS) { + /* padding */ + while (len & 3) + len++; + len += 8; + } + return len; } -/** +/* * ieee80211_add_rx_radiotap_header - add radiotap header * * add a radiotap header containing all the fields which the hardware provided. @@ -215,6 +224,37 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; *pos++ = status->rate_idx; } + + if (status->flag & RX_FLAG_AMPDU_DETAILS) { + u16 flags = 0; + + /* ensure 4 byte alignment */ + while ((pos - (u8 *)rthdr) & 3) + pos++; + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS); + put_unaligned_le32(status->ampdu_reference, pos); + pos += 4; + if (status->flag & RX_FLAG_AMPDU_REPORT_ZEROLEN) + flags |= IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN; + if (status->flag & RX_FLAG_AMPDU_IS_ZEROLEN) + flags |= IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN; + if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN) + flags |= IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN; + if (status->flag & RX_FLAG_AMPDU_IS_LAST) + flags |= IEEE80211_RADIOTAP_AMPDU_IS_LAST; + if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_ERROR) + flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR; + if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) + flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN; + put_unaligned_le16(flags, pos); + pos += 2; + if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) + *pos++ = status->ampdu_delimiter_crc; + else + *pos++ = 0; + *pos++ = 0; + } } /* @@ -2268,7 +2308,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto queue; case WLAN_CATEGORY_SPECTRUM_MGMT: - if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) + if (status->band != IEEE80211_BAND_5GHZ) break; if (sdata->vif.type != NL80211_IFTYPE_STATION) @@ -2772,8 +2812,7 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, if (!bssid) { if (!ether_addr_equal(sdata->vif.addr, hdr->addr1)) return 0; - } else if (!ieee80211_bssid_match(bssid, - sdata->vif.addr)) { + } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { /* * Accept public action frames even when the * BSSID doesn't match, this is used for P2P @@ -2793,9 +2832,18 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, if (!ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2)) return 0; break; + case NL80211_IFTYPE_P2P_DEVICE: + if (!ieee80211_is_public_action(hdr, skb->len) && + !ieee80211_is_probe_req(hdr->frame_control) && + !ieee80211_is_probe_resp(hdr->frame_control) && + !ieee80211_is_beacon(hdr->frame_control)) + return 0; + if (!ether_addr_equal(sdata->vif.addr, hdr->addr1)) + status->rx_flags &= ~IEEE80211_RX_RA_MATCH; + break; default: /* should never get here */ - WARN_ON(1); + WARN_ON_ONCE(1); break; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 839dd9737989..c4cdbde24fd3 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -407,7 +407,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, enum ieee80211_band band = local->hw.conf.channel->band; sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx));; + lockdep_is_held(&local->mtx)); for (i = 0; i < local->scan_req->n_ssids; i++) ieee80211_send_probe_req( @@ -416,7 +416,8 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, local->scan_req->ssids[i].ssid_len, local->scan_req->ie, local->scan_req->ie_len, local->scan_req->rates[band], false, - local->scan_req->no_cck); + local->scan_req->no_cck, + local->hw.conf.channel); /* * After sending probe requests, wait for probe responses @@ -479,11 +480,10 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->ops->hw_scan) { __set_bit(SCAN_HW_SCANNING, &local->scanning); } else if ((req->n_channels == 1) && - (req->channels[0]->center_freq == - local->hw.conf.channel->center_freq)) { - - /* If we are scanning only on the current channel, then - * we do not need to stop normal activities + (req->channels[0] == local->oper_channel)) { + /* + * If we are scanning only on the operating channel + * then we do not need to stop normal activities */ unsigned long next_delay; @@ -917,6 +917,7 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; + struct ieee80211_sched_scan_ies sched_scan_ies; int ret, i; mutex_lock(&local->mtx); @@ -935,33 +936,28 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, if (!local->hw.wiphy->bands[i]) continue; - local->sched_scan_ies.ie[i] = kzalloc(2 + - IEEE80211_MAX_SSID_LEN + - local->scan_ies_len + - req->ie_len, - GFP_KERNEL); - if (!local->sched_scan_ies.ie[i]) { + sched_scan_ies.ie[i] = kzalloc(2 + IEEE80211_MAX_SSID_LEN + + local->scan_ies_len + + req->ie_len, + GFP_KERNEL); + if (!sched_scan_ies.ie[i]) { ret = -ENOMEM; goto out_free; } - local->sched_scan_ies.len[i] = - ieee80211_build_preq_ies(local, - local->sched_scan_ies.ie[i], + sched_scan_ies.len[i] = + ieee80211_build_preq_ies(local, sched_scan_ies.ie[i], req->ie, req->ie_len, i, (u32) -1, 0); } - ret = drv_sched_scan_start(local, sdata, req, - &local->sched_scan_ies); - if (ret == 0) { + ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); + if (ret == 0) rcu_assign_pointer(local->sched_scan_sdata, sdata); - goto out; - } out_free: while (i > 0) - kfree(local->sched_scan_ies.ie[--i]); + kfree(sched_scan_ies.ie[--i]); out: mutex_unlock(&local->mtx); return ret; @@ -970,7 +966,7 @@ out: int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - int ret = 0, i; + int ret = 0; mutex_lock(&local->mtx); @@ -979,12 +975,9 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) goto out; } - if (rcu_access_pointer(local->sched_scan_sdata)) { - for (i = 0; i < IEEE80211_NUM_BANDS; i++) - kfree(local->sched_scan_ies.ie[i]); - + if (rcu_access_pointer(local->sched_scan_sdata)) drv_sched_scan_stop(local, sdata); - } + out: mutex_unlock(&local->mtx); @@ -1006,7 +999,6 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, sched_scan_stopped_work); - int i; mutex_lock(&local->mtx); @@ -1015,9 +1007,6 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) return; } - for (i = 0; i < IEEE80211_NUM_BANDS; i++) - kfree(local->sched_scan_ies.ie[i]); - rcu_assign_pointer(local->sched_scan_sdata, NULL); mutex_unlock(&local->mtx); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 06fa75ceb025..797dd36a220d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -91,6 +91,70 @@ static int sta_info_hash_del(struct ieee80211_local *local, return -ENOENT; } +static void free_sta_work(struct work_struct *wk) +{ + struct sta_info *sta = container_of(wk, struct sta_info, free_sta_wk); + int ac, i; + struct tid_ampdu_tx *tid_tx; + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + + /* + * At this point, when being called as call_rcu callback, + * neither mac80211 nor the driver can reference this + * sta struct any more except by still existing timers + * associated with this station that we clean up below. + */ + + if (test_sta_flag(sta, WLAN_STA_PS_STA)) { + BUG_ON(!sdata->bss); + + clear_sta_flag(sta, WLAN_STA_PS_STA); + + atomic_dec(&sdata->bss->num_sta_ps); + sta_info_recalc_tim(sta); + } + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); + __skb_queue_purge(&sta->ps_tx_buf[ac]); + __skb_queue_purge(&sta->tx_filtered[ac]); + } + +#ifdef CONFIG_MAC80211_MESH + if (ieee80211_vif_is_mesh(&sdata->vif)) { + mesh_accept_plinks_update(sdata); + mesh_plink_deactivate(sta); + del_timer_sync(&sta->plink_timer); + } +#endif + + cancel_work_sync(&sta->drv_unblock_wk); + + /* + * Destroy aggregation state here. It would be nice to wait for the + * driver to finish aggregation stop and then clean up, but for now + * drivers have to handle aggregation stop being requested, followed + * directly by station destruction. + */ + for (i = 0; i < STA_TID_NUM; i++) { + tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); + if (!tid_tx) + continue; + __skb_queue_purge(&tid_tx->pending); + kfree(tid_tx); + } + + sta_info_free(local, sta); +} + +static void free_sta_rcu(struct rcu_head *h) +{ + struct sta_info *sta = container_of(h, struct sta_info, rcu_head); + + ieee80211_queue_work(&sta->local->hw, &sta->free_sta_wk); +} + /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) @@ -241,6 +305,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); + INIT_WORK(&sta->free_sta_wk, free_sta_work); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -654,8 +719,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - int ret, i, ac; - struct tid_ampdu_tx *tid_tx; + int ret, i; might_sleep(); @@ -674,7 +738,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) * will be sufficient. */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions(sta, false); ret = sta_info_hash_del(local, sta); if (ret) @@ -711,65 +775,14 @@ int __must_check __sta_info_destroy(struct sta_info *sta) WARN_ON_ONCE(ret != 0); } - /* - * At this point, after we wait for an RCU grace period, - * neither mac80211 nor the driver can reference this - * sta struct any more except by still existing timers - * associated with this station that we clean up below. - */ - synchronize_rcu(); - - if (test_sta_flag(sta, WLAN_STA_PS_STA)) { - BUG_ON(!sdata->bss); - - clear_sta_flag(sta, WLAN_STA_PS_STA); - - atomic_dec(&sdata->bss->num_sta_ps); - sta_info_recalc_tim(sta); - } - - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); - __skb_queue_purge(&sta->ps_tx_buf[ac]); - __skb_queue_purge(&sta->tx_filtered[ac]); - } - -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_accept_plinks_update(sdata); -#endif - sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - cancel_work_sync(&sta->drv_unblock_wk); - cfg80211_del_sta(sdata->dev, sta->sta.addr, GFP_KERNEL); rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { - mesh_plink_deactivate(sta); - del_timer_sync(&sta->plink_timer); - } -#endif - - /* - * Destroy aggregation state here. It would be nice to wait for the - * driver to finish aggregation stop and then clean up, but for now - * drivers have to handle aggregation stop being requested, followed - * directly by station destruction. - */ - for (i = 0; i < STA_TID_NUM; i++) { - tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); - if (!tid_tx) - continue; - __skb_queue_purge(&tid_tx->pending); - kfree(tid_tx); - } - - sta_info_free(local, sta); + call_rcu(&sta->rcu_head, free_sta_rcu); return 0; } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index a470e1123a55..c88f161f8118 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -287,6 +287,7 @@ struct sta_ampdu_mlme { struct sta_info { /* General information, mostly static */ struct list_head list; + struct rcu_head rcu_head; struct sta_info __rcu *hnext; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -297,6 +298,7 @@ struct sta_info { spinlock_t lock; struct work_struct drv_unblock_wk; + struct work_struct free_sta_wk; u16 listen_interval; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 8cd72914cdaf..3af0cc4130f1 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -34,7 +34,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, skb_queue_len(&local->skb_queue_unreliable); while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT && (skb = skb_dequeue(&local->skb_queue_unreliable))) { - dev_kfree_skb_irq(skb); + ieee80211_free_txskb(hw, skb); tmp--; I802_DEBUG_INC(local->tx_status_drop); } @@ -159,7 +159,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n", skb_queue_len(&sta->tx_filtered[ac]), !!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies); - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); } static void ieee80211_check_pending_bar(struct sta_info *sta, u8 *addr, u8 tid) @@ -517,21 +517,41 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = (unsigned long)skb; + bool found = false; + acked = info->flags & IEEE80211_TX_STAT_ACK; - /* - * TODO: When we have non-netdev frame TX, - * we cannot use skb->dev->ieee80211_ptr - */ + rcu_read_lock(); + + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!sdata->dev) + continue; + + if (skb->dev != sdata->dev) + continue; - if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) - cfg80211_probe_status(skb->dev, hdr->addr1, + found = true; + break; + } + + if (!skb->dev) { + sdata = rcu_dereference(local->p2p_sdata); + if (sdata) + found = true; + } + + if (!found) + skb->dev = NULL; + else if (ieee80211_is_nullfunc(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) { + cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); - else - cfg80211_mgmt_tx_status( - skb->dev->ieee80211_ptr, cookie, skb->data, - skb->len, acked, GFP_ATOMIC); + } else { + cfg80211_mgmt_tx_status(&sdata->wdev, cookie, skb->data, + skb->len, acked, GFP_ATOMIC); + } + + rcu_read_unlock(); } if (unlikely(info->ack_frame_id)) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index c6d33b55b2df..18d9c8a52e9e 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -24,7 +24,7 @@ __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") #define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ __entry->p2p = sdata->vif.p2p; \ - __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") + __assign_str(vif_name, sdata->dev ? sdata->dev->name : sdata->name) #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" @@ -274,9 +274,12 @@ TRACE_EVENT(drv_config, __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; __entry->max_sleep_period = local->hw.conf.max_sleep_period; __entry->listen_interval = local->hw.conf.listen_interval; - __entry->long_frame_max_tx_count = local->hw.conf.long_frame_max_tx_count; - __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; - __entry->center_freq = local->hw.conf.channel->center_freq; + __entry->long_frame_max_tx_count = + local->hw.conf.long_frame_max_tx_count; + __entry->short_frame_max_tx_count = + local->hw.conf.short_frame_max_tx_count; + __entry->center_freq = local->hw.conf.channel ? + local->hw.conf.channel->center_freq : 0; __entry->channel_type = local->hw.conf.channel_type; __entry->smps = local->hw.conf.smps_mode; ), diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c5e8c9c31f76..c9bf83f36657 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -55,7 +55,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, if (WARN_ON_ONCE(info->control.rates[0].idx < 0)) return 0; - sband = local->hw.wiphy->bands[tx->channel->band]; + sband = local->hw.wiphy->bands[info->band]; txrate = &sband->bitrates[info->control.rates[0].idx]; erp = txrate->flags & IEEE80211_RATE_ERP_G; @@ -354,7 +354,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) total += skb_queue_len(&sta->ps_tx_buf[ac]); if (skb) { purged++; - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); break; } } @@ -466,7 +466,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) ps_dbg(tx->sdata, "STA %pM TX buffer for AC %d full - dropping oldest frame\n", sta->sta.addr, ac); - dev_kfree_skb(old); + ieee80211_free_txskb(&local->hw, old); } else tx->local->total_ps_buffered++; @@ -580,7 +580,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; else skip_hw = (tx->key->conf.flags & - IEEE80211_KEY_FLAG_SW_MGMT) && + IEEE80211_KEY_FLAG_SW_MGMT_TX) && ieee80211_is_mgmt(hdr->frame_control); break; case WLAN_CIPHER_SUITE_AES_CMAC: @@ -615,7 +615,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) memset(&txrc, 0, sizeof(txrc)); - sband = tx->local->hw.wiphy->bands[tx->channel->band]; + sband = tx->local->hw.wiphy->bands[info->band]; len = min_t(u32, tx->skb->len + FCS_LEN, tx->local->hw.wiphy->frag_threshold); @@ -626,13 +626,13 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.bss_conf = &tx->sdata->vif.bss_conf; txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[tx->channel->band]; + txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; memcpy(txrc.rate_idx_mcs_mask, - tx->sdata->rc_rateidx_mcs_mask[tx->channel->band], + tx->sdata->rc_rateidx_mcs_mask[info->band], sizeof(txrc.rate_idx_mcs_mask)); txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || @@ -667,7 +667,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) "scanning and associated. Target station: " "%pM on %d GHz band\n", tx->sdata->name, hdr->addr1, - tx->channel->band ? 5 : 2)) + info->band ? 5 : 2)) return TX_DROP; /* @@ -1103,7 +1103,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, spin_unlock(&tx->sta->lock); if (purge_skb) - dev_kfree_skb(purge_skb); + ieee80211_free_txskb(&tx->local->hw, purge_skb); } /* reset session timer */ @@ -1131,7 +1131,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->skb = skb; tx->local = local; tx->sdata = sdata; - tx->channel = local->hw.conf.channel; __skb_queue_head_init(&tx->skbs); /* @@ -1204,6 +1203,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, struct sk_buff_head *skbs, bool txpending) { + struct ieee80211_tx_control control; struct sk_buff *skb, *tmp; unsigned long flags; @@ -1214,7 +1214,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (WARN_ON_ONCE(q >= local->hw.queues)) { __skb_unlink(skb, skbs); - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); continue; } #endif @@ -1240,10 +1240,10 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); info->control.vif = vif; - info->control.sta = sta; + control.sta = sta; __skb_unlink(skb, skbs); - drv_tx(local, skb); + drv_tx(local, &control, skb); } return true; @@ -1356,7 +1356,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) if (unlikely(res == TX_DROP)) { I802_DEBUG_INC(tx->local->tx_handlers_drop); if (tx->skb) - dev_kfree_skb(tx->skb); + ieee80211_free_txskb(&tx->local->hw, tx->skb); else __skb_queue_purge(&tx->skbs); return -1; @@ -1393,14 +1393,13 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, res_prepare = ieee80211_tx_prepare(sdata, &tx, skb); if (unlikely(res_prepare == TX_DROP)) { - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); goto out; } else if (unlikely(res_prepare == TX_QUEUED)) { goto out; } - tx.channel = local->hw.conf.channel; - info->band = tx.channel->band; + info->band = local->hw.conf.channel->band; /* set up hw_queue value early */ if (!(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) || @@ -1466,7 +1465,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) headroom = max_t(int, 0, headroom); if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) { - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); rcu_read_unlock(); return; } @@ -1720,7 +1719,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info; - int ret = NETDEV_TX_BUSY, head_need; + int head_need; u16 ethertype, hdrlen, meshhdrlen = 0; __le16 fc; struct ieee80211_hdr hdr; @@ -1736,10 +1735,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, u32 info_flags = 0; u16 info_id = 0; - if (unlikely(skb->len < ETH_HLEN)) { - ret = NETDEV_TX_OK; + if (unlikely(skb->len < ETH_HLEN)) goto fail; - } /* convert Ethernet header to proper 802.11 header (based on * operation mode) */ @@ -1787,7 +1784,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, if (!sdata->u.mesh.mshcfg.dot11MeshTTL) { /* Do not send frames with mesh_ttl == 0 */ sdata->u.mesh.mshstats.dropped_frames_ttl++; - ret = NETDEV_TX_OK; goto fail; } rcu_read_lock(); @@ -1874,10 +1870,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, if (tdls_direct) { /* link during setup - throw out frames to peer */ - if (!tdls_auth) { - ret = NETDEV_TX_OK; + if (!tdls_auth) goto fail; - } /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); @@ -1911,7 +1905,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, hdrlen = 24; break; default: - ret = NETDEV_TX_OK; goto fail; } @@ -1956,7 +1949,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, I802_DEBUG_INC(local->tx_handlers_drop_unauth_port); - ret = NETDEV_TX_OK; goto fail; } @@ -2011,10 +2003,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, skb = skb_clone(skb, GFP_ATOMIC); kfree_skb(tmp_skb); - if (!skb) { - ret = NETDEV_TX_OK; + if (!skb) goto fail; - } } hdr.frame_control = fc; @@ -2060,8 +2050,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, head_need += IEEE80211_ENCRYPT_HEADROOM; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); - if (ieee80211_skb_resize(sdata, skb, head_need, true)) - goto fail; + if (ieee80211_skb_resize(sdata, skb, head_need, true)) { + ieee80211_free_txskb(&local->hw, skb); + return NETDEV_TX_OK; + } } if (encaps_data) { @@ -2117,10 +2109,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; fail: - if (ret == NETDEV_TX_OK) - dev_kfree_skb(skb); - - return ret; + dev_kfree_skb(skb); + return NETDEV_TX_OK; } @@ -2196,7 +2186,7 @@ void ieee80211_tx_pending(unsigned long data) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (WARN_ON(!info->control.vif)) { - kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); continue; } @@ -2295,12 +2285,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_if_ap *ap = NULL; struct beacon_data *beacon; - struct ieee80211_supported_band *sband; - enum ieee80211_band band = local->hw.conf.channel->band; + enum ieee80211_band band = local->oper_channel->band; struct ieee80211_tx_rate_control txrc; - sband = local->hw.wiphy->bands[band]; - rcu_read_lock(); sdata = vif_to_sdata(vif); @@ -2410,7 +2397,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); - memset(mgmt->da, 0xff, ETH_ALEN); + eth_broadcast_addr(mgmt->da); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.beacon.beacon_int = @@ -2422,9 +2409,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, *pos++ = WLAN_EID_SSID; *pos++ = 0x0; - if (ieee80211_add_srates_ie(sdata, skb, true) || + if (ieee80211_add_srates_ie(sdata, skb, true, band) || mesh_add_ds_params_ie(skb, sdata) || - ieee80211_add_ext_srates_ie(sdata, skb, true) || + ieee80211_add_ext_srates_ie(sdata, skb, true, band) || mesh_add_rsn_ie(skb, sdata) || mesh_add_ht_cap_ie(skb, sdata) || mesh_add_ht_oper_ie(skb, sdata) || @@ -2447,12 +2434,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memset(&txrc, 0, sizeof(txrc)); txrc.hw = hw; - txrc.sband = sband; + txrc.sband = local->hw.wiphy->bands[band]; txrc.bss_conf = &sdata->vif.bss_conf; txrc.skb = skb; txrc.reported_rate.idx = -1; txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; - if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) + if (txrc.rate_idx_mask == (1 << txrc.sband->n_bitrates) - 1) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; @@ -2476,7 +2463,8 @@ struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ieee80211_if_ap *ap = NULL; - struct sk_buff *presp = NULL, *skb = NULL; + struct sk_buff *skb = NULL; + struct probe_resp *presp = NULL; struct ieee80211_hdr *hdr; struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); @@ -2490,10 +2478,12 @@ struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw, if (!presp) goto out; - skb = skb_copy(presp, GFP_ATOMIC); + skb = dev_alloc_skb(presp->len); if (!skb) goto out; + memcpy(skb_put(skb, presp->len), presp->data, presp->len); + hdr = (struct ieee80211_hdr *) skb->data; memset(hdr->addr1, 0, sizeof(hdr->addr1)); @@ -2604,9 +2594,9 @@ struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw, memset(hdr, 0, sizeof(*hdr)); hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ); - memset(hdr->addr1, 0xff, ETH_ALEN); + eth_broadcast_addr(hdr->addr1); memcpy(hdr->addr2, vif->addr, ETH_ALEN); - memset(hdr->addr3, 0xff, ETH_ALEN); + eth_broadcast_addr(hdr->addr3); pos = skb_put(skb, ie_ssid_len); *pos++ = WLAN_EID_SSID; @@ -2703,8 +2693,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); tx.flags |= IEEE80211_TX_PS_BUFFERED; - tx.channel = local->hw.conf.channel; - info->band = tx.channel->band; + info->band = local->oper_channel->band; if (invoke_tx_handlers(&tx)) skb = NULL; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 39b82fee4904..22ca35054dd0 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -276,6 +276,9 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue) list_for_each_entry_rcu(sdata, &local->interfaces, list) { int ac; + if (!sdata->dev) + continue; + if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) continue; @@ -364,6 +367,9 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, list_for_each_entry_rcu(sdata, &local->interfaces, list) { int ac; + if (!sdata->dev) + continue; + for (ac = 0; ac < n_acs; ac++) { if (sdata->vif.hw_queue[ac] == queue || sdata->vif.cab_queue == queue) @@ -768,8 +774,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elem_parse_failed = true; break; case WLAN_EID_CHANNEL_SWITCH: - elems->ch_switch_elem = pos; - elems->ch_switch_elem_len = elen; + if (elen != sizeof(struct ieee80211_channel_sw_ie)) { + elem_parse_failed = true; + break; + } + elems->ch_switch_ie = (void *)pos; break; case WLAN_EID_QUIET: if (!elems->quiet_elem) { @@ -783,8 +792,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->country_elem_len = elen; break; case WLAN_EID_PWR_CONSTRAINT: + if (elen != 1) { + elem_parse_failed = true; + break; + } elems->pwr_constr_elem = pos; - elems->pwr_constr_elem_len = elen; break; case WLAN_EID_TIMEOUT_INTERVAL: elems->timeout_int = pos; @@ -832,7 +844,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, memset(&qparam, 0, sizeof(qparam)); - use_11b = (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) && + use_11b = (local->oper_channel->band == IEEE80211_BAND_2GHZ) && !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE); /* @@ -899,7 +911,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, drv_conf_tx(local, sdata, ac, &qparam); } - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { + if (sdata->vif.type != NL80211_IFTYPE_MONITOR && + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) { sdata->vif.bss_conf.qos = enable_qos; if (bss_notify) ieee80211_bss_info_change_notify(sdata, @@ -919,7 +932,7 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, if ((supp_rates[i] & 0x7f) * 5 > 110) have_higher_than_11mbit = 1; - if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && + if (local->oper_channel->band == IEEE80211_BAND_2GHZ && have_higher_than_11mbit) sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; else @@ -994,6 +1007,45 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } +void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, + const u8 *bssid, u16 stype, u16 reason, + bool send_frame, u8 *frame_buf) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt = (void *)frame_buf; + + /* build frame */ + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); + mgmt->duration = 0; /* initialize only */ + mgmt->seq_ctrl = 0; /* initialize only */ + memcpy(mgmt->da, bssid, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, bssid, ETH_ALEN); + /* u.deauth.reason_code == u.disassoc.reason_code */ + mgmt->u.deauth.reason_code = cpu_to_le16(reason); + + if (send_frame) { + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + IEEE80211_DEAUTH_FRAME_LEN); + if (!skb) + return; + + skb_reserve(skb, local->hw.extra_tx_headroom); + + /* copy in frame */ + memcpy(skb_put(skb, IEEE80211_DEAUTH_FRAME_LEN), + mgmt, IEEE80211_DEAUTH_FRAME_LEN); + + if (sdata->vif.type != NL80211_IFTYPE_STATION || + !(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED)) + IEEE80211_SKB_CB(skb)->flags |= + IEEE80211_TX_INTFL_DONT_ENCRYPT; + + ieee80211_tx_skb(sdata, skb); + } +} + int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, const u8 *ie, size_t ie_len, enum ieee80211_band band, u32 rate_mask, @@ -1100,6 +1152,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, u32 ratemask, + struct ieee80211_channel *chan, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, bool directed) @@ -1109,7 +1162,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; size_t buf_len; u8 *buf; - u8 chan; + u8 chan_no; /* FIXME: come up with a proper value */ buf = kmalloc(200 + ie_len, GFP_KERNEL); @@ -1122,14 +1175,12 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, * badly-behaved APs don't respond when this parameter is included. */ if (directed) - chan = 0; + chan_no = 0; else - chan = ieee80211_frequency_to_channel( - local->hw.conf.channel->center_freq); + chan_no = ieee80211_frequency_to_channel(chan->center_freq); - buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len, - local->hw.conf.channel->band, - ratemask, chan); + buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len, chan->band, + ratemask, chan_no); skb = ieee80211_probereq_get(&local->hw, &sdata->vif, ssid, ssid_len, @@ -1154,11 +1205,13 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck) + u32 ratemask, bool directed, bool no_cck, + struct ieee80211_channel *channel) { struct sk_buff *skb; - skb = ieee80211_build_probe_req(sdata, dst, ratemask, ssid, ssid_len, + skb = ieee80211_build_probe_req(sdata, dst, ratemask, channel, + ssid, ssid_len, ie, ie_len, directed); if (skb) { if (no_cck) @@ -1359,7 +1412,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: changed |= BSS_CHANGED_ASSOC | - BSS_CHANGED_ARP_FILTER; + BSS_CHANGED_ARP_FILTER | + BSS_CHANGED_PS; mutex_lock(&sdata->u.mgd.mtx); ieee80211_bss_info_change_notify(sdata, changed); mutex_unlock(&sdata->u.mgd.mtx); @@ -1385,6 +1439,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_MONITOR: /* ignore virtual */ break; + case NL80211_IFTYPE_P2P_DEVICE: + changed = BSS_CHANGED_IDLE; + break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: @@ -1549,14 +1606,13 @@ static int check_mgd_smps(struct ieee80211_if_managed *ifmgd, return 0; } -/* must hold iflist_mtx */ void ieee80211_recalc_smps(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF; int count = 0; - lockdep_assert_held(&local->iflist_mtx); + mutex_lock(&local->iflist_mtx); /* * This function could be improved to handle multiple @@ -1571,6 +1627,8 @@ void ieee80211_recalc_smps(struct ieee80211_local *local) list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + continue; if (sdata->vif.type != NL80211_IFTYPE_STATION) goto set; @@ -1583,12 +1641,14 @@ void ieee80211_recalc_smps(struct ieee80211_local *local) } if (smps_mode == local->smps_mode) - return; + goto unlock; set: local->smps_mode = smps_mode; /* changed flag is auto-detected for this */ ieee80211_hw_config(local, 0); + unlock: + mutex_unlock(&local->iflist_mtx); } static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) @@ -1809,7 +1869,8 @@ ieee80211_ht_oper_to_channel_type(struct ieee80211_ht_operation *ht_oper) } int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic) + struct sk_buff *skb, bool need_basic, + enum ieee80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -1817,7 +1878,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, u8 i, rates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[band]; rates = sband->n_bitrates; if (rates > 8) rates = 8; @@ -1840,7 +1901,8 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, } int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic) + struct sk_buff *skb, bool need_basic, + enum ieee80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -1848,7 +1910,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, u8 i, exrates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[band]; exrates = sband->n_bitrates; if (exrates > 8) exrates -= 8; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c19b214ffd57..fefa514b9917 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -356,6 +356,55 @@ config NETFILTER_NETLINK_QUEUE_CT If this option is enabled, NFQUEUE can include Connection Tracking information together with the packet is the enqueued via NFNETLINK. +config NF_NAT + tristate + +config NF_NAT_NEEDED + bool + depends on NF_NAT + default y + +config NF_NAT_PROTO_DCCP + tristate + depends on NF_NAT && NF_CT_PROTO_DCCP + default NF_NAT && NF_CT_PROTO_DCCP + +config NF_NAT_PROTO_UDPLITE + tristate + depends on NF_NAT && NF_CT_PROTO_UDPLITE + default NF_NAT && NF_CT_PROTO_UDPLITE + +config NF_NAT_PROTO_SCTP + tristate + default NF_NAT && NF_CT_PROTO_SCTP + depends on NF_NAT && NF_CT_PROTO_SCTP + select LIBCRC32C + +config NF_NAT_AMANDA + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_AMANDA + +config NF_NAT_FTP + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_FTP + +config NF_NAT_IRC + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_IRC + +config NF_NAT_SIP + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_SIP + +config NF_NAT_TFTP + tristate + depends on NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_TFTP + endif # NF_CONNTRACK # transparent proxy support @@ -599,6 +648,16 @@ config NETFILTER_XT_TARGET_MARK (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). +config NETFILTER_XT_TARGET_NETMAP + tristate '"NETMAP" target support' + depends on NF_NAT + ---help--- + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_NFLOG tristate '"NFLOG" target support' default m if NETFILTER_ADVANCED=n @@ -621,19 +680,6 @@ config NETFILTER_XT_TARGET_NFQUEUE To compile it as a module, choose M here. If unsure, say N. -config NETFILTER_XT_TARGET_NOTRACK - tristate '"NOTRACK" target support' - depends on IP_NF_RAW || IP6_NF_RAW - depends on NF_CONNTRACK - help - The NOTRACK target allows a select rule to specify - which packets *not* to enter the conntrack/NAT - subsystem with all the consequences (no ICMP error tracking, - no protocol helpers for the selected packets). - - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. - config NETFILTER_XT_TARGET_RATEEST tristate '"RATEEST" target support' depends on NETFILTER_ADVANCED @@ -644,6 +690,17 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_REDIRECT + tristate "REDIRECT target support" + depends on NF_NAT + ---help--- + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1c5160f2278e..32596978df1d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -43,6 +43,23 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o +nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ + nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o + +obj-$(CONFIG_NF_NAT) += nf_nat.o + +# NAT protocols (nf_nat) +obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o +obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o +obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o + +# NAT helpers +obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o +obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o +obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o +obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o +obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o + # transparent proxy support obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o @@ -53,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o +obj-$(CONFIG_NF_NAT) += xt_nat.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o @@ -65,10 +83,11 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o +obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o -obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o +obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0bc6b60db4df..68912dadf13d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -126,7 +126,7 @@ unsigned int nf_iterate(struct list_head *head, unsigned int hook, const struct net_device *indev, const struct net_device *outdev, - struct list_head **i, + struct nf_hook_ops **elemp, int (*okfn)(struct sk_buff *), int hook_thresh) { @@ -136,22 +136,20 @@ unsigned int nf_iterate(struct list_head *head, * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ - list_for_each_continue_rcu(*i, head) { - struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; - - if (hook_thresh > elem->priority) + list_for_each_entry_continue_rcu((*elemp), head, list) { + if (hook_thresh > (*elemp)->priority) continue; /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = elem->hook(hook, skb, indev, outdev, okfn); + verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", - elem->hook, hook); + (*elemp)->hook, hook); continue; } #endif @@ -172,14 +170,14 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, int (*okfn)(struct sk_buff *), int hook_thresh) { - struct list_head *elem; + struct nf_hook_ops *elem; unsigned int verdict; int ret = 0; /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - elem = &nf_hooks[pf][hook]; + elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list); next_hook: verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, outdev, &elem, okfn, hook_thresh); @@ -273,6 +271,11 @@ EXPORT_SYMBOL_GPL(nfq_ct_nat_hook); #endif /* CONFIG_NF_CONNTRACK */ +#ifdef CONFIG_NF_NAT_NEEDED +void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); +EXPORT_SYMBOL(nf_nat_decode_session_hook); +#endif + #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_net_netfilter; EXPORT_SYMBOL(proc_net_netfilter); diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 7e1b061aeeba..4a92fd47bd4c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -27,9 +27,12 @@ #define IP_SET_BITMAP_TIMEOUT #include <linux/netfilter/ipset/ip_set_timeout.h> +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("bitmap:ip type of IP sets"); +IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); /* Type structure */ @@ -284,7 +287,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } else @@ -454,7 +457,8 @@ static int bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_ip *map; - u32 first_ip, last_ip, hosts, elements; + u32 first_ip, last_ip, hosts; + u64 elements; u8 netmask = 32; int ret; @@ -497,7 +501,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (netmask == 32) { hosts = 1; - elements = last_ip - first_ip + 1; + elements = (u64)last_ip - first_ip + 1; } else { u8 mask_bits; u32 mask; @@ -515,7 +519,8 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; - pr_debug("hosts %u, elements %u\n", hosts, elements); + pr_debug("hosts %u, elements %llu\n", + hosts, (unsigned long long)elements); map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) @@ -554,8 +559,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_IPV4, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = bitmap_ip_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index d7eaf10edb6d..0f92dc24cb89 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -26,9 +26,12 @@ #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets"); +IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); enum { @@ -320,11 +323,11 @@ bitmap_ipmac_tlist(const struct ip_set *set, (elem->match == MAC_FILLED && nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether))) - goto nla_put_failure; + goto nla_put_failure; timeout = elem->match == MAC_UNSET ? elem->timeout : ip_set_timeout_get(elem->timeout); if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout))) - goto nla_put_failure; + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, atd); @@ -557,7 +560,8 @@ static int bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip, last_ip, elements; + u32 first_ip, last_ip; + u64 elements; struct bitmap_ipmac *map; int ret; @@ -588,7 +592,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], } else return -IPSET_ERR_PROTOCOL; - elements = last_ip - first_ip + 1; + elements = (u64)last_ip - first_ip + 1; if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; @@ -629,8 +633,8 @@ static struct ip_set_type bitmap_ipmac_type = { .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_IPV4, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index b9f1fce7053b..e6b2db76f4c3 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -22,9 +22,12 @@ #define IP_SET_BITMAP_TIMEOUT #include <linux/netfilter/ipset/ip_set_timeout.h> +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("bitmap:port type of IP sets"); +IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_bitmap:port"); /* Type structure */ @@ -487,8 +490,8 @@ static struct ip_set_type bitmap_port_type = { .features = IPSET_TYPE_PORT, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = bitmap_port_create, .create_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9730882697aa..778465f217fa 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -69,7 +69,8 @@ find_set_type(const char *name, u8 family, u8 revision) list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STREQ(type->name, name) && - (type->family == family || type->family == NFPROTO_UNSPEC) && + (type->family == family || + type->family == NFPROTO_UNSPEC) && revision >= type->revision_min && revision <= type->revision_max) return type; @@ -149,7 +150,8 @@ __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max, rcu_read_lock(); list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STREQ(type->name, name) && - (type->family == family || type->family == NFPROTO_UNSPEC)) { + (type->family == family || + type->family == NFPROTO_UNSPEC)) { found = true; if (type->revision_min < *min) *min = type->revision_min; @@ -368,6 +370,12 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, set->variant->kadt(set, skb, par, IPSET_ADD, opt); write_unlock_bh(&set->lock); ret = 1; + } else { + /* --return-nomatch: invert matched element */ + if ((opt->flags & IPSET_RETURN_NOMATCH) && + (set->type->features & IPSET_TYPE_NOMATCH) && + (ret > 0 || ret == -ENOTEMPTY)) + ret = -ret; } /* Convert error codes to nomatch */ @@ -563,13 +571,13 @@ flag_exist(const struct nlmsghdr *nlh) } static struct nlmsghdr * -start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, +start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), sizeof(*nfmsg), flags); if (nlh == NULL) return NULL; @@ -721,7 +729,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ - if ((ret = find_free_id(set->name, &index, &clash)) != 0) { + ret = find_free_id(set->name, &index, &clash); + if (ret != 0) { /* If this is the same set and requested, ignore error */ if (ret == -EEXIST && (flags & IPSET_FLAG_EXIST) && @@ -1045,7 +1054,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; - unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; + unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; @@ -1093,7 +1102,7 @@ dump_last: pr_debug("reference set\n"); __ip_set_get(index); } - nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, + nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, IPSET_CMD_LIST); if (!nlh) { @@ -1226,7 +1235,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, skb2 = nlmsg_new(payload, GFP_KERNEL); if (skb2 == NULL) return -ENOMEM; - rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid, + rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = ret; @@ -1241,7 +1250,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); /* Signal netlink not to send its ACK/errmsg. */ return -EINTR; } @@ -1416,7 +1425,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; @@ -1428,7 +1437,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1476,7 +1485,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; @@ -1489,7 +1498,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, nlmsg_end(skb2, nlh2); pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1525,7 +1534,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_PROTOCOL); if (!nlh2) goto nlmsg_failure; @@ -1533,7 +1542,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index a68dbd4f1e4e..ec3dba5dcd62 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -24,9 +24,12 @@ #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_hash.h> +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("hash:ip type of IP sets"); +IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip"); /* Type specific function prefix */ @@ -114,7 +117,7 @@ nla_put_failure: static inline void hash_ip4_data_next(struct ip_set_hash *h, const struct hash_ip4_elem *d) { - h->next.ip = ntohl(d->ip); + h->next.ip = d->ip; } static int @@ -179,7 +182,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } else @@ -188,7 +191,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip += hosts) { nip = htonl(ip); if (nip == 0) @@ -452,8 +455,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 92722bb82eea..0171f7502fa5 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -25,9 +25,12 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> +#define REVISION_MIN 0 +#define REVISION_MAX 1 /* SCTP and UDPLITE support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("hash:ip,port type of IP sets"); +IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ @@ -130,8 +133,8 @@ static inline void hash_ipport4_data_next(struct ip_set_hash *h, const struct hash_ipport4_elem *d) { - h->next.ip = ntohl(d->ip); - h->next.port = ntohs(d->port); + h->next.ip = d->ip; + h->next.port = d->port; } static int @@ -217,7 +220,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } else @@ -231,9 +234,10 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip++) { - p = retried && ip == h->next.ip ? h->next.port : port; + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; for (; p <= port_to; p++) { data.ip = htonl(ip); data.port = htons(p); @@ -349,7 +353,7 @@ static inline void hash_ipport6_data_next(struct ip_set_hash *h, const struct hash_ipport6_elem *d) { - h->next.port = ntohs(d->port); + h->next.port = d->port; } static int @@ -431,7 +435,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); if (retried) - port = h->next.port; + port = ntohs(h->next.port); for (; port <= port_to; port++) { data.port = htons(port); ret = adtfn(set, &data, timeout, flags); @@ -522,8 +526,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 1, /* SCTP and UDPLITE support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 0637ce096def..6344ef551ec8 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -25,9 +25,12 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> +#define REVISION_MIN 0 +#define REVISION_MAX 1 /* SCTP and UDPLITE support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets"); +IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ @@ -133,8 +136,8 @@ static inline void hash_ipportip4_data_next(struct ip_set_hash *h, const struct hash_ipportip4_elem *d) { - h->next.ip = ntohl(d->ip); - h->next.port = ntohs(d->port); + h->next.ip = d->ip; + h->next.port = d->port; } static int @@ -225,7 +228,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } else @@ -239,9 +242,10 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip++) { - p = retried && ip == h->next.ip ? h->next.port : port; + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; for (; p <= port_to; p++) { data.ip = htonl(ip); data.port = htons(p); @@ -362,7 +366,7 @@ static inline void hash_ipportip6_data_next(struct ip_set_hash *h, const struct hash_ipportip6_elem *d) { - h->next.port = ntohs(d->port); + h->next.port = d->port; } static int @@ -449,7 +453,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); if (retried) - port = h->next.port; + port = ntohs(h->next.port); for (; port <= port_to; port++) { data.port = htons(port); ret = adtfn(set, &data, timeout, flags); @@ -540,8 +544,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 1, /* SCTP and UDPLITE support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 1ce21ca976e1..cb71f9a774e7 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -25,9 +25,14 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> +#define REVISION_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +#define REVISION_MAX 3 /* nomatch flag support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("hash:ip,port,net type of IP sets"); +IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ @@ -99,10 +104,10 @@ hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } -static inline bool +static inline int hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -173,9 +178,9 @@ static inline void hash_ipportnet4_data_next(struct ip_set_hash *h, const struct hash_ipportnet4_elem *d) { - h->next.ip = ntohl(d->ip); - h->next.port = ntohs(d->port); - h->next.ip2 = ntohl(d->ip2); + h->next.ip = d->ip; + h->next.port = d->port; + h->next.ip2 = d->ip2; } static int @@ -290,7 +295,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (cidr > 32) + if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } @@ -314,14 +319,17 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); for (; !before(ip_to, ip); ip++) { data.ip = htonl(ip); - p = retried && ip == h->next.ip ? h->next.port : port; + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; for (; p <= port_to; p++) { data.port = htons(p); - ip2 = retried && ip == h->next.ip && p == h->next.port - ? h->next.ip2 : ip2_from; + ip2 = retried + && ip == ntohl(h->next.ip) + && p == ntohs(h->next.port) + ? ntohl(h->next.ip2) : ip2_from; while (!after(ip2, ip2_to)) { data.ip2 = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, @@ -403,10 +411,10 @@ hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } -static inline bool +static inline int hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -486,7 +494,7 @@ static inline void hash_ipportnet6_data_next(struct ip_set_hash *h, const struct hash_ipportnet6_elem *d) { - h->next.port = ntohs(d->port); + h->next.port = d->port; } static int @@ -598,7 +606,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); if (retried) - port = h->next.port; + port = ntohs(h->next.port); for (; port <= port_to; port++) { data.port = htons(port); ret = adtfn(set, &data, timeout, flags); @@ -689,13 +697,12 @@ hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags) static struct ip_set_type hash_ipportnet_type __read_mostly = { .name = "hash:ip,port,net", .protocol = IPSET_PROTOCOL, - .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 | + IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - /* 1 SCTP and UDPLITE support added */ - /* 2 Range as input support for IPv4 added */ - .revision_max = 3, /* nomatch flag support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c57a6a09906d..29e94b981f3f 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -23,9 +23,13 @@ #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_hash.h> +#define REVISION_MIN 0 +/* 1 Range as input support for IPv4 added */ +#define REVISION_MAX 2 /* nomatch flag support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("hash:net type of IP sets"); +IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:net"); /* Type specific function prefix */ @@ -86,10 +90,10 @@ hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) dst->nomatch = flags & IPSET_FLAG_NOMATCH; } -static inline bool +static inline int hash_net4_data_match(const struct hash_net4_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -152,7 +156,7 @@ static inline void hash_net4_data_next(struct ip_set_hash *h, const struct hash_net4_elem *d) { - h->next.ip = ntohl(d->ip); + h->next.ip = d->ip; } static int @@ -235,7 +239,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_HASH_RANGE; } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); while (!after(ip, ip_to)) { data.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); @@ -307,10 +311,10 @@ hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags) dst->nomatch = flags & IPSET_FLAG_NOMATCH; } -static inline bool +static inline int hash_net6_data_match(const struct hash_net6_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -532,12 +536,11 @@ hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags) static struct ip_set_type hash_net_type __read_mostly = { .name = "hash:net", .protocol = IPSET_PROTOCOL, - .features = IPSET_TYPE_IP, + .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - /* = 1 Range as input support for IPv4 added */ - .revision_max = 2, /* nomatch flag support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index d5d3607ae7bc..b9a63381e349 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -24,9 +24,13 @@ #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_hash.h> +#define REVISION_MIN 0 +/* 1 nomatch flag support added */ +#define REVISION_MAX 2 /* /0 support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("hash:net,iface type of IP sets"); +IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Interface name rbtree */ @@ -140,7 +144,7 @@ struct hash_netiface4_elem_hashed { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; }; #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) @@ -151,7 +155,7 @@ struct hash_netiface4_elem { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; const char *iface; }; @@ -161,7 +165,7 @@ struct hash_netiface4_telem { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; const char *iface; unsigned long timeout; }; @@ -181,18 +185,14 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, static inline bool hash_netiface4_data_isnull(const struct hash_netiface4_elem *elem) { - return elem->cidr == 0; + return elem->elem == 0; } static inline void hash_netiface4_data_copy(struct hash_netiface4_elem *dst, const struct hash_netiface4_elem *src) { - dst->ip = src->ip; - dst->cidr = src->cidr; - dst->physdev = src->physdev; - dst->iface = src->iface; - dst->nomatch = src->nomatch; + memcpy(dst, src, sizeof(*dst)); } static inline void @@ -201,10 +201,10 @@ hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) dst->nomatch = flags & IPSET_FLAG_NOMATCH; } -static inline bool +static inline int hash_netiface4_data_match(const struct hash_netiface4_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -217,7 +217,7 @@ hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr) static inline void hash_netiface4_data_zero_out(struct hash_netiface4_elem *elem) { - elem->cidr = 0; + elem->elem = 0; } static bool @@ -277,7 +277,7 @@ static inline void hash_netiface4_data_next(struct ip_set_hash *h, const struct hash_netiface4_elem *d) { - h->next.ip = ntohl(d->ip); + h->next.ip = d->ip; } static int @@ -288,7 +288,8 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem data = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .elem = 1, }; int ret; @@ -339,7 +340,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], { struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netiface4_elem data = { .cidr = HOST_MASK }; + struct hash_netiface4_elem data = { .cidr = HOST_MASK, .elem = 1 }; u32 ip = 0, ip_to, last; u32 timeout = h->timeout; char iface[IFNAMSIZ]; @@ -360,7 +361,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!data.cidr || data.cidr > HOST_MASK) + if (data.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } @@ -389,7 +390,6 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH)) flags |= (cadt_flags << 16); } - if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { data.ip = htonl(ip & ip_set_hostmask(data.cidr)); ret = adtfn(set, &data, timeout, flags); @@ -409,7 +409,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); while (!after(ip, ip_to)) { data.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); @@ -442,7 +442,7 @@ struct hash_netiface6_elem_hashed { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; }; #define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed) @@ -452,7 +452,7 @@ struct hash_netiface6_elem { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; const char *iface; }; @@ -461,7 +461,7 @@ struct hash_netiface6_telem { u8 physdev; u8 cidr; u8 nomatch; - u8 padding; + u8 elem; const char *iface; unsigned long timeout; }; @@ -481,7 +481,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, static inline bool hash_netiface6_data_isnull(const struct hash_netiface6_elem *elem) { - return elem->cidr == 0; + return elem->elem == 0; } static inline void @@ -497,16 +497,16 @@ hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags) dst->nomatch = flags & IPSET_FLAG_NOMATCH; } -static inline bool +static inline int hash_netiface6_data_match(const struct hash_netiface6_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) { - elem->cidr = 0; + elem->elem = 0; } static inline void @@ -590,7 +590,8 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem data = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .elem = 1, }; int ret; @@ -637,7 +638,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], { struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netiface6_elem data = { .cidr = HOST_MASK }; + struct hash_netiface6_elem data = { .cidr = HOST_MASK, .elem = 1 }; u32 timeout = h->timeout; char iface[IFNAMSIZ]; int ret; @@ -659,7 +660,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!data.cidr || data.cidr > HOST_MASK) + if (data.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip6_netmask(&data.ip, data.cidr); @@ -773,11 +774,12 @@ hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags) static struct ip_set_type hash_netiface_type __read_mostly = { .name = "hash:net,iface", .protocol = IPSET_PROTOCOL, - .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE, + .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE | + IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 1, /* nomatch flag support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index fc3143a2d41b..7ef700de596c 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -24,9 +24,14 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> +#define REVISION_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +#define REVISION_MAX 3 /* nomatch flag support added */ + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("hash:net,port type of IP sets"); +IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_hash:net,port"); /* Type specific function prefix */ @@ -99,10 +104,10 @@ hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } -static inline bool +static inline int hash_netport4_data_match(const struct hash_netport4_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -171,8 +176,8 @@ static inline void hash_netport4_data_next(struct ip_set_hash *h, const struct hash_netport4_elem *d) { - h->next.ip = ntohl(d->ip); - h->next.port = ntohs(d->port); + h->next.ip = d->ip; + h->next.port = d->port; } static int @@ -289,12 +294,13 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) - ip = h->next.ip; + ip = ntohl(h->next.ip); while (!after(ip, ip_to)) { data.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &cidr); data.cidr = cidr - 1; - p = retried && ip == h->next.ip ? h->next.port : port; + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; for (; p <= port_to; p++) { data.port = htons(p); ret = adtfn(set, &data, timeout, flags); @@ -369,10 +375,10 @@ hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } -static inline bool +static inline int hash_netport6_data_match(const struct hash_netport6_elem *elem) { - return !elem->nomatch; + return elem->nomatch ? -ENOTEMPTY : 1; } static inline void @@ -450,7 +456,7 @@ static inline void hash_netport6_data_next(struct ip_set_hash *h, const struct hash_netport6_elem *d) { - h->next.port = ntohs(d->port); + h->next.port = d->port; } static int @@ -554,7 +560,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); if (retried) - port = h->next.port; + port = ntohs(h->next.port); for (; port <= port_to; port++) { data.port = htons(port); ret = adtfn(set, &data, timeout, flags); @@ -644,13 +650,11 @@ hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) static struct ip_set_type hash_netport_type __read_mostly = { .name = "hash:net,port", .protocol = IPSET_PROTOCOL, - .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = 0, - /* 1 SCTP and UDPLITE support added */ - /* 2, Range as input support for IPv4 added */ - .revision_max = 3, /* nomatch flag support added */ + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 6cb1225765f9..8371c2bac2e4 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -16,9 +16,12 @@ #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_list.h> +#define REVISION_MIN 0 +#define REVISION_MAX 0 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("list:set type of IP sets"); +IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX); MODULE_ALIAS("ip_set_list:set"); /* Member elements without and with timeout */ @@ -579,8 +582,8 @@ static struct ip_set_type list_set_type __read_mostly = { .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = 0, - .revision_max = 0, + .revision_min = REVISION_MIN, + .revision_max = REVISION_MAX, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index f9871385a65e..8b2cffdfdd99 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -250,7 +250,8 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT + depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \ + NF_CONNTRACK_FTP select IP_VS_NFCT ---help--- FTP is a protocol that transfers IP address and/or port number in diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 64f9e8f13207..9713e6e86d47 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -180,22 +180,38 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, } -/* - * ip_vs_app registration routine - */ -int register_ip_vs_app(struct net *net, struct ip_vs_app *app) +/* Register application for netns */ +struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app) { struct netns_ipvs *ipvs = net_ipvs(net); - /* increase the module use count */ - ip_vs_use_count_inc(); + struct ip_vs_app *a; + int err = 0; + + if (!ipvs) + return ERR_PTR(-ENOENT); mutex_lock(&__ip_vs_app_mutex); - list_add(&app->a_list, &ipvs->app_list); + list_for_each_entry(a, &ipvs->app_list, a_list) { + if (!strcmp(app->name, a->name)) { + err = -EEXIST; + goto out_unlock; + } + } + a = kmemdup(app, sizeof(*app), GFP_KERNEL); + if (!a) { + err = -ENOMEM; + goto out_unlock; + } + INIT_LIST_HEAD(&a->incs_list); + list_add(&a->a_list, &ipvs->app_list); + /* increase the module use count */ + ip_vs_use_count_inc(); +out_unlock: mutex_unlock(&__ip_vs_app_mutex); - return 0; + return err ? ERR_PTR(err) : a; } @@ -205,20 +221,29 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app) */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { - struct ip_vs_app *inc, *nxt; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_app *a, *anxt, *inc, *nxt; + + if (!ipvs) + return; mutex_lock(&__ip_vs_app_mutex); - list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { - ip_vs_app_inc_release(net, inc); - } + list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { + if (app && strcmp(app->name, a->name)) + continue; + list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { + ip_vs_app_inc_release(net, inc); + } - list_del(&app->a_list); + list_del(&a->a_list); + kfree(a); - mutex_unlock(&__ip_vs_app_mutex); + /* decrease the module use count */ + ip_vs_use_count_dec(); + } - /* decrease the module use count */ - ip_vs_use_count_dec(); + mutex_unlock(&__ip_vs_app_mutex); } @@ -586,5 +611,6 @@ int __net_init ip_vs_app_net_init(struct net *net) void __net_exit ip_vs_app_net_cleanup(struct net *net) { + unregister_ip_vs_app(net, NULL /* all */); proc_net_remove(net, "ip_vs_app"); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b54eccef40b5..58918e20f9d5 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offset, ihl, verdict; + unsigned int offset, offset2, ihl, verdict; + bool ipip; *related = 1; @@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) net = skb_net(skb); + /* Special case for errors for IPIP packets */ + ipip = false; + if (cih->protocol == IPPROTO_IPIP) { + if (unlikely(cih->frag_off & htons(IP_OFFSET))) + return NF_ACCEPT; + /* Error for our IPIP must arrive at LOCAL_IN */ + if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL)) + return NF_ACCEPT; + offset += cih->ihl * 4; + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) + return NF_ACCEPT; /* The packet looks wrong, ignore */ + ipip = true; + } + pd = ip_vs_proto_data_get(net, cih->protocol); if (!pd) return NF_ACCEPT; @@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking incoming ICMP for"); + offset2 = offset; offset += cih->ihl * 4; ip_vs_fill_iphdr(AF_INET, cih, &ciph); - /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); + /* The embedded headers contain source and dest in reverse order. + * For IPIP this is error for request, not for reply. + */ + cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); if (!cp) return NF_ACCEPT; @@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) goto out; } + if (ipip) { + __be32 info = ic->un.gateway; + + /* Update the MTU */ + if (ic->type == ICMP_DEST_UNREACH && + ic->code == ICMP_FRAG_NEEDED) { + struct ip_vs_dest *dest = cp->dest; + u32 mtu = ntohs(ic->un.frag.mtu); + + /* Strip outer IP and ICMP, go to IPIP header */ + __skb_pull(skb, ihl + sizeof(_icmph)); + offset2 -= ihl + sizeof(_icmph); + skb_reset_network_header(skb); + IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); + rcu_read_lock(); + ipv4_update_pmtu(skb, dev_net(skb->dev), + mtu, 0, 0, 0, 0); + rcu_read_unlock(); + /* Client uses PMTUD? */ + if (!(cih->frag_off & htons(IP_DF))) + goto ignore_ipip; + /* Prefer the resulting PMTU */ + if (dest) { + spin_lock(&dest->dst_lock); + if (dest->dst_cache) + mtu = dst_mtu(dest->dst_cache); + spin_unlock(&dest->dst_lock); + } + if (mtu > 68 + sizeof(struct iphdr)) + mtu -= sizeof(struct iphdr); + info = htonl(mtu); + } + /* Strip outer IP, ICMP and IPIP, go to IP header of + * original request. + */ + __skb_pull(skb, offset2); + skb_reset_network_header(skb); + IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, + ic->type, ic->code, ntohl(info)); + icmp_send(skb, ic->type, ic->code, info); + /* ICMP can be shorter but anyways, account it */ + ip_vs_out_stats(cp, skb); + +ignore_ipip: + consume_skb(skb); + verdict = NF_STOLEN; + goto out; + } + /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 84444dda194b..7e7198b51c06 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -539,8 +539,7 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Remove it from the rs_table table. */ if (!list_empty(&dest->d_list)) { - list_del(&dest->d_list); - INIT_LIST_HEAD(&dest->d_list); + list_del_init(&dest->d_list); } return 1; @@ -1171,8 +1170,10 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, goto out_err; } svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!svc->stats.cpustats) + if (!svc->stats.cpustats) { + ret = -ENOMEM; goto out_err; + } /* I'm the first user of the service */ atomic_set(&svc->usecnt, 0); @@ -1801,6 +1802,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "pmtu_disc", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -2759,6 +2766,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; + memset(&t, 0, sizeof(t)); __ip_vs_get_timeouts(net, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; @@ -2930,7 +2938,7 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_SERVICE); if (!hdr) @@ -3119,7 +3127,7 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DEST); if (!hdr) @@ -3248,7 +3256,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, struct netlink_callback *cb) { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DAEMON); if (!hdr) @@ -3675,7 +3683,7 @@ static void ip_vs_genl_unregister(void) * per netns intit/exit func. */ #ifdef CONFIG_SYSCTL -int __net_init ip_vs_control_net_init_sysctl(struct net *net) +static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3726,6 +3734,8 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); tbl[idx++].data = &ipvs->sysctl_sync_retries; tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; + ipvs->sysctl_pmtu_disc = 1; + tbl[idx++].data = &ipvs->sysctl_pmtu_disc; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); @@ -3743,7 +3753,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3754,8 +3764,8 @@ void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) #else -int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } +static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } #endif diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index b20b29c903ef..4f53a5f04437 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -268,6 +268,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * packet. */ ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + iph->ihl * 4, start-data, end-start, buf, buf_len); if (ret) { @@ -441,16 +442,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!ipvs) return -ENOENT; - app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL); - if (!app) - return -ENOMEM; - INIT_LIST_HEAD(&app->a_list); - INIT_LIST_HEAD(&app->incs_list); - ipvs->ftp_app = app; - ret = register_ip_vs_app(net, app); - if (ret) - goto err_exit; + app = register_ip_vs_app(net, &ip_vs_ftp); + if (IS_ERR(app)) + return PTR_ERR(app); for (i = 0; i < ports_count; i++) { if (!ports[i]) @@ -464,9 +459,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) return 0; err_unreg: - unregister_ip_vs_app(net, app); -err_exit: - kfree(ipvs->ftp_app); + unregister_ip_vs_app(net, &ip_vs_ftp); return ret; } /* @@ -474,10 +467,7 @@ err_exit: */ static void __ip_vs_ftp_exit(struct net *net) { - struct netns_ipvs *ipvs = net_ipvs(net); - - unregister_ip_vs_app(net, ipvs->ftp_app); - kfree(ipvs->ftp_app); + unregister_ip_vs_app(net, &ip_vs_ftp); } static struct pernet_operations ip_vs_ftp_ops = { diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 65b616ae1716..56f6d5d81a77 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -49,6 +49,7 @@ enum { IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to * local */ + IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ }; /* @@ -84,6 +85,58 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) return dst; } +static inline bool +__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) +{ + if (IP6CB(skb)->frag_max_size) { + /* frag_max_size tell us that, this packet have been + * defragmented by netfilter IPv6 conntrack module. + */ + if (IP6CB(skb)->frag_max_size > mtu) + return true; /* largest fragment violate MTU */ + } + else if (skb->len > mtu && !skb_is_gso(skb)) { + return true; /* Packet size violate MTU size */ + } + return false; +} + +/* Get route to daddr, update *saddr, optionally bind route to saddr */ +static struct rtable *do_output_route4(struct net *net, __be32 daddr, + u32 rtos, int rt_mode, __be32 *saddr) +{ + struct flowi4 fl4; + struct rtable *rt; + int loop = 0; + + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = daddr; + fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; + fl4.flowi4_tos = rtos; + +retry: + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + /* Invalid saddr ? */ + if (PTR_ERR(rt) == -EINVAL && *saddr && + rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { + *saddr = 0; + flowi4_update_output(&fl4, 0, rtos, daddr, 0); + goto retry; + } + IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); + return NULL; + } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + ip_rt_put(rt); + *saddr = fl4.saddr; + flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); + loop++; + goto retry; + } + *saddr = fl4.saddr; + return rt; +} + /* Get route to destination or remote server */ static struct rtable * __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, @@ -98,20 +151,13 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = dest->addr.ip; - fl4.flowi4_tos = rtos; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { + rt = do_output_route4(net, dest->addr.ip, rtos, + rt_mode, &dest->dst_saddr.ip); + if (!rt) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", - &dest->addr.ip); return NULL; } __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - dest->dst_saddr.ip = fl4.saddr; IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " "rtos=%X\n", &dest->addr.ip, &dest->dst_saddr.ip, @@ -122,19 +168,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, *ret_saddr = dest->dst_saddr.ip; spin_unlock(&dest->dst_lock); } else { - struct flowi4 fl4; + __be32 saddr = htonl(INADDR_ANY); - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = daddr; - fl4.flowi4_tos = rtos; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { - IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", - &daddr); + /* For such unconfigured boxes avoid many route lookups + * for performance reasons because we do not remember saddr + */ + rt_mode &= ~IP_VS_RT_MODE_CONNECT; + rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); + if (!rt) return NULL; - } if (ret_saddr) - *ret_saddr = fl4.saddr; + *ret_saddr = saddr; } local = rt->rt_flags & RTCF_LOCAL; @@ -331,6 +375,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) old_dst = dest->dst_cache; dest->dst_cache = NULL; dst_release(old_dst); + dest->dst_saddr.ip = 0; } #define IP_VS_XMIT_TUNNEL(skb, cp) \ @@ -462,7 +507,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -683,7 +728,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -766,12 +811,13 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = ip_hdr(skb); u8 tos = old_iph->tos; - __be16 df = old_iph->frag_off; + __be16 df; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; @@ -781,7 +827,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL, + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_CONNECT, &saddr))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { @@ -796,13 +843,13 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto tx_error_put; } - if (skb_dst(skb)) + if (rt_is_output_route(skb_rtable(skb))) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - df |= (old_iph->frag_off & htons(IP_DF)); + /* Copy DF, reset fragment offset and MF */ + df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; - if ((old_iph->frag_off & htons(IP_DF) && - mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { + if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; @@ -915,8 +962,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && - !skb_is_gso(skb)) { + /* MTU checking: Notice that 'mtu' have been adjusted before hand */ + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -1082,7 +1129,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu) { + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -1318,7 +1365,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (__mtu_check_toobig_v6(skb, mtu)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index f2de8c55ac50..c514fe6033d2 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -40,6 +40,7 @@ MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) @@ -155,8 +156,8 @@ static int amanda_help(struct sk_buff *skb, nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); if (nf_nat_amanda && ct->status & IPS_NAT_MASK) - ret = nf_nat_amanda(skb, ctinfo, off - dataoff, - len, exp); + ret = nf_nat_amanda(skb, ctinfo, protoff, + off - dataoff, len, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cf4875565d67..0f241be28f9e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -55,6 +55,12 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); +int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff); +EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); + DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -249,12 +255,15 @@ static void death_by_event(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); + + BUG_ON(ecache == NULL); if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { /* bad luck, let's retry again */ - ct->timeout.expires = jiffies + + ecache->timeout.expires = jiffies + (random32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ct->timeout); + add_timer(&ecache->timeout); return; } /* we've got the event delivered, now it's dying */ @@ -268,6 +277,9 @@ static void death_by_event(unsigned long ul_conntrack) void nf_ct_insert_dying_list(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); + + BUG_ON(ecache == NULL); /* add this conntrack to the dying list */ spin_lock_bh(&nf_conntrack_lock); @@ -275,10 +287,10 @@ void nf_ct_insert_dying_list(struct nf_conn *ct) &net->ct.dying); spin_unlock_bh(&nf_conntrack_lock); /* set a new timer to retry event delivery */ - setup_timer(&ct->timeout, death_by_event, (unsigned long)ct); - ct->timeout.expires = jiffies + + setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); + ecache->timeout.expires = jiffies + (random32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ct->timeout); + add_timer(&ecache->timeout); } EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); @@ -924,7 +936,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; - struct nf_conn_timeout *timeout_ext; unsigned int *timeouts; unsigned int dataoff; u_int8_t protonum; @@ -991,11 +1002,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_ASSERT(skb->nfct); /* Decide what timeout policy we want to apply to this flow. */ - timeout_ext = nf_ct_timeout_find(ct); - if (timeout_ext) - timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); - else - timeouts = l4proto->get_timeouts(net); + timeouts = nf_ct_timeout_lookup(net, ct, l4proto); ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); if (ret <= 0) { @@ -1217,6 +1224,8 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), spin_lock_bh(&nf_conntrack_lock); for (; *bucket < net->ct.htable_size; (*bucket)++) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { + if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index e7be79e640de..de9781b6464f 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -61,7 +61,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) goto out_unlock; item.ct = ct; - item.pid = 0; + item.portid = 0; item.report = 0; ret = notify->fcn(events | missed, &item); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 45cf602a76bc..527651a53a45 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -361,23 +361,6 @@ static void evict_oldest_expect(struct nf_conn *master, } } -static inline int refresh_timer(struct nf_conntrack_expect *i) -{ - struct nf_conn_help *master_help = nfct_help(i->master); - const struct nf_conntrack_expect_policy *p; - - if (!del_timer(&i->timeout)) - return 0; - - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[i->class]; - i->timeout.expires = jiffies + p->timeout * HZ; - add_timer(&i->timeout); - return 1; -} - static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) { const struct nf_conntrack_expect_policy *p; @@ -386,7 +369,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); - struct hlist_node *n; + struct hlist_node *n, *next; unsigned int h; int ret = 1; @@ -395,12 +378,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { - /* Refresh timer: if it's dying, ignore.. */ - if (refresh_timer(i)) { - ret = 0; - goto out; + if (del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + nf_ct_expect_put(i); + break; } } else if (expect_clash(i, expect)) { ret = -EBUSY; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 4bb771d1f57a..1ce3befb7c8a 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -48,6 +48,7 @@ module_param(loose, bool, 0600); unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp); @@ -395,6 +396,12 @@ static int help(struct sk_buff *skb, /* Look up to see if we're just after a \n. */ if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { + /* We're picking up this, clear flags and let it continue */ + if (unlikely(ct_ftp_info->flags[dir] & NF_CT_FTP_SEQ_PICKUP)) { + ct_ftp_info->flags[dir] ^= NF_CT_FTP_SEQ_PICKUP; + goto skip_nl_seq; + } + /* Now if this ends in \n, update ftp info. */ pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n", ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)", @@ -405,6 +412,7 @@ static int help(struct sk_buff *skb, goto out_update_nl; } +skip_nl_seq: /* Initialize IP/IPv6 addr to expected address (it's not mentioned in EPSV responses) */ cmd.l3num = nf_ct_l3num(ct); @@ -489,7 +497,7 @@ static int help(struct sk_buff *skb, nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook); if (nf_nat_ftp && ct->status & IPS_NAT_MASK) ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype, - matchoff, matchlen, exp); + protoff, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ if (nf_ct_expect_related(exp) != 0) @@ -511,6 +519,19 @@ out_update_nl: return ret; } +static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct) +{ + struct nf_ct_ftp_master *ftp = nfct_help_data(ct); + + /* This conntrack has been injected from user-space, always pick up + * sequence tracking. Otherwise, the first FTP command after the + * failover breaks. + */ + ftp->flags[IP_CT_DIR_ORIGINAL] |= NF_CT_FTP_SEQ_PICKUP; + ftp->flags[IP_CT_DIR_REPLY] |= NF_CT_FTP_SEQ_PICKUP; + return 0; +} + static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly; static const struct nf_conntrack_expect_policy ftp_exp_policy = { @@ -560,6 +581,7 @@ static int __init nf_conntrack_ftp_init(void) ftp[i][j].expect_policy = &ftp_exp_policy; ftp[i][j].me = THIS_MODULE; ftp[i][j].help = help; + ftp[i][j].from_nlattr = nf_ct_ftp_from_nlattr; if (ports[i] == FTP_PORT) sprintf(ftp[i][j].name, "ftp"); else diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 4283b207e63b..1b30b0dee708 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -49,12 +49,12 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " "(determined by routing information)"); /* Hooks for NAT */ -int (*set_h245_addr_hook) (struct sk_buff *skb, +int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_inet_addr *addr, __be16 port) __read_mostly; -int (*set_h225_addr_hook) (struct sk_buff *skb, +int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_inet_addr *addr, __be16 port) @@ -62,16 +62,17 @@ int (*set_h225_addr_hook) (struct sk_buff *skb, int (*set_sig_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; int (*set_ras_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, __be16 rtp_port, @@ -80,24 +81,28 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; int (*nat_callforwarding_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; @@ -251,6 +256,7 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, /****************************************************************************/ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr) { @@ -295,9 +301,10 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff, taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ if (nf_ct_expect_related(rtp_exp) == 0) { @@ -324,6 +331,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, static int expect_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr) { @@ -353,9 +361,10 @@ static int expect_t120(struct sk_buff *skb, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && (nat_t120 = rcu_dereference(nat_t120_hook)) && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr, + ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -374,6 +383,7 @@ static int expect_t120(struct sk_buff *skb, static int process_h245_channel(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, H2250LogicalChannelParameters *channel) { @@ -381,7 +391,7 @@ static int process_h245_channel(struct sk_buff *skb, if (channel->options & eH2250LogicalChannelParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff, &channel->mediaChannel); if (ret < 0) return -1; @@ -390,7 +400,7 @@ static int process_h245_channel(struct sk_buff *skb, if (channel-> options & eH2250LogicalChannelParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff, &channel->mediaControlChannel); if (ret < 0) return -1; @@ -402,6 +412,7 @@ static int process_h245_channel(struct sk_buff *skb, /****************************************************************************/ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, OpenLogicalChannel *olc) { @@ -412,7 +423,8 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, if (olc->forwardLogicalChannelParameters.multiplexParameters.choice == eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters) { - ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, + protoff, data, dataoff, &olc-> forwardLogicalChannelParameters. multiplexParameters. @@ -430,7 +442,8 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { ret = - process_h245_channel(skb, ct, ctinfo, data, dataoff, + process_h245_channel(skb, ct, ctinfo, + protoff, data, dataoff, &olc-> reverseLogicalChannelParameters. multiplexParameters. @@ -448,7 +461,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, t120.choice == eDataProtocolCapability_separateLANStack && olc->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(skb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff, &olc->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -461,7 +474,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, OpenLogicalChannelAck *olca) { H2250LogicalChannelAckParameters *ack; @@ -477,7 +490,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, choice == eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { - ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, + protoff, data, dataoff, &olca-> reverseLogicalChannelParameters. multiplexParameters. @@ -496,7 +510,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, + protoff, data, dataoff, &ack->mediaChannel); if (ret < 0) return -1; @@ -505,7 +520,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, + protoff, data, dataoff, &ack->mediaControlChannel); if (ret < 0) return -1; @@ -515,7 +531,7 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, if ((olca->options & eOpenLogicalChannelAck_separateStack) && olca->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(skb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff, &olca->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -528,14 +544,15 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, MultimediaSystemControlMessage *mscm) { switch (mscm->choice) { case eMultimediaSystemControlMessage_request: if (mscm->request.choice == eRequestMessage_openLogicalChannel) { - return process_olc(skb, ct, ctinfo, data, dataoff, + return process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &mscm->request.openLogicalChannel); } pr_debug("nf_ct_h323: H.245 Request %d\n", @@ -544,7 +561,8 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct, case eMultimediaSystemControlMessage_response: if (mscm->response.choice == eResponseMessage_openLogicalChannelAck) { - return process_olca(skb, ct, ctinfo, data, dataoff, + return process_olca(skb, ct, ctinfo, + protoff, data, dataoff, &mscm->response. openLogicalChannelAck); } @@ -595,7 +613,8 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, } /* Process H.245 signal */ - if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0) + if (process_h245(skb, ct, ctinfo, protoff, + &data, dataoff, &mscm) < 0) goto drop; } @@ -659,7 +678,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, /****************************************************************************/ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr) { int dir = CTINFO2DIR(ctinfo); @@ -688,9 +707,10 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && (nat_h245 = rcu_dereference(nat_h245_hook)) && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr, + ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -776,6 +796,7 @@ static int callforward_do_filter(const union nf_inet_addr *src, static int expect_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr) { @@ -811,9 +832,11 @@ static int expect_callforwarding(struct sk_buff *skb, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff, + ret = nat_callforwarding(skb, ct, ctinfo, + protoff, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -831,6 +854,7 @@ static int expect_callforwarding(struct sk_buff *skb, /****************************************************************************/ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Setup_UUIE *setup) { @@ -844,7 +868,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Setup\n"); if (setup->options & eSetup_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &setup->h245Address); if (ret < 0) return -1; @@ -852,14 +876,15 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, set_h225_addr = rcu_dereference(set_h225_addr_hook); if ((setup->options & eSetup_UUIE_destCallSignalAddress) && - (set_h225_addr) && ct->status & IPS_NAT_MASK && + (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK && get_h225_addr(ct, *data, &setup->destCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) { pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3, ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); - ret = set_h225_addr(skb, data, dataoff, + ret = set_h225_addr(skb, protoff, data, dataoff, &setup->destCallSignalAddress, &ct->tuplehash[!dir].tuple.src.u3, ct->tuplehash[!dir].tuple.src.u.tcp.port); @@ -868,14 +893,15 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, } if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) && - (set_h225_addr) && ct->status & IPS_NAT_MASK && + (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK && get_h225_addr(ct, *data, &setup->sourceCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) { pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3, ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); - ret = set_h225_addr(skb, data, dataoff, + ret = set_h225_addr(skb, protoff, data, dataoff, &setup->sourceCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple.dst.u.tcp.port); @@ -885,7 +911,8 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, if (setup->options & eSetup_UUIE_fastStart) { for (i = 0; i < setup->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &setup->fastStart.item[i]); if (ret < 0) return -1; @@ -899,6 +926,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, static int process_callproceeding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, CallProceeding_UUIE *callproc) { @@ -908,7 +936,7 @@ static int process_callproceeding(struct sk_buff *skb, pr_debug("nf_ct_q931: CallProceeding\n"); if (callproc->options & eCallProceeding_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &callproc->h245Address); if (ret < 0) return -1; @@ -916,7 +944,8 @@ static int process_callproceeding(struct sk_buff *skb, if (callproc->options & eCallProceeding_UUIE_fastStart) { for (i = 0; i < callproc->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &callproc->fastStart.item[i]); if (ret < 0) return -1; @@ -929,6 +958,7 @@ static int process_callproceeding(struct sk_buff *skb, /****************************************************************************/ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Connect_UUIE *connect) { @@ -938,7 +968,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Connect\n"); if (connect->options & eConnect_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &connect->h245Address); if (ret < 0) return -1; @@ -946,7 +976,8 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, if (connect->options & eConnect_UUIE_fastStart) { for (i = 0; i < connect->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &connect->fastStart.item[i]); if (ret < 0) return -1; @@ -959,6 +990,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Alerting_UUIE *alert) { @@ -968,7 +1000,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Alerting\n"); if (alert->options & eAlerting_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &alert->h245Address); if (ret < 0) return -1; @@ -976,7 +1008,8 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, if (alert->options & eAlerting_UUIE_fastStart) { for (i = 0; i < alert->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &alert->fastStart.item[i]); if (ret < 0) return -1; @@ -989,6 +1022,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Facility_UUIE *facility) { @@ -999,15 +1033,15 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, if (facility->reason.choice == eFacilityReason_callForwarded) { if (facility->options & eFacility_UUIE_alternativeAddress) - return expect_callforwarding(skb, ct, ctinfo, data, - dataoff, + return expect_callforwarding(skb, ct, ctinfo, + protoff, data, dataoff, &facility-> alternativeAddress); return 0; } if (facility->options & eFacility_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &facility->h245Address); if (ret < 0) return -1; @@ -1015,7 +1049,8 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, if (facility->options & eFacility_UUIE_fastStart) { for (i = 0; i < facility->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &facility->fastStart.item[i]); if (ret < 0) return -1; @@ -1028,6 +1063,7 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, Progress_UUIE *progress) { @@ -1037,7 +1073,7 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Progress\n"); if (progress->options & eProgress_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff, &progress->h245Address); if (ret < 0) return -1; @@ -1045,7 +1081,8 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, if (progress->options & eProgress_UUIE_fastStart) { for (i = 0; i < progress->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, + protoff, data, dataoff, &progress->fastStart.item[i]); if (ret < 0) return -1; @@ -1058,7 +1095,8 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, Q931 *q931) + unsigned int protoff, unsigned char **data, int dataoff, + Q931 *q931) { H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu; int i; @@ -1066,28 +1104,29 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, switch (pdu->h323_message_body.choice) { case eH323_UU_PDU_h323_message_body_setup: - ret = process_setup(skb, ct, ctinfo, data, dataoff, + ret = process_setup(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.setup); break; case eH323_UU_PDU_h323_message_body_callProceeding: - ret = process_callproceeding(skb, ct, ctinfo, data, dataoff, + ret = process_callproceeding(skb, ct, ctinfo, + protoff, data, dataoff, &pdu->h323_message_body. callProceeding); break; case eH323_UU_PDU_h323_message_body_connect: - ret = process_connect(skb, ct, ctinfo, data, dataoff, + ret = process_connect(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.connect); break; case eH323_UU_PDU_h323_message_body_alerting: - ret = process_alerting(skb, ct, ctinfo, data, dataoff, + ret = process_alerting(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.alerting); break; case eH323_UU_PDU_h323_message_body_facility: - ret = process_facility(skb, ct, ctinfo, data, dataoff, + ret = process_facility(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.facility); break; case eH323_UU_PDU_h323_message_body_progress: - ret = process_progress(skb, ct, ctinfo, data, dataoff, + ret = process_progress(skb, ct, ctinfo, protoff, data, dataoff, &pdu->h323_message_body.progress); break; default: @@ -1101,7 +1140,8 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, if (pdu->options & eH323_UU_PDU_h245Control) { for (i = 0; i < pdu->h245Control.count; i++) { - ret = process_h245(skb, ct, ctinfo, data, dataoff, + ret = process_h245(skb, ct, ctinfo, + protoff, data, dataoff, &pdu->h245Control.item[i]); if (ret < 0) return -1; @@ -1146,7 +1186,8 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, } /* Process Q.931 signal */ - if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0) + if (process_q931(skb, ct, ctinfo, protoff, + &data, dataoff, &q931) < 0) goto drop; } @@ -1243,7 +1284,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp, /****************************************************************************/ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned char **data, + unsigned int protoff, unsigned char **data, TransportAddress *taddr, int count) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1278,8 +1319,10 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nat_q931 = rcu_dereference(nat_q931_hook); - if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp); + if (nat_q931 && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { /* Need NAT */ + ret = nat_q931(skb, ct, ctinfo, protoff, data, + taddr, i, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1299,6 +1342,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, GatekeeperRequest *grq) { typeof(set_ras_addr_hook) set_ras_addr; @@ -1306,8 +1350,9 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: GRQ\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */ - return set_ras_addr(skb, ct, ctinfo, data, + if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) /* NATed */ + return set_ras_addr(skb, ct, ctinfo, protoff, data, &grq->rasAddress, 1); return 0; } @@ -1315,6 +1360,7 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, GatekeeperConfirm *gcf) { int dir = CTINFO2DIR(ctinfo); @@ -1359,6 +1405,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, RegistrationRequest *rrq) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1367,15 +1414,16 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: RRQ\n"); - ret = expect_q931(skb, ct, ctinfo, data, + ret = expect_q931(skb, ct, ctinfo, protoff, data, rrq->callSignalAddress.item, rrq->callSignalAddress.count); if (ret < 0) return -1; set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(skb, ct, ctinfo, data, + if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { + ret = set_ras_addr(skb, ct, ctinfo, protoff, data, rrq->rasAddress.item, rrq->rasAddress.count); if (ret < 0) @@ -1394,6 +1442,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, RegistrationConfirm *rcf) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1405,8 +1454,9 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: RCF\n"); set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, data, + if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { + ret = set_sig_addr(skb, ct, ctinfo, protoff, data, rcf->callSignalAddress.item, rcf->callSignalAddress.count); if (ret < 0) @@ -1443,6 +1493,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, UnregistrationRequest *urq) { struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1453,8 +1504,9 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: URQ\n"); set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, data, + if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { + ret = set_sig_addr(skb, ct, ctinfo, protoff, data, urq->callSignalAddress.item, urq->callSignalAddress.count); if (ret < 0) @@ -1475,6 +1527,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, AdmissionRequest *arq) { const struct nf_ct_h323_master *info = nfct_help_data(ct); @@ -1491,9 +1544,10 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, &addr, &port) && !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && port == info->sig_port[dir] && + nf_ct_l3num(ct) == NFPROTO_IPV4 && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Answering ARQ */ - return set_h225_addr(skb, data, 0, + return set_h225_addr(skb, protoff, data, 0, &arq->destCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); @@ -1503,9 +1557,10 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, get_h225_addr(ct, *data, &arq->srcCallSignalAddress, &addr, &port) && !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && - set_h225_addr && ct->status & IPS_NAT_MASK) { + set_h225_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { /* Calling ARQ */ - return set_h225_addr(skb, data, 0, + return set_h225_addr(skb, protoff, data, 0, &arq->srcCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, port); @@ -1517,6 +1572,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, AdmissionConfirm *acf) { int dir = CTINFO2DIR(ctinfo); @@ -1535,8 +1591,9 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, if (!memcmp(&addr, &ct->tuplehash[dir].tuple.dst.u3, sizeof(addr))) { /* Answering ACF */ set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && ct->status & IPS_NAT_MASK) - return set_sig_addr(skb, ct, ctinfo, data, + if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) + return set_sig_addr(skb, ct, ctinfo, protoff, data, &acf->destCallSignalAddress, 1); return 0; } @@ -1564,6 +1621,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, LocationRequest *lrq) { typeof(set_ras_addr_hook) set_ras_addr; @@ -1571,8 +1629,9 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: LRQ\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && ct->status & IPS_NAT_MASK) - return set_ras_addr(skb, ct, ctinfo, data, + if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) + return set_ras_addr(skb, ct, ctinfo, protoff, data, &lrq->replyAddress, 1); return 0; } @@ -1580,6 +1639,7 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, LocationConfirm *lcf) { int dir = CTINFO2DIR(ctinfo); @@ -1619,6 +1679,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, InfoRequestResponse *irr) { int ret; @@ -1628,16 +1689,18 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: IRR\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(skb, ct, ctinfo, data, + if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { + ret = set_ras_addr(skb, ct, ctinfo, protoff, data, &irr->rasAddress, 1); if (ret < 0) return -1; } set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, data, + if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + ct->status & IPS_NAT_MASK) { + ret = set_sig_addr(skb, ct, ctinfo, protoff, data, irr->callSignalAddress.item, irr->callSignalAddress.count); if (ret < 0) @@ -1650,38 +1713,39 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int process_ras(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, RasMessage *ras) { switch (ras->choice) { case eRasMessage_gatekeeperRequest: - return process_grq(skb, ct, ctinfo, data, + return process_grq(skb, ct, ctinfo, protoff, data, &ras->gatekeeperRequest); case eRasMessage_gatekeeperConfirm: - return process_gcf(skb, ct, ctinfo, data, + return process_gcf(skb, ct, ctinfo, protoff, data, &ras->gatekeeperConfirm); case eRasMessage_registrationRequest: - return process_rrq(skb, ct, ctinfo, data, + return process_rrq(skb, ct, ctinfo, protoff, data, &ras->registrationRequest); case eRasMessage_registrationConfirm: - return process_rcf(skb, ct, ctinfo, data, + return process_rcf(skb, ct, ctinfo, protoff, data, &ras->registrationConfirm); case eRasMessage_unregistrationRequest: - return process_urq(skb, ct, ctinfo, data, + return process_urq(skb, ct, ctinfo, protoff, data, &ras->unregistrationRequest); case eRasMessage_admissionRequest: - return process_arq(skb, ct, ctinfo, data, + return process_arq(skb, ct, ctinfo, protoff, data, &ras->admissionRequest); case eRasMessage_admissionConfirm: - return process_acf(skb, ct, ctinfo, data, + return process_acf(skb, ct, ctinfo, protoff, data, &ras->admissionConfirm); case eRasMessage_locationRequest: - return process_lrq(skb, ct, ctinfo, data, + return process_lrq(skb, ct, ctinfo, protoff, data, &ras->locationRequest); case eRasMessage_locationConfirm: - return process_lcf(skb, ct, ctinfo, data, + return process_lcf(skb, ct, ctinfo, protoff, data, &ras->locationConfirm); case eRasMessage_infoRequestResponse: - return process_irr(skb, ct, ctinfo, data, + return process_irr(skb, ct, ctinfo, protoff, data, &ras->infoRequestResponse); default: pr_debug("nf_ct_ras: RAS message %d\n", ras->choice); @@ -1721,7 +1785,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, } /* Process RAS message */ - if (process_ras(skb, ct, ctinfo, &data, &ras) < 0) + if (process_ras(skb, ct, ctinfo, protoff, &data, &ras) < 0) goto drop; accept: diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 009c52cfd1ec..3b20aa77cfc8 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -33,6 +33,7 @@ static DEFINE_SPINLOCK(irc_buffer_lock); unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) __read_mostly; @@ -205,7 +206,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, nf_nat_irc = rcu_dereference(nf_nat_irc_hook); if (nf_nat_irc && ct->status & IPS_NAT_MASK) - ret = nf_nat_irc(skb, ctinfo, + ret = nf_nat_irc(skb, ctinfo, protoff, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 14f67a2cbcb5..7bbfb3deea30 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,7 +45,7 @@ #include <net/netfilter/nf_conntrack_timestamp.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l4proto.h> #include <net/netfilter/nf_nat_helper.h> #endif @@ -418,16 +418,16 @@ nla_put_failure: } static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -604,7 +604,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -680,7 +680,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, net, item->pid, group, item->report, + err = nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -757,7 +757,7 @@ restart: #endif rcu_read_lock(); res = - ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct); @@ -961,7 +961,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, else { /* Flush the whole table */ nf_conntrack_flush_report(net, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); return 0; } @@ -985,7 +985,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (del_timer(&ct->timeout)) { if (nf_conntrack_event_report(IPCT_DESTROY, ct, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)) < 0) { nf_ct_delete_from_lists(ct); /* we failed to report the event, try later */ @@ -1069,14 +1069,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -1096,13 +1096,14 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, const struct nlattr *attr) { typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup; + int err; parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook); if (!parse_nat_setup) { #ifdef CONFIG_MODULES rcu_read_unlock(); nfnl_unlock(); - if (request_module("nf-nat-ipv4") < 0) { + if (request_module("nf-nat") < 0) { nfnl_lock(); rcu_read_lock(); return -EOPNOTSUPP; @@ -1115,7 +1116,23 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, return -EOPNOTSUPP; } - return parse_nat_setup(ct, manip, attr); + err = parse_nat_setup(ct, manip, attr); + if (err == -EAGAIN) { +#ifdef CONFIG_MODULES + rcu_read_unlock(); + nfnl_unlock(); + if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) { + nfnl_lock(); + rcu_read_lock(); + return -EOPNOTSUPP; + } + nfnl_lock(); + rcu_read_lock(); +#else + err = -EOPNOTSUPP; +#endif + } + return err; } #endif @@ -1221,7 +1238,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) if (help) { if (help->helper == helper) { /* update private helper data if allowed. */ - if (helper->from_nlattr && helpinfo) + if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); return 0; } else @@ -1450,7 +1467,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, goto err2; } /* set private helper data if allowed. */ - if (helper->from_nlattr && helpinfo) + if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); /* not in hash table yet so not strictly necessary */ @@ -1596,7 +1613,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK) | events, - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); } @@ -1618,7 +1635,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK), - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); } } @@ -1628,15 +1645,15 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, __u16 cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1688,7 +1705,7 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) st = per_cpu_ptr(net->ct.stat, cpu); if (ctnetlink_ct_stat_cpu_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; @@ -1714,16 +1731,16 @@ ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; unsigned int nr_conntracks = atomic_read(&net->ct.count); event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1756,14 +1773,14 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), sock_net(skb->sk)); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -1896,10 +1913,15 @@ static int ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) { struct nlattr *cda[CTA_MAX+1]; + int ret; nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); - return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + spin_lock_bh(&nf_conntrack_lock); + ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + spin_unlock_bh(&nf_conntrack_lock); + + return ret; } static struct nfq_ct_hook ctnetlink_nfqueue_hook = { @@ -1974,6 +1996,8 @@ nla_put_failure: return -1; } +static const union nf_inet_addr any_addr; + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2000,7 +2024,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, goto nla_put_failure; #ifdef CONFIG_NF_NAT_NEEDED - if (exp->saved_ip || exp->saved_proto.all) { + if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) || + exp->saved_proto.all) { nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -2009,7 +2034,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, goto nla_put_failure; nat_tuple.src.l3num = nf_ct_l3num(master); - nat_tuple.src.u3.ip = exp->saved_ip; + nat_tuple.src.u3 = exp->saved_addr; nat_tuple.dst.protonum = nf_ct_protonum(master); nat_tuple.src.u = exp->saved_proto; @@ -2045,15 +2070,15 @@ nla_put_failure: } static int -ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2104,7 +2129,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2119,7 +2144,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC); + nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); return 0; nla_put_failure: @@ -2162,7 +2187,7 @@ restart: cb->args[1] = 0; } if (ctnetlink_exp_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { @@ -2255,14 +2280,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -2316,7 +2341,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* after list removal, usage count == 1 */ spin_lock_bh(&nf_conntrack_lock); if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid, + nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2338,7 +2363,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (!strcmp(m_help->helper->name, name) && del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2354,7 +2379,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2405,7 +2430,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, if (err < 0) return err; - exp->saved_ip = nat_tuple.src.u3.ip; + exp->saved_addr = nat_tuple.src.u3; exp->saved_proto = nat_tuple.src.u; exp->dir = ntohl(nla_get_be32(tb[CTA_EXPECT_NAT_DIR])); @@ -2419,7 +2444,7 @@ static int ctnetlink_create_expect(struct net *net, u16 zone, const struct nlattr * const cda[], u_int8_t u3, - u32 pid, int report) + u32 portid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -2532,7 +2557,7 @@ ctnetlink_create_expect(struct net *net, u16 zone, if (err < 0) goto err_out; } - err = nf_ct_expect_related_report(exp, pid, report); + err = nf_ct_expect_related_report(exp, portid, report); err_out: nf_ct_expect_put(exp); out: @@ -2575,7 +2600,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_CREATE) { err = ctnetlink_create_expect(net, zone, cda, u3, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); } return err; @@ -2590,15 +2615,15 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int cpu, +ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2637,7 +2662,7 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; st = per_cpu_ptr(net->ct.stat, cpu); - if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; @@ -2785,7 +2810,8 @@ static int __init ctnetlink_init(void) goto err_unreg_subsys; } - if (register_pernet_subsys(&ctnetlink_net_ops)) { + ret = register_pernet_subsys(&ctnetlink_net_ops); + if (ret < 0) { pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 6fed9ec35248..cc7669ef0b95 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -45,14 +45,14 @@ static DEFINE_SPINLOCK(nf_pptp_lock); int (*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - struct PptpControlHeader *ctlh, + unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound); int (*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, - struct PptpControlHeader *ctlh, + unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound); @@ -262,7 +262,7 @@ out_unexpect_orig: } static inline int -pptp_inbound_pkt(struct sk_buff *skb, +pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -376,7 +376,8 @@ pptp_inbound_pkt(struct sk_buff *skb, nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound); if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_inbound(skb, ct, ctinfo, + protoff, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -389,7 +390,7 @@ invalid: } static inline int -pptp_outbound_pkt(struct sk_buff *skb, +pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -471,7 +472,8 @@ pptp_outbound_pkt(struct sk_buff *skb, nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound); if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_outbound(skb, ct, ctinfo, + protoff, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -570,11 +572,11 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct, + ret = pptp_outbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct, + ret = pptp_inbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct, ctinfo); pr_debug("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 0dc63854390f..51e928db48c8 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -21,7 +21,6 @@ #include <linux/notifier.h> #include <linux/kernel.h> #include <linux/netdevice.h> -#include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l3proto.h> @@ -294,9 +293,7 @@ void nf_conntrack_l3proto_unregister(struct net *net, nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all contrack entries for this protocol */ - rtnl_lock(); nf_ct_iterate_cleanup(net, kill_l3proto, proto); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -502,9 +499,7 @@ void nf_conntrack_l4proto_unregister(struct net *net, nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); /* Remove all contrack entries for this protocol */ - rtnl_lock(); nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a5ac11ebef33..61f9285111d1 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -158,21 +158,18 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sCL -> sSS */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, +/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR }, /* * sNO -> sIV Too late and no reason to do anything * sSS -> sIV Client can't send SYN and then SYN/ACK * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open - * sSR -> sIG - * sES -> sIG Error: SYNs in window outside the SYN_SENT state - * are errors. Receiver will reply with RST - * and close the connection. - * Or we are not in sync and hold a dead connection. - * sFW -> sIG - * sCW -> sIG - * sLA -> sIG - * sTW -> sIG - * sCL -> sIG + * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open + * sES -> sIV Invalid SYN/ACK packets sent by the client + * sFW -> sIV + * sCW -> sIV + * sLA -> sIV + * sTW -> sIV + * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, @@ -505,10 +502,10 @@ static inline s16 nat_offset(const struct nf_conn *ct, return get_offset != NULL ? get_offset(ct, dir, seq) : 0; } -#define NAT_OFFSET(pf, ct, dir, seq) \ - (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0) +#define NAT_OFFSET(ct, dir, seq) \ + (nat_offset(ct, dir, seq)) #else -#define NAT_OFFSET(pf, ct, dir, seq) 0 +#define NAT_OFFSET(ct, dir, seq) 0 #endif static bool tcp_in_window(const struct nf_conn *ct, @@ -541,7 +538,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ - receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); + receiver_offset = NAT_OFFSET(ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; @@ -633,15 +630,9 @@ static bool tcp_in_window(const struct nf_conn *ct, ack = sack = receiver->td_end; } - if (seq == end - && (!tcph->rst - || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) + if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT) /* - * Packets contains no data: we assume it is valid - * and check the ack value only. - * However RST segments are always validated by their - * SEQ number, except when seq == 0 (reset sent answering - * SYN. + * RST sent answering SYN. */ seq = end = sender->td_end; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 758a1bacc126..df8f4f284481 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -52,15 +52,17 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff, - const char **dptr, +unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); -void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly; +void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff, + s16 off) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, + unsigned int protoff, unsigned int dataoff, const char **dptr, unsigned int *datalen, @@ -69,7 +71,8 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, unsigned int matchlen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff, +unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int sdpoff, @@ -79,7 +82,8 @@ unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff, __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff, +unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, @@ -88,6 +92,7 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff, EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, + unsigned int protoff, unsigned int dataoff, const char **dptr, unsigned int *datalen, @@ -96,7 +101,8 @@ unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff, +unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, @@ -183,12 +189,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr, return len + digits_len(ct, dptr, limit, shift); } -static int parse_addr(const struct nf_conn *ct, const char *cp, - const char **endp, union nf_inet_addr *addr, - const char *limit) +static int sip_parse_addr(const struct nf_conn *ct, const char *cp, + const char **endp, union nf_inet_addr *addr, + const char *limit, bool delim) { const char *end; - int ret = 0; + int ret; if (!ct) return 0; @@ -197,16 +203,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp, switch (nf_ct_l3num(ct)) { case AF_INET: ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); + if (ret == 0) + return 0; break; case AF_INET6: + if (cp < limit && *cp == '[') + cp++; + else if (delim) + return 0; + ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); + if (ret == 0) + return 0; + + if (end < limit && *end == ']') + end++; + else if (delim) + return 0; break; default: BUG(); } - if (ret == 0 || end == cp) - return 0; if (endp) *endp = end; return 1; @@ -219,7 +237,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr, union nf_inet_addr addr; const char *aux = dptr; - if (!parse_addr(ct, dptr, &dptr, &addr, limit)) { + if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) { pr_debug("ip: %s parse failed.!\n", dptr); return 0; } @@ -296,7 +314,7 @@ int ct_sip_parse_request(const struct nf_conn *ct, return 0; dptr += shift; - if (!parse_addr(ct, dptr, &end, addr, limit)) + if (!sip_parse_addr(ct, dptr, &end, addr, limit, true)) return -1; if (end < limit && *end == ':') { end++; @@ -550,7 +568,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, if (ret == 0) return ret; - if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit)) + if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true)) return -1; if (*c == ':') { c++; @@ -599,7 +617,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, const char *name, unsigned int *matchoff, unsigned int *matchlen, - union nf_inet_addr *addr) + union nf_inet_addr *addr, bool delim) { const char *limit = dptr + datalen; const char *start, *end; @@ -613,7 +631,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, return 0; start += strlen(name); - if (!parse_addr(ct, start, &end, addr, limit)) + if (!sip_parse_addr(ct, start, &end, addr, limit, delim)) return 0; *matchoff = start - dptr; *matchlen = end - start; @@ -675,6 +693,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, return 1; } +static int sdp_parse_addr(const struct nf_conn *ct, const char *cp, + const char **endp, union nf_inet_addr *addr, + const char *limit) +{ + const char *end; + int ret; + + memset(addr, 0, sizeof(*addr)); + switch (nf_ct_l3num(ct)) { + case AF_INET: + ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); + break; + case AF_INET6: + ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); + break; + default: + BUG(); + } + + if (ret == 0) + return 0; + if (endp) + *endp = end; + return 1; +} + +/* skip ip address. returns its length. */ +static int sdp_addr_len(const struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + union nf_inet_addr addr; + const char *aux = dptr; + + if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) { + pr_debug("ip: %s parse failed.!\n", dptr); + return 0; + } + + return dptr - aux; +} + /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, * optionally followed by multiple media descriptions. @@ -684,13 +743,18 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, * be tolerant and also accept records terminated with a single newline * character". We handle both cases. */ -static const struct sip_header ct_sdp_hdrs[] = { - [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), - [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", epaddr_len), - [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len), - [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len), - [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len), - [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), +static const struct sip_header ct_sdp_hdrs_v4[] = { + [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), + [SDP_HDR_OWNER] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_CONNECTION] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), +}; + +static const struct sip_header ct_sdp_hdrs_v6[] = { + [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), + [SDP_HDR_OWNER] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len), + [SDP_HDR_CONNECTION] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len), + [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), }; /* Linear string search within SDP header values */ @@ -716,11 +780,14 @@ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, enum sdp_header_types term, unsigned int *matchoff, unsigned int *matchlen) { - const struct sip_header *hdr = &ct_sdp_hdrs[type]; - const struct sip_header *thdr = &ct_sdp_hdrs[term]; + const struct sip_header *hdrs, *hdr, *thdr; const char *start = dptr, *limit = dptr + datalen; int shift = 0; + hdrs = nf_ct_l3num(ct) == NFPROTO_IPV4 ? ct_sdp_hdrs_v4 : ct_sdp_hdrs_v6; + hdr = &hdrs[type]; + thdr = &hdrs[term]; + for (dptr += dataoff; dptr < limit; dptr++) { /* Find beginning of line */ if (*dptr != '\r' && *dptr != '\n') @@ -775,8 +842,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr, if (ret <= 0) return ret; - if (!parse_addr(ct, dptr + *matchoff, NULL, addr, - dptr + *matchoff + *matchlen)) + if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr, + dptr + *matchoff + *matchlen)) return -1; return 1; } @@ -830,7 +897,8 @@ static void flush_expectations(struct nf_conn *ct, bool media) spin_unlock_bh(&nf_conntrack_lock); } -static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, +static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, union nf_inet_addr *daddr, __be16 port, enum sip_expectation_classes class, @@ -886,12 +954,12 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, exp->class != class) break; #ifdef CONFIG_NF_NAT_NEEDED - if (exp->tuple.src.l3num == AF_INET && !direct_rtp && - (exp->saved_ip != exp->tuple.dst.u3.ip || + if (!direct_rtp && + (!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) || exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) && ct->status & IPS_NAT_MASK) { - daddr->ip = exp->saved_ip; - tuple.dst.u3.ip = exp->saved_ip; + *daddr = exp->saved_addr; + tuple.dst.u3 = exp->saved_addr; tuple.dst.u.udp.port = exp->saved_proto.udp.port; direct_rtp = 1; } else @@ -907,7 +975,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, if (direct_rtp) { nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, dataoff, dptr, datalen, + !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto err1; } @@ -929,7 +997,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen, + ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { @@ -970,7 +1038,8 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr, return NULL; } -static int process_sdp(struct sk_buff *skb, unsigned int dataoff, +static int process_sdp(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -983,15 +1052,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, unsigned int i; union nf_inet_addr caddr, maddr, rtp_addr; unsigned int port; - enum sdp_header_types c_hdr; const struct sdp_media_type *t; int ret = NF_ACCEPT; typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr; typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session; nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); - c_hdr = nf_ct_l3num(ct) == AF_INET ? SDP_HDR_CONNECTION_IP4 : - SDP_HDR_CONNECTION_IP6; /* Find beginning of session description */ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, @@ -1005,7 +1071,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, * the end of the session description. */ caddr_len = 0; if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen, - c_hdr, SDP_HDR_MEDIA, + SDP_HDR_CONNECTION, SDP_HDR_MEDIA, &matchoff, &matchlen, &caddr) > 0) caddr_len = matchlen; @@ -1035,7 +1101,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, /* The media description overrides the session description. */ maddr_len = 0; if (ct_sip_parse_sdp_addr(ct, *dptr, mediaoff, *datalen, - c_hdr, SDP_HDR_MEDIA, + SDP_HDR_CONNECTION, SDP_HDR_MEDIA, &matchoff, &matchlen, &maddr) > 0) { maddr_len = matchlen; memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); @@ -1044,7 +1110,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, else return NF_DROP; - ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen, + ret = set_expected_rtp_rtcp(skb, protoff, dataoff, + dptr, datalen, &rtp_addr, htons(port), t->class, mediaoff, medialen); if (ret != NF_ACCEPT) @@ -1052,8 +1119,9 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, /* Update media connection address if present */ if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen, - mediaoff, c_hdr, SDP_HDR_MEDIA, + ret = nf_nat_sdp_addr(skb, protoff, dataoff, + dptr, datalen, mediaoff, + SDP_HDR_CONNECTION, SDP_HDR_MEDIA, &rtp_addr); if (ret != NF_ACCEPT) return ret; @@ -1064,12 +1132,13 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, /* Update session connection and owner addresses */ nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff, - &rtp_addr); + ret = nf_nat_sdp_session(skb, protoff, dataoff, + dptr, datalen, sdpoff, &rtp_addr); return ret; } -static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, +static int process_invite_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1079,13 +1148,14 @@ static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dataoff, dptr, datalen, cseq); + return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq); else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_update_response(struct sk_buff *skb, unsigned int dataoff, +static int process_update_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1095,13 +1165,14 @@ static int process_update_response(struct sk_buff *skb, unsigned int dataoff, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dataoff, dptr, datalen, cseq); + return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq); else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, +static int process_prack_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1111,13 +1182,14 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dataoff, dptr, datalen, cseq); + return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq); else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, +static int process_invite_request(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1127,13 +1199,14 @@ static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, unsigned int ret; flush_expectations(ct, true); - ret = process_sdp(skb, dataoff, dptr, datalen, cseq); + ret = process_sdp(skb, protoff, dataoff, dptr, datalen, cseq); if (ret == NF_ACCEPT) ct_sip_info->invite_cseq = cseq; return ret; } -static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, +static int process_bye_request(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1148,7 +1221,8 @@ static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, * signalling connections. The expectation is marked inactive and is activated * when receiving a response indicating success from the registrar. */ -static int process_register_request(struct sk_buff *skb, unsigned int dataoff, +static int process_register_request(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1223,8 +1297,8 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp, - matchoff, matchlen); + ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, + exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; @@ -1239,7 +1313,8 @@ store_cseq: return ret; } -static int process_register_response(struct sk_buff *skb, unsigned int dataoff, +static int process_register_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1321,7 +1396,8 @@ static const struct sip_handler sip_handlers[] = { SIP_HANDLER("REGISTER", process_register_request, process_register_response), }; -static int process_sip_response(struct sk_buff *skb, unsigned int dataoff, +static int process_sip_response(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -1352,13 +1428,14 @@ static int process_sip_response(struct sk_buff *skb, unsigned int dataoff, if (*datalen < matchend + handler->len || strnicmp(*dptr + matchend, handler->method, handler->len)) continue; - return handler->response(skb, dataoff, dptr, datalen, + return handler->response(skb, protoff, dataoff, dptr, datalen, cseq, code); } return NF_ACCEPT; } -static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, +static int process_sip_request(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -1383,26 +1460,28 @@ static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, if (!cseq) return NF_DROP; - return handler->request(skb, dataoff, dptr, datalen, cseq); + return handler->request(skb, protoff, dataoff, dptr, datalen, + cseq); } return NF_ACCEPT; } static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, - unsigned int dataoff, const char **dptr, - unsigned int *datalen) + unsigned int protoff, unsigned int dataoff, + const char **dptr, unsigned int *datalen) { typeof(nf_nat_sip_hook) nf_nat_sip; int ret; if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) - ret = process_sip_request(skb, dataoff, dptr, datalen); + ret = process_sip_request(skb, protoff, dataoff, dptr, datalen); else - ret = process_sip_response(skb, dataoff, dptr, datalen); + ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen)) + if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, + dptr, datalen)) ret = NF_DROP; } @@ -1470,7 +1549,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, if (msglen > datalen) return NF_DROP; - ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen); + ret = process_sip_msg(skb, ct, protoff, dataoff, + &dptr, &msglen); if (ret != NF_ACCEPT) break; diff = msglen - origlen; @@ -1484,7 +1564,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); if (nf_nat_sip_seq_adjust) - nf_nat_sip_seq_adjust(skb, tdiff); + nf_nat_sip_seq_adjust(skb, protoff, tdiff); } return ret; @@ -1511,11 +1591,10 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, if (datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; - return process_sip_msg(skb, ct, dataoff, &dptr, &datalen); + return process_sip_msg(skb, ct, protoff, dataoff, &dptr, &datalen); } static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; -static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { @@ -1585,9 +1664,9 @@ static int __init nf_conntrack_sip_init(void) sip[i][j].me = THIS_MODULE; if (ports[i] == SIP_PORT) - sprintf(sip_names[i][j], "sip"); + sprintf(sip[i][j].name, "sip"); else - sprintf(sip_names[i][j], "sip-%u", i); + sprintf(sip[i][j].name, "sip-%u", i); pr_debug("port #%u: %u\n", i, ports[i]); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 770f76432ad0..3deec997be89 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -18,13 +18,13 @@ extern unsigned int nf_iterate(struct list_head *head, unsigned int hook, const struct net_device *indev, const struct net_device *outdev, - struct list_head **i, + struct nf_hook_ops **elemp, int (*okfn)(struct sk_buff *), int hook_thresh); /* nf_queue.c */ extern int nf_queue(struct sk_buff *skb, - struct list_head *elem, + struct nf_hook_ops *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index 3c04d24e2976..42d337881171 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -16,7 +16,6 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_nat_helper.h> -#include <net/netfilter/nf_nat_rule.h> #include <linux/netfilter/nf_conntrack_amanda.h> MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); @@ -26,6 +25,7 @@ MODULE_ALIAS("ip_nat_amanda"); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) @@ -61,7 +61,7 @@ static unsigned int help(struct sk_buff *skb, sprintf(buffer, "%u", port); ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, - matchoff, matchlen, + protoff, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) nf_ct_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 44b082fd48ab..5f2f9109f461 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1,7 +1,7 @@ -/* NAT for netfilter; shared with compatibility layer. */ - -/* (C) 1999-2001 Paul `Rusty' Russell +/* + * (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2011 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,38 +13,105 @@ #include <linux/timer.h> #include <linux/skbuff.h> #include <linux/gfp.h> -#include <net/checksum.h> -#include <net/icmp.h> -#include <net/ip.h> -#include <net/tcp.h> /* For tcp_prot in getorigdst */ -#include <linux/icmp.h> -#include <linux/udp.h> +#include <net/xfrm.h> #include <linux/jhash.h> +#include <linux/rtnetlink.h> -#include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_zones.h> +#include <linux/netfilter/nf_nat.h> static DEFINE_SPINLOCK(nf_nat_lock); -static struct nf_conntrack_l3proto *l3proto __read_mostly; - -#define MAX_IP_NAT_PROTO 256 -static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] +static DEFINE_MUTEX(nf_nat_proto_mutex); +static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] + __read_mostly; +static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] __read_mostly; -static inline const struct nf_nat_protocol * -__nf_nat_proto_find(u_int8_t protonum) + +inline const struct nf_nat_l3proto * +__nf_nat_l3proto_find(u8 family) +{ + return rcu_dereference(nf_nat_l3protos[family]); +} + +inline const struct nf_nat_l4proto * +__nf_nat_l4proto_find(u8 family, u8 protonum) +{ + return rcu_dereference(nf_nat_l4protos[family][protonum]); +} +EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find); + +#ifdef CONFIG_XFRM +static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + const struct nf_nat_l3proto *l3proto; + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + unsigned long statusbit; + u8 family; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return; + + family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + rcu_read_lock(); + l3proto = __nf_nat_l3proto_find(family); + if (l3proto == NULL) + goto out; + + dir = CTINFO2DIR(ctinfo); + if (dir == IP_CT_DIR_ORIGINAL) + statusbit = IPS_DST_NAT; + else + statusbit = IPS_SRC_NAT; + + l3proto->decode_session(skb, ct, dir, statusbit, fl); +out: + rcu_read_unlock(); +} + +int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) { - return rcu_dereference(nf_nat_protos[protonum]); + struct flowi fl; + unsigned int hh_len; + struct dst_entry *dst; + + if (xfrm_decode_session(skb, &fl, family) < 0) + return -1; + + dst = skb_dst(skb); + if (dst->xfrm) + dst = ((struct xfrm_dst *)dst)->route; + dst_hold(dst); + + dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); + if (IS_ERR(dst)) + return -1; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + /* Change in oif may mean change in hh_len. */ + hh_len = skb_dst(skb)->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + return -1; + return 0; } +EXPORT_SYMBOL(nf_xfrm_me_harder); +#endif /* CONFIG_XFRM */ /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -54,10 +121,9 @@ hash_by_src(const struct net *net, u16 zone, unsigned int hash; /* Original src, to ensure we map it consistently if poss. */ - hash = jhash_3words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->src.u.all ^ zone, - tuple->dst.protonum, nf_conntrack_hash_rnd); - return ((u64)hash * net->ipv4.nat_htable_size) >> 32; + hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), + tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd); + return ((u64)hash * net->ct.nat_htable_size) >> 32; } /* Is this tuple already taken? (not by us) */ @@ -66,10 +132,11 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { /* Conntrack tracking doesn't keep track of outgoing tuples; only - incoming ones. NAT means they don't have a fixed mapping, - so we invert the tuple and look for the incoming reply. - - We could keep a separate hash if this proves too slow. */ + * incoming ones. NAT means they don't have a fixed mapping, + * so we invert the tuple and look for the incoming reply. + * + * We could keep a separate hash if this proves too slow. + */ struct nf_conntrack_tuple reply; nf_ct_invert_tuplepr(&reply, tuple); @@ -78,31 +145,26 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, EXPORT_SYMBOL(nf_nat_used_tuple); /* If we source map this tuple so reply looks like reply_tuple, will - * that meet the constraints of range. */ -static int -in_range(const struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range) + * that meet the constraints of range. + */ +static int in_range(const struct nf_nat_l3proto *l3proto, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range) { - const struct nf_nat_protocol *proto; - int ret = 0; - /* If we are supposed to map IPs, then we must be in the - range specified, otherwise let this drag us onto a new src IP. */ - if (range->flags & NF_NAT_RANGE_MAP_IPS) { - if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || - ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) - return 0; - } + * range specified, otherwise let this drag us onto a new src IP. + */ + if (range->flags & NF_NAT_RANGE_MAP_IPS && + !l3proto->in_range(tuple, range)) + return 0; - rcu_read_lock(); - proto = __nf_nat_proto_find(tuple->dst.protonum); if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || - proto->in_range(tuple, NF_NAT_MANIP_SRC, - &range->min, &range->max)) - ret = 1; - rcu_read_unlock(); + l4proto->in_range(tuple, NF_NAT_MANIP_SRC, + &range->min_proto, &range->max_proto)) + return 1; - return ret; + return 0; } static inline int @@ -113,24 +175,25 @@ same_src(const struct nf_conn *ct, t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; return (t->dst.protonum == tuple->dst.protonum && - t->src.u3.ip == tuple->src.u3.ip && + nf_inet_addr_cmp(&t->src.u3, &tuple->src.u3) && t->src.u.all == tuple->src.u.all); } /* Only called for SRC manip */ static int find_appropriate_src(struct net *net, u16 zone, + const struct nf_nat_l3proto *l3proto, + const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, - const struct nf_nat_ipv4_range *range) + const struct nf_nat_range *range) { unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; - rcu_read_lock(); - hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. */ @@ -138,119 +201,148 @@ find_appropriate_src(struct net *net, u16 zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; - if (in_range(result, range)) { - rcu_read_unlock(); + if (in_range(l3proto, l4proto, result, range)) return 1; - } } } - rcu_read_unlock(); return 0; } /* For [FUTURE] fragmentation handling, we want the least-used - src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus - if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports - 1-65535, we don't do pro-rata allocation based on ports; we choose - the ip with the lowest src-ip/dst-ip/proto usage. -*/ + * src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus + * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports + * 1-65535, we don't do pro-rata allocation based on ports; we choose + * the ip with the lowest src-ip/dst-ip/proto usage. + */ static void find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, + const struct nf_nat_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) { - __be32 *var_ipp; + union nf_inet_addr *var_ipp; + unsigned int i, max; /* Host order */ - u_int32_t minip, maxip, j; + u32 minip, maxip, j, dist; + bool full_range; /* No IP mapping? Do nothing. */ if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) return; if (maniptype == NF_NAT_MANIP_SRC) - var_ipp = &tuple->src.u3.ip; + var_ipp = &tuple->src.u3; else - var_ipp = &tuple->dst.u3.ip; + var_ipp = &tuple->dst.u3; /* Fast path: only one choice. */ - if (range->min_ip == range->max_ip) { - *var_ipp = range->min_ip; + if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) { + *var_ipp = range->min_addr; return; } + if (nf_ct_l3num(ct) == NFPROTO_IPV4) + max = sizeof(var_ipp->ip) / sizeof(u32) - 1; + else + max = sizeof(var_ipp->ip6) / sizeof(u32) - 1; + /* Hashing source and destination IPs gives a fairly even * spread in practice (if there are a small number of IPs * involved, there usually aren't that many connections * anyway). The consistency means that servers see the same * client coming from the same IP (some Internet Banking sites - * like this), even across reboots. */ - minip = ntohl(range->min_ip); - maxip = ntohl(range->max_ip); - j = jhash_2words((__force u32)tuple->src.u3.ip, - range->flags & NF_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); - j = ((u64)j * (maxip - minip + 1)) >> 32; - *var_ipp = htonl(minip + j); + * like this), even across reboots. + */ + j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32), + range->flags & NF_NAT_RANGE_PERSISTENT ? + 0 : (__force u32)tuple->dst.u3.all[max] ^ zone); + + full_range = false; + for (i = 0; i <= max; i++) { + /* If first bytes of the address are at the maximum, use the + * distance. Otherwise use the full range. + */ + if (!full_range) { + minip = ntohl((__force __be32)range->min_addr.all[i]); + maxip = ntohl((__force __be32)range->max_addr.all[i]); + dist = maxip - minip + 1; + } else { + minip = 0; + dist = ~0; + } + + var_ipp->all[i] = (__force __u32) + htonl(minip + (((u64)j * dist) >> 32)); + if (var_ipp->all[i] != range->max_addr.all[i]) + full_range = true; + + if (!(range->flags & NF_NAT_RANGE_PERSISTENT)) + j ^= (__force u32)tuple->dst.u3.all[i]; + } } -/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, - * we change the source to map into the range. For NF_INET_PRE_ROUTING +/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, + * we change the source to map into the range. For NF_INET_PRE_ROUTING * and NF_INET_LOCAL_OUT, we change the destination to map into the - * range. It might not be possible to get a unique tuple, but we try. + * range. It might not be possible to get a unique tuple, but we try. * At worst (or if we race), we will end up with a final duplicate in * __ip_conntrack_confirm and drop the packet. */ static void get_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig_tuple, - const struct nf_nat_ipv4_range *range, + const struct nf_nat_range *range, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + const struct nf_nat_l3proto *l3proto; + const struct nf_nat_l4proto *l4proto; struct net *net = nf_ct_net(ct); - const struct nf_nat_protocol *proto; u16 zone = nf_ct_zone(ct); - /* 1) If this srcip/proto/src-proto-part is currently mapped, - and that same mapping gives a unique tuple within the given - range, use that. + rcu_read_lock(); + l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num); + l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num, + orig_tuple->dst.protonum); - This is only required for source (ie. NAT/masq) mappings. - So far, we don't do local source mappings, so multiple - manips not an issue. */ + /* 1) If this srcip/proto/src-proto-part is currently mapped, + * and that same mapping gives a unique tuple within the given + * range, use that. + * + * This is only required for source (ie. NAT/masq) mappings. + * So far, we don't do local source mappings, so multiple + * manips not an issue. + */ if (maniptype == NF_NAT_MANIP_SRC && !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { /* try the original tuple first */ - if (in_range(orig_tuple, range)) { + if (in_range(l3proto, l4proto, orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { *tuple = *orig_tuple; - return; + goto out; } - } else if (find_appropriate_src(net, zone, orig_tuple, tuple, - range)) { + } else if (find_appropriate_src(net, zone, l3proto, l4proto, + orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) - return; + goto out; } } - /* 2) Select the least-used IP/proto combination in the given - range. */ + /* 2) Select the least-used IP/proto combination in the given range */ *tuple = *orig_tuple; find_best_ips_proto(zone, tuple, range, ct, maniptype); /* 3) The per-protocol part of the manip is made to map into - the range to make a unique tuple. */ - - rcu_read_lock(); - proto = __nf_nat_proto_find(orig_tuple->dst.protonum); + * the range to make a unique tuple. + */ /* Only bother mapping if it's not already in range and unique */ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { - if (proto->in_range(tuple, maniptype, &range->min, - &range->max) && - (range->min.all == range->max.all || + if (l4proto->in_range(tuple, maniptype, + &range->min_proto, + &range->max_proto) && + (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) goto out; } else if (!nf_nat_used_tuple(tuple, ct)) { @@ -259,14 +351,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, } /* Last change: get protocol to try to obtain unique tuple. */ - proto->unique_tuple(tuple, range, maniptype, ct); + l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct); out: rcu_read_unlock(); } unsigned int nf_nat_setup_info(struct nf_conn *ct, - const struct nf_nat_ipv4_range *range, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { struct net *net = nf_ct_net(ct); @@ -288,10 +380,10 @@ nf_nat_setup_info(struct nf_conn *ct, BUG_ON(nf_nat_initialized(ct, maniptype)); /* What we've got will look like inverse of reply. Normally - this is what is in the conntrack, except for prior - manipulations (future optimization: if num_manips == 0, - orig_tp = - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ + * this is what is in the conntrack, except for prior + * manipulations (future optimization: if num_manips == 0, + * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) + */ nf_ct_invert_tuplepr(&curr_tuple, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -317,11 +409,11 @@ nf_nat_setup_info(struct nf_conn *ct, srchash = hash_by_src(net, nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); - /* nf_conntrack_alter_reply might re-allocate extension area */ + /* nf_conntrack_alter_reply might re-allocate extension aera */ nat = nfct_nat(ct); nat->ct = ct; hlist_add_head_rcu(&nat->bysource, - &net->ipv4.nat_bysource[srchash]); + &net->ct.nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -335,47 +427,14 @@ nf_nat_setup_info(struct nf_conn *ct, } EXPORT_SYMBOL(nf_nat_setup_info); -/* Returns true if succeeded. */ -static bool -manip_pkt(u_int16_t proto, - struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *target, - enum nf_nat_manip_type maniptype) -{ - struct iphdr *iph; - const struct nf_nat_protocol *p; - - if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) - return false; - - iph = (void *)skb->data + iphdroff; - - /* Manipulate protcol part. */ - - /* rcu_read_lock()ed by nf_hook_slow */ - p = __nf_nat_proto_find(proto); - if (!p->manip_pkt(skb, iphdroff, target, maniptype)) - return false; - - iph = (void *)skb->data + iphdroff; - - if (maniptype == NF_NAT_MANIP_SRC) { - csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); - iph->saddr = target->src.u3.ip; - } else { - csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); - iph->daddr = target->dst.u3.ip; - } - return true; -} - /* Do packet manipulations according to nf_nat_setup_info. */ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, struct sk_buff *skb) { + const struct nf_nat_l3proto *l3proto; + const struct nf_nat_l4proto *l4proto; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); @@ -396,129 +455,176 @@ unsigned int nf_nat_packet(struct nf_conn *ct, /* We are aiming to look like inverse of other direction. */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype)) + l3proto = __nf_nat_l3proto_find(target.src.l3num); + l4proto = __nf_nat_l4proto_find(target.src.l3num, + target.dst.protonum); + if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) return NF_DROP; } return NF_ACCEPT; } EXPORT_SYMBOL_GPL(nf_nat_packet); -/* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int nf_nat_icmp_reply_translation(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff *skb) +struct nf_nat_proto_clean { + u8 l3proto; + u8 l4proto; + bool hash; +}; + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int nf_nat_proto_clean(struct nf_conn *i, void *data) { - struct { - struct icmphdr icmp; - struct iphdr ip; - } *inside; - struct nf_conntrack_tuple target; - int hdrlen = ip_hdrlen(skb); - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned long statusbit; - enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + const struct nf_nat_proto_clean *clean = data; + struct nf_conn_nat *nat = nfct_nat(i); - if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + if (!nat) return 0; - - inside = (void *)skb->data + hdrlen; - - /* We're actually going to mangle it beyond trivial checksum - adjustment, so make sure the current checksum is correct. */ - if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) + if (!(i->status & IPS_SRC_NAT_DONE)) + return 0; + if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || + (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; - /* Must be RELATED */ - NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED_REPLY); - - /* Redirects on non-null nats must be dropped, else they'll - start talking to each other without our translation, and be - confused... --RR */ - if (inside->icmp.type == ICMP_REDIRECT) { - /* If NAT isn't finished, assume it and drop. */ - if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) - return 0; - - if (ct->status & IPS_NAT_MASK) - return 0; + if (clean->hash) { + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&nat->bysource); + spin_unlock_bh(&nf_nat_lock); + } else { + memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | + IPS_SEQ_ADJUST); } + return 0; +} - if (manip == NF_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply dir. */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - if (!(ct->status & statusbit)) - return 1; - - pr_debug("icmp_reply_translation: translating error %p manip %u " - "dir %s\n", skb, manip, - dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); - - /* Change inner back to look like incoming packet. We do the - opposite manip on this hook to normal, because it might not - pass all hooks (locally-generated ICMP). Consider incoming - packet: PREROUTING (DST manip), routing produces ICMP, goes - through POSTROUTING (which must correct the DST manip). */ - if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), - &ct->tuplehash[!dir].tuple, !manip)) - return 0; +static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) +{ + struct nf_nat_proto_clean clean = { + .l3proto = l3proto, + .l4proto = l4proto, + }; + struct net *net; + + rtnl_lock(); + /* Step 1 - remove from bysource hash */ + clean.hash = true; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + synchronize_rcu(); - if (skb->ip_summed != CHECKSUM_PARTIAL) { - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)skb->data + hdrlen; - inside->icmp.checksum = 0; - inside->icmp.checksum = - csum_fold(skb_checksum(skb, hdrlen, - skb->len - hdrlen, 0)); - } + /* Step 2 - clean NAT section */ + clean.hash = false; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + rtnl_unlock(); +} - /* Change outer to look the reply to an incoming packet - * (proto 0 means don't invert per-proto part). */ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(0, skb, 0, &target, manip)) - return 0; +static void nf_nat_l3proto_clean(u8 l3proto) +{ + struct nf_nat_proto_clean clean = { + .l3proto = l3proto, + }; + struct net *net; + + rtnl_lock(); + /* Step 1 - remove from bysource hash */ + clean.hash = true; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + synchronize_rcu(); - return 1; + /* Step 2 - clean NAT section */ + clean.hash = false; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + rtnl_unlock(); } -EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); /* Protocol registration. */ -int nf_nat_protocol_register(const struct nf_nat_protocol *proto) +int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto) { + const struct nf_nat_l4proto **l4protos; + unsigned int i; int ret = 0; - spin_lock_bh(&nf_nat_lock); + mutex_lock(&nf_nat_proto_mutex); + if (nf_nat_l4protos[l3proto] == NULL) { + l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *), + GFP_KERNEL); + if (l4protos == NULL) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < IPPROTO_MAX; i++) + RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown); + + /* Before making proto_array visible to lockless readers, + * we must make sure its content is committed to memory. + */ + smp_wmb(); + + nf_nat_l4protos[l3proto] = l4protos; + } + if (rcu_dereference_protected( - nf_nat_protos[proto->protonum], - lockdep_is_held(&nf_nat_lock) - ) != &nf_nat_unknown_protocol) { + nf_nat_l4protos[l3proto][l4proto->l4proto], + lockdep_is_held(&nf_nat_proto_mutex) + ) != &nf_nat_l4proto_unknown) { ret = -EBUSY; goto out; } - RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto); out: - spin_unlock_bh(&nf_nat_lock); + mutex_unlock(&nf_nat_proto_mutex); return ret; } -EXPORT_SYMBOL(nf_nat_protocol_register); +EXPORT_SYMBOL_GPL(nf_nat_l4proto_register); /* No one stores the protocol anywhere; simply delete it. */ -void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) +void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto) { - spin_lock_bh(&nf_nat_lock); - RCU_INIT_POINTER(nf_nat_protos[proto->protonum], - &nf_nat_unknown_protocol); - spin_unlock_bh(&nf_nat_lock); + mutex_lock(&nf_nat_proto_mutex); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], + &nf_nat_l4proto_unknown); + mutex_unlock(&nf_nat_proto_mutex); synchronize_rcu(); + + nf_nat_l4proto_clean(l3proto, l4proto->l4proto); +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister); + +int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) +{ + int err; + + err = nf_ct_l3proto_try_module_get(l3proto->l3proto); + if (err < 0) + return err; + + mutex_lock(&nf_nat_proto_mutex); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP], + &nf_nat_l4proto_tcp); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP], + &nf_nat_l4proto_udp); + mutex_unlock(&nf_nat_proto_mutex); + + RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); + return 0; +} +EXPORT_SYMBOL_GPL(nf_nat_l3proto_register); + +void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto) +{ + mutex_lock(&nf_nat_proto_mutex); + RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL); + mutex_unlock(&nf_nat_proto_mutex); + synchronize_rcu(); + + nf_nat_l3proto_clean(l3proto->l3proto); + nf_ct_l3proto_module_put(l3proto->l3proto); } -EXPORT_SYMBOL(nf_nat_protocol_unregister); +EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); /* No one using conntrack by the time this called. */ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) @@ -570,34 +676,36 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { static int nfnetlink_parse_nat_proto(struct nlattr *attr, const struct nf_conn *ct, - struct nf_nat_ipv4_range *range) + struct nf_nat_range *range) { struct nlattr *tb[CTA_PROTONAT_MAX+1]; - const struct nf_nat_protocol *npt; + const struct nf_nat_l4proto *l4proto; int err; err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); if (err < 0) return err; - rcu_read_lock(); - npt = __nf_nat_proto_find(nf_ct_protonum(ct)); - if (npt->nlattr_to_range) - err = npt->nlattr_to_range(tb, range); - rcu_read_unlock(); + l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + if (l4proto->nlattr_to_range) + err = l4proto->nlattr_to_range(tb, range); + return err; } static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { - [CTA_NAT_MINIP] = { .type = NLA_U32 }, - [CTA_NAT_MAXIP] = { .type = NLA_U32 }, + [CTA_NAT_V4_MINIP] = { .type = NLA_U32 }, + [CTA_NAT_V4_MAXIP] = { .type = NLA_U32 }, + [CTA_NAT_V6_MINIP] = { .len = sizeof(struct in6_addr) }, + [CTA_NAT_V6_MAXIP] = { .len = sizeof(struct in6_addr) }, [CTA_NAT_PROTO] = { .type = NLA_NESTED }, }; static int nfnetlink_parse_nat(const struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_ipv4_range *range) + const struct nf_conn *ct, struct nf_nat_range *range) { + const struct nf_nat_l3proto *l3proto; struct nlattr *tb[CTA_NAT_MAX+1]; int err; @@ -607,25 +715,23 @@ nfnetlink_parse_nat(const struct nlattr *nat, if (err < 0) return err; - if (tb[CTA_NAT_MINIP]) - range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); - - if (!tb[CTA_NAT_MAXIP]) - range->max_ip = range->min_ip; - else - range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); - - if (range->min_ip) - range->flags |= NF_NAT_RANGE_MAP_IPS; + rcu_read_lock(); + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + if (l3proto == NULL) { + err = -EAGAIN; + goto out; + } + err = l3proto->nlattr_to_range(tb, range); + if (err < 0) + goto out; if (!tb[CTA_NAT_PROTO]) - return 0; + goto out; err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); - if (err < 0) - return err; - - return 0; +out: + rcu_read_unlock(); + return err; } static int @@ -633,10 +739,12 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; + int err; - if (nfnetlink_parse_nat(attr, ct, &range) < 0) - return -EINVAL; + err = nfnetlink_parse_nat(attr, ct, &range); + if (err < 0) + return err; if (nf_nat_initialized(ct, manip)) return -EEXIST; @@ -655,30 +763,20 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { /* Leave them the same for the moment. */ - net->ipv4.nat_htable_size = net->ct.htable_size; - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); - if (!net->ipv4.nat_bysource) + net->ct.nat_htable_size = net->ct.htable_size; + net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0); + if (!net->ct.nat_bysource) return -ENOMEM; return 0; } -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct nf_conn *i, void *data) -{ - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - static void __net_exit nf_nat_net_exit(struct net *net) { - nf_ct_iterate_cleanup(net, &clean_nat, NULL); + struct nf_nat_proto_clean clean = {}; + + nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); synchronize_rcu(); - nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); + nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { @@ -697,11 +795,8 @@ static struct nfq_ct_nat_hook nfq_ct_nat = { static int __init nf_nat_init(void) { - size_t i; int ret; - need_ipv4_conntrack(); - ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); @@ -712,22 +807,11 @@ static int __init nf_nat_init(void) if (ret < 0) goto cleanup_extend; - /* Sew in builtin protocols. */ - spin_lock_bh(&nf_nat_lock); - for (i = 0; i < MAX_IP_NAT_PROTO; i++) - RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol); - RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); - RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); - RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); - spin_unlock_bh(&nf_nat_lock); + nf_ct_helper_expectfn_register(&follow_master_nat); /* Initialize fake conntrack so that NAT will skip it */ nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); - l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); - - nf_ct_helper_expectfn_register(&follow_master_nat); - BUG_ON(nf_nat_seq_adjust_hook != NULL); RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); @@ -736,6 +820,10 @@ static int __init nf_nat_init(void) BUG_ON(nf_ct_nat_offset != NULL); RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat); +#ifdef CONFIG_XFRM + BUG_ON(nf_nat_decode_session_hook != NULL); + RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session); +#endif return 0; cleanup_extend: @@ -745,19 +833,24 @@ static int __init nf_nat_init(void) static void __exit nf_nat_cleanup(void) { + unsigned int i; + unregister_pernet_subsys(&nf_nat_net_ops); - nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); nf_ct_helper_expectfn_unregister(&follow_master_nat); RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); RCU_INIT_POINTER(nf_ct_nat_offset, NULL); RCU_INIT_POINTER(nfq_ct_nat_hook, NULL); +#ifdef CONFIG_XFRM + RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); +#endif + for (i = 0; i < NFPROTO_NUMPROTO; i++) + kfree(nf_nat_l4protos[i]); synchronize_net(); } MODULE_LICENSE("GPL"); -MODULE_ALIAS("nf-nat-ipv4"); module_init(nf_nat_init); module_exit(nf_nat_cleanup); diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index e462a957d080..e839b97b2863 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -10,12 +10,11 @@ #include <linux/module.h> #include <linux/moduleparam.h> -#include <linux/ip.h> +#include <linux/inet.h> #include <linux/tcp.h> #include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> -#include <net/netfilter/nf_nat_rule.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_ftp.h> @@ -27,22 +26,27 @@ MODULE_ALIAS("ip_nat_ftp"); /* FIXME: Time out? --RR */ -static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type, +static int nf_nat_ftp_fmt_cmd(struct nf_conn *ct, enum nf_ct_ftp_type type, char *buffer, size_t buflen, - __be32 addr, u16 port) + union nf_inet_addr *addr, u16 port) { switch (type) { case NF_CT_FTP_PORT: case NF_CT_FTP_PASV: return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u", - ((unsigned char *)&addr)[0], - ((unsigned char *)&addr)[1], - ((unsigned char *)&addr)[2], - ((unsigned char *)&addr)[3], + ((unsigned char *)&addr->ip)[0], + ((unsigned char *)&addr->ip)[1], + ((unsigned char *)&addr->ip)[2], + ((unsigned char *)&addr->ip)[3], port >> 8, port & 0xFF); case NF_CT_FTP_EPRT: - return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port); + if (nf_ct_l3num(ct) == NFPROTO_IPV4) + return snprintf(buffer, buflen, "|1|%pI4|%u|", + &addr->ip, port); + else + return snprintf(buffer, buflen, "|2|%pI6|%u|", + &addr->ip6, port); case NF_CT_FTP_EPSV: return snprintf(buffer, buflen, "|||%u|", port); } @@ -55,21 +59,22 @@ static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type, static unsigned int nf_nat_ftp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) { - __be32 newip; + union nf_inet_addr newaddr; u_int16_t port; int dir = CTINFO2DIR(ctinfo); struct nf_conn *ct = exp->master; - char buffer[sizeof("|1|255.255.255.255|65535|")]; + char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN]; unsigned int buflen; pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); /* Connection will come from wherever this packet goes, hence !dir */ - newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + newaddr = ct->tuplehash[!dir].tuple.dst.u3; exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; exp->dir = !dir; @@ -94,13 +99,14 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, if (port == 0) return NF_DROP; - buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port); + buflen = nf_nat_ftp_fmt_cmd(ct, type, buffer, sizeof(buffer), + &newaddr, port); if (!buflen) goto out; pr_debug("calling nf_nat_mangle_tcp_packet\n"); - if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, matchlen, buffer, buflen)) goto out; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 2e59ad0b90ca..23c2b38676a6 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -1,4 +1,4 @@ -/* ip_nat_helper.c - generic support functions for NAT helpers +/* nf_nat_helper.c - generic support functions for NAT helpers * * (C) 2000-2002 Harald Welte <laforge@netfilter.org> * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org> @@ -9,23 +9,19 @@ */ #include <linux/module.h> #include <linux/gfp.h> -#include <linux/kmod.h> #include <linux/types.h> -#include <linux/timer.h> #include <linux/skbuff.h> #include <linux/tcp.h> #include <linux/udp.h> -#include <net/checksum.h> #include <net/tcp.h> -#include <net/route.h> -#include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> @@ -90,7 +86,6 @@ s16 nf_nat_get_offset(const struct nf_conn *ct, return offset; } -EXPORT_SYMBOL_GPL(nf_nat_get_offset); /* Frobs data inside this packet, which is linear. */ static void mangle_contents(struct sk_buff *skb, @@ -125,9 +120,13 @@ static void mangle_contents(struct sk_buff *skb, __skb_trim(skb, skb->len + rep_len - match_len); } - /* fix IP hdr checksum information */ - ip_hdr(skb)->tot_len = htons(skb->len); - ip_send_check(ip_hdr(skb)); + if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) { + /* fix IP hdr checksum information */ + ip_hdr(skb)->tot_len = htons(skb->len); + ip_send_check(ip_hdr(skb)); + } else + ipv6_hdr(skb)->payload_len = + htons(skb->len - sizeof(struct ipv6hdr)); } /* Unusual, but possible case. */ @@ -166,35 +165,6 @@ void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, } EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); -static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, - int datalen, __sum16 *check, int oldlen) -{ - struct rtable *rt = skb_rtable(skb); - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(rt->rt_flags & RTCF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + - skb_network_offset(skb) + - iph->ihl * 4; - skb->csum_offset = (void *)check - data; - *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - datalen, iph->protocol, 0); - } else { - *check = 0; - *check = csum_tcpudp_magic(iph->saddr, iph->daddr, - datalen, iph->protocol, - csum_partial(data, datalen, - 0)); - if (iph->protocol == IPPROTO_UDP && !*check) - *check = CSUM_MANGLED_0; - } - } else - inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), 1); -} - /* Generic function for mangling variable-length address changes inside * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX * command in FTP). @@ -206,12 +176,13 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, unsigned int match_len, const char *rep_buffer, unsigned int rep_len, bool adjust) { - struct iphdr *iph; + const struct nf_nat_l3proto *l3proto; struct tcphdr *tcph; int oldlen, datalen; @@ -225,15 +196,17 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, SKB_LINEAR_ASSERT(skb); - iph = ip_hdr(skb); - tcph = (void *)iph + iph->ihl*4; + tcph = (void *)skb->data + protoff; - oldlen = skb->len - iph->ihl*4; - mangle_contents(skb, iph->ihl*4 + tcph->doff*4, + oldlen = skb->len - protoff; + mangle_contents(skb, protoff + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); - datalen = skb->len - iph->ihl*4; - nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen); + datalen = skb->len - protoff; + + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check, + datalen, oldlen); if (adjust && rep_len != match_len) nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, @@ -257,12 +230,13 @@ int nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, unsigned int match_len, const char *rep_buffer, unsigned int rep_len) { - struct iphdr *iph; + const struct nf_nat_l3proto *l3proto; struct udphdr *udph; int datalen, oldlen; @@ -274,22 +248,23 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, !enlarge_skb(skb, rep_len - match_len)) return 0; - iph = ip_hdr(skb); - udph = (void *)iph + iph->ihl*4; + udph = (void *)skb->data + protoff; - oldlen = skb->len - iph->ihl*4; - mangle_contents(skb, iph->ihl*4 + sizeof(*udph), + oldlen = skb->len - protoff; + mangle_contents(skb, protoff + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - datalen = skb->len - iph->ihl*4; + datalen = skb->len - protoff; udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) return 1; - nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen); + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check, + datalen, oldlen); return 1; } @@ -341,6 +316,7 @@ sack_adjust(struct sk_buff *skb, /* TCP SACK sequence number adjustment */ static inline unsigned int nf_nat_sack_adjust(struct sk_buff *skb, + unsigned int protoff, struct tcphdr *tcph, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -348,8 +324,8 @@ nf_nat_sack_adjust(struct sk_buff *skb, unsigned int dir, optoff, optend; struct nf_conn_nat *nat = nfct_nat(ct); - optoff = ip_hdrlen(skb) + sizeof(struct tcphdr); - optend = ip_hdrlen(skb) + tcph->doff * 4; + optoff = protoff + sizeof(struct tcphdr); + optend = protoff + tcph->doff * 4; if (!skb_make_writable(skb, optend)) return 0; @@ -387,7 +363,8 @@ nf_nat_sack_adjust(struct sk_buff *skb, int nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) + enum ip_conntrack_info ctinfo, + unsigned int protoff) { struct tcphdr *tcph; int dir; @@ -401,10 +378,10 @@ nf_nat_seq_adjust(struct sk_buff *skb, this_way = &nat->seq[dir]; other_way = &nat->seq[!dir]; - if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) + if (!skb_make_writable(skb, protoff + sizeof(*tcph))) return 0; - tcph = (void *)skb->data + ip_hdrlen(skb); + tcph = (void *)skb->data + protoff; if (after(ntohl(tcph->seq), this_way->correction_pos)) seqoff = this_way->offset_after; else @@ -429,7 +406,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph->seq = newseq; tcph->ack_seq = newack; - return nf_nat_sack_adjust(skb, tcph, ct, ctinfo); + return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo); } /* Setup NAT on this expected conntrack so it follows master. */ @@ -437,22 +414,22 @@ nf_nat_seq_adjust(struct sk_buff *skb, void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = exp->saved_proto; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; + range.min_proto = range.max_proto = exp->saved_proto; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.src.u3; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } EXPORT_SYMBOL(nf_nat_follow_master); diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index 979ae165f4ef..1fedee6e7fb6 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -17,7 +17,6 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> -#include <net/netfilter/nf_nat_rule.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_irc.h> @@ -29,12 +28,12 @@ MODULE_ALIAS("ip_nat_irc"); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp) { char buffer[sizeof("4294967296 65635")]; - u_int32_t ip; u_int16_t port; unsigned int ret; @@ -60,13 +59,8 @@ static unsigned int help(struct sk_buff *skb, if (port == 0) return NF_DROP; - ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); - sprintf(buffer, "%u %u", ip, port); - pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", - buffer, &ip, port); - ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, - matchoff, matchlen, buffer, + protoff, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) nf_ct_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index 9993bc93e102..9baaf734c142 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -9,20 +9,18 @@ #include <linux/types.h> #include <linux/random.h> -#include <linux/ip.h> - #include <linux/netfilter.h> #include <linux/export.h> -#include <net/secure_seq.h> + #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_rule.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> -bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) +bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) { __be16 port; @@ -34,13 +32,14 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, return ntohs(port) >= ntohs(min->all) && ntohs(port) <= ntohs(max->all); } -EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); +EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range); -void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct, - u_int16_t *rover) +void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct, + u16 *rover) { unsigned int range_size, min, i; __be16 *portptr; @@ -71,15 +70,14 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = 65535 - 1024 + 1; } } else { - min = ntohs(range->min.all); - range_size = ntohs(range->max.all) - min + 1; + min = ntohs(range->min_proto.all); + range_size = ntohs(range->max_proto.all) - min + 1; } if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) - off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, - maniptype == NF_NAT_MANIP_SRC - ? tuple->dst.u.all - : tuple->src.u.all); + off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC + ? tuple->dst.u.all + : tuple->src.u.all); else off = *rover; @@ -93,22 +91,22 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, } return; } -EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); +EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) -int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_ipv4_range *range) +int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) { if (tb[CTA_PROTONAT_PORT_MIN]) { - range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); - range->max.all = range->min.tcp.port; + range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); + range->max_proto.all = range->min_proto.all; range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[CTA_PROTONAT_PORT_MAX]) { - range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); + range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } return 0; } -EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); +EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range); #endif diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index 3f67138d187c..c8be2cdac0bf 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -1,7 +1,7 @@ /* * DCCP NAT protocol helper * - * Copyright (c) 2005, 2006. 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,35 +13,34 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/skbuff.h> -#include <linux/ip.h> #include <linux/dccp.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> static u_int16_t dccp_port_rover; static void -dccp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +dccp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &dccp_port_rover); + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &dccp_port_rover); } static bool dccp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct iphdr *iph = (const void *)(skb->data + iphdroff); struct dccp_hdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl * 4; - __be32 oldip, newip; __be16 *portptr, oldport, newport; int hdrsize = 8; /* DCCP connection tracking guarantees this much */ @@ -51,17 +50,12 @@ dccp_manip_pkt(struct sk_buff *skb, if (!skb_make_writable(skb, hdroff + hdrsize)) return false; - iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct dccp_hdr *)(skb->data + hdroff); if (maniptype == NF_NAT_MANIP_SRC) { - oldip = iph->saddr; - newip = tuple->src.u3.ip; newport = tuple->src.u.dccp.port; portptr = &hdr->dccph_sport; } else { - oldip = iph->daddr; - newip = tuple->dst.u3.ip; newport = tuple->dst.u.dccp.port; portptr = &hdr->dccph_dport; } @@ -72,30 +66,46 @@ dccp_manip_pkt(struct sk_buff *skb, if (hdrsize < sizeof(*hdr)) return true; - inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1); + l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, + tuple, maniptype); inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, 0); return true; } -static const struct nf_nat_protocol nf_nat_protocol_dccp = { - .protonum = IPPROTO_DCCP, +static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { + .l4proto = IPPROTO_DCCP, .manip_pkt = dccp_manip_pkt, - .in_range = nf_nat_proto_in_range, + .in_range = nf_nat_l4proto_in_range, .unique_tuple = dccp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; static int __init nf_nat_proto_dccp_init(void) { - return nf_nat_protocol_register(&nf_nat_protocol_dccp); + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp); + if (err < 0) + goto err1; + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp); + if (err < 0) + goto err2; + return 0; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); +err1: + return err; } static void __exit nf_nat_proto_dccp_fini(void) { - nf_nat_protocol_unregister(&nf_nat_protocol_dccp); + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); + } module_init(nf_nat_proto_dccp_init); diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index 3cce9b6c1c29..e64faa5ca893 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -8,53 +8,46 @@ #include <linux/types.h> #include <linux/init.h> -#include <linux/ip.h> #include <linux/sctp.h> #include <linux/module.h> #include <net/sctp/checksum.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l4proto.h> static u_int16_t nf_sctp_port_rover; static void -sctp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +sctp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &nf_sctp_port_rover); + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &nf_sctp_port_rover); } static bool sctp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct sk_buff *frag; sctp_sctphdr_t *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; __be32 crc32; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; - iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct sctphdr *)(skb->data + hdroff); if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; + /* Get rid of src port */ hdr->source = tuple->src.u.sctp.port; } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; + /* Get rid of dst port */ hdr->dest = tuple->dst.u.sctp.port; } @@ -68,24 +61,38 @@ sctp_manip_pkt(struct sk_buff *skb, return true; } -static const struct nf_nat_protocol nf_nat_protocol_sctp = { - .protonum = IPPROTO_SCTP, +static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { + .l4proto = IPPROTO_SCTP, .manip_pkt = sctp_manip_pkt, - .in_range = nf_nat_proto_in_range, + .in_range = nf_nat_l4proto_in_range, .unique_tuple = sctp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; static int __init nf_nat_proto_sctp_init(void) { - return nf_nat_protocol_register(&nf_nat_protocol_sctp); + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp); + if (err < 0) + goto err1; + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp); + if (err < 0) + goto err2; + return 0; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); +err1: + return err; } static void __exit nf_nat_proto_sctp_exit(void) { - nf_nat_protocol_unregister(&nf_nat_protocol_sctp); + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); } module_init(nf_nat_proto_sctp_init); diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c index 9fb4b4e72bbf..83ec8a6e4c36 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/netfilter/nf_nat_proto_tcp.c @@ -9,37 +9,36 @@ #include <linux/types.h> #include <linux/init.h> #include <linux/export.h> -#include <linux/ip.h> #include <linux/tcp.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink_conntrack.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_rule.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> #include <net/netfilter/nf_nat_core.h> -static u_int16_t tcp_port_rover; +static u16 tcp_port_rover; static void -tcp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +tcp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &tcp_port_rover); } static bool tcp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct tcphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; __be16 *portptr, newport, oldport; int hdrsize = 8; /* TCP connection tracking guarantees this much */ @@ -52,19 +51,14 @@ tcp_manip_pkt(struct sk_buff *skb, if (!skb_make_writable(skb, hdroff + hdrsize)) return false; - iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct tcphdr *)(skb->data + hdroff); if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; + /* Get rid of src port */ newport = tuple->src.u.tcp.port; portptr = &hdr->source; } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; + /* Get rid of dst port */ newport = tuple->dst.u.tcp.port; portptr = &hdr->dest; } @@ -75,17 +69,17 @@ tcp_manip_pkt(struct sk_buff *skb, if (hdrsize < sizeof(*hdr)) return true; - inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); return true; } -const struct nf_nat_protocol nf_nat_protocol_tcp = { - .protonum = IPPROTO_TCP, +const struct nf_nat_l4proto nf_nat_l4proto_tcp = { + .l4proto = IPPROTO_TCP, .manip_pkt = tcp_manip_pkt, - .in_range = nf_nat_proto_in_range, + .in_range = nf_nat_l4proto_in_range, .unique_tuple = tcp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c index 9883336e628f..7df613fb34a2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/netfilter/nf_nat_proto_udp.c @@ -9,59 +9,53 @@ #include <linux/types.h> #include <linux/export.h> #include <linux/init.h> -#include <linux/ip.h> #include <linux/udp.h> #include <linux/netfilter.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_rule.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> -static u_int16_t udp_port_rover; +static u16 udp_port_rover; static void -udp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +udp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &udp_port_rover); } static bool udp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct udphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; __be16 *portptr, newport; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; - - iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct udphdr *)(skb->data + hdroff); if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; + /* Get rid of src port */ newport = tuple->src.u.udp.port; portptr = &hdr->source; } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; + /* Get rid of dst port */ newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { - inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + l3proto->csum_update(skb, iphdroff, &hdr->check, + tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); if (!hdr->check) @@ -71,12 +65,12 @@ udp_manip_pkt(struct sk_buff *skb, return true; } -const struct nf_nat_protocol nf_nat_protocol_udp = { - .protonum = IPPROTO_UDP, +const struct nf_nat_l4proto nf_nat_l4proto_udp = { + .l4proto = IPPROTO_UDP, .manip_pkt = udp_manip_pkt, - .in_range = nf_nat_proto_in_range, + .in_range = nf_nat_l4proto_in_range, .unique_tuple = udp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index d24d10a7beb2..776a0d1317b1 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -9,59 +9,53 @@ #include <linux/types.h> #include <linux/init.h> -#include <linux/ip.h> #include <linux/udp.h> #include <linux/netfilter.h> #include <linux/module.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> -static u_int16_t udplite_port_rover; +static u16 udplite_port_rover; static void -udplite_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udplite_port_rover); + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &udplite_port_rover); } static bool udplite_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct udphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; __be16 *portptr, newport; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; - iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct udphdr *)(skb->data + hdroff); if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; + /* Get rid of source port */ newport = tuple->src.u.udp.port; portptr = &hdr->source; } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; + /* Get rid of dst port */ newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); if (!hdr->check) hdr->check = CSUM_MANGLED_0; @@ -70,24 +64,38 @@ udplite_manip_pkt(struct sk_buff *skb, return true; } -static const struct nf_nat_protocol nf_nat_protocol_udplite = { - .protonum = IPPROTO_UDPLITE, +static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { + .l4proto = IPPROTO_UDPLITE, .manip_pkt = udplite_manip_pkt, - .in_range = nf_nat_proto_in_range, + .in_range = nf_nat_l4proto_in_range, .unique_tuple = udplite_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; static int __init nf_nat_proto_udplite_init(void) { - return nf_nat_protocol_register(&nf_nat_protocol_udplite); + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite); + if (err < 0) + goto err1; + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite); + if (err < 0) + goto err2; + return 0; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); +err1: + return err; } static void __exit nf_nat_proto_udplite_fini(void) { - nf_nat_protocol_unregister(&nf_nat_protocol_udplite); + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); } module_init(nf_nat_proto_udplite_init); diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c index e0afe8112b1c..6e494d584412 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/netfilter/nf_nat_proto_unknown.c @@ -15,8 +15,7 @@ #include <linux/netfilter.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_rule.h> -#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_l4proto.h> static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type manip_type, @@ -26,26 +25,29 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, return true; } -static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { /* Sorry: we can't help you; if it's not unique, we can't frob - anything. */ + * anything. + */ return; } static bool unknown_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { return true; } -const struct nf_nat_protocol nf_nat_unknown_protocol = { +const struct nf_nat_l4proto nf_nat_l4proto_unknown = { .manip_pkt = unknown_manip_pkt, .in_range = unknown_in_range, .unique_tuple = unknown_unique_tuple, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index ea4a23813d26..16303c752213 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -3,7 +3,7 @@ * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> * based on RR's ip_nat_ftp.c and other modules. * (C) 2007 United Security Providers - * (C) 2007, 2008 Patrick McHardy <kaber@trash.net> + * (C) 2007, 2008, 2011, 2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -12,14 +12,12 @@ #include <linux/module.h> #include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/ip.h> +#include <linux/inet.h> #include <linux/udp.h> #include <linux/tcp.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> -#include <net/netfilter/nf_nat_rule.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_sip.h> @@ -30,7 +28,8 @@ MODULE_DESCRIPTION("SIP NAT helper"); MODULE_ALIAS("ip_nat_sip"); -static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, +static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, const char *buffer, unsigned int buflen) @@ -41,20 +40,20 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, unsigned int baseoff; if (nf_ct_protonum(ct) == IPPROTO_TCP) { - th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); - baseoff = ip_hdrlen(skb) + th->doff * 4; + th = (struct tcphdr *)(skb->data + protoff); + baseoff = protoff + th->doff * 4; matchoff += dataoff - baseoff; if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, - matchoff, matchlen, + protoff, matchoff, matchlen, buffer, buflen, false)) return 0; } else { - baseoff = ip_hdrlen(skb) + sizeof(struct udphdr); + baseoff = protoff + sizeof(struct udphdr); matchoff += dataoff - baseoff; if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, - matchoff, matchlen, + protoff, matchoff, matchlen, buffer, buflen)) return 0; } @@ -65,7 +64,30 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, return 1; } -static int map_addr(struct sk_buff *skb, unsigned int dataoff, +static int sip_sprintf_addr(const struct nf_conn *ct, char *buffer, + const union nf_inet_addr *addr, bool delim) +{ + if (nf_ct_l3num(ct) == NFPROTO_IPV4) + return sprintf(buffer, "%pI4", &addr->ip); + else { + if (delim) + return sprintf(buffer, "[%pI6c]", &addr->ip6); + else + return sprintf(buffer, "%pI6c", &addr->ip6); + } +} + +static int sip_sprintf_addr_port(const struct nf_conn *ct, char *buffer, + const union nf_inet_addr *addr, u16 port) +{ + if (nf_ct_l3num(ct) == NFPROTO_IPV4) + return sprintf(buffer, "%pI4:%u", &addr->ip, port); + else + return sprintf(buffer, "[%pI6c]:%u", &addr->ip6, port); +} + +static int map_addr(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, union nf_inet_addr *addr, __be16 port) @@ -73,32 +95,32 @@ static int map_addr(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; - __be32 newaddr; + union nf_inet_addr newaddr; __be16 newport; - if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip && + if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, addr) && ct->tuplehash[dir].tuple.src.u.udp.port == port) { - newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip; + newaddr = ct->tuplehash[!dir].tuple.dst.u3; newport = ct->tuplehash[!dir].tuple.dst.u.udp.port; - } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip && + } else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) && ct->tuplehash[dir].tuple.dst.u.udp.port == port) { - newaddr = ct->tuplehash[!dir].tuple.src.u3.ip; + newaddr = ct->tuplehash[!dir].tuple.src.u3; newport = ct->tuplehash[!dir].tuple.src.u.udp.port; } else return 1; - if (newaddr == addr->ip && newport == port) + if (nf_inet_addr_cmp(&newaddr, addr) && newport == port) return 1; - buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); - - return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, - buffer, buflen); + buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, ntohs(newport)); + return mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen); } -static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, +static int map_sip_addr(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, enum sip_header_types type) { @@ -111,11 +133,12 @@ static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, &matchoff, &matchlen, &addr, &port) <= 0) return 1; - return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, - &addr, port); + return map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port); } -static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, +static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -132,8 +155,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_request(ct, *dptr, *datalen, &matchoff, &matchlen, &addr, &port) > 0 && - !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, - &addr, port)) + !map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port)) return NF_DROP; request = 1; } else @@ -148,37 +171,41 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, hdr, NULL, &matchoff, &matchlen, &addr, &port) > 0) { - unsigned int matchend, poff, plen, buflen, n; - char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + unsigned int olen, matchend, poff, plen, buflen, n; + char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; /* We're only interested in headers related to this * connection */ if (request) { - if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip || + if (!nf_inet_addr_cmp(&addr, + &ct->tuplehash[dir].tuple.src.u3) || port != ct->tuplehash[dir].tuple.src.u.udp.port) goto next; } else { - if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip || + if (!nf_inet_addr_cmp(&addr, + &ct->tuplehash[dir].tuple.dst.u3) || port != ct->tuplehash[dir].tuple.dst.u.udp.port) goto next; } - if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, - &addr, port)) + olen = *datalen; + if (!map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port)) return NF_DROP; - matchend = matchoff + matchlen; + matchend = matchoff + matchlen + *datalen - olen; /* The maddr= parameter (RFC 2361) specifies where to send * the reply. */ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "maddr=", &poff, &plen, - &addr) > 0 && - addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && - addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { - buflen = sprintf(buffer, "%pI4", - &ct->tuplehash[!dir].tuple.dst.u3.ip); - if (!mangle_packet(skb, dataoff, dptr, datalen, + &addr, true) > 0 && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3) && + !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3)) { + buflen = sip_sprintf_addr(ct, buffer, + &ct->tuplehash[!dir].tuple.dst.u3, + true); + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; } @@ -187,12 +214,13 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, * from which the server received the request. */ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "received=", &poff, &plen, - &addr) > 0 && - addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && - addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - buflen = sprintf(buffer, "%pI4", - &ct->tuplehash[!dir].tuple.src.u3.ip); - if (!mangle_packet(skb, dataoff, dptr, datalen, + &addr, false) > 0 && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.dst.u3) && + !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.src.u3)) { + buflen = sip_sprintf_addr(ct, buffer, + &ct->tuplehash[!dir].tuple.src.u3, + false); + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; } @@ -206,7 +234,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; buflen = sprintf(buffer, "%u", ntohs(p)); - if (!mangle_packet(skb, dataoff, dptr, datalen, + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; } @@ -220,19 +248,21 @@ next: SIP_HDR_CONTACT, &in_header, &matchoff, &matchlen, &addr, &port) > 0) { - if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, + if (!map_addr(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, &addr, port)) return NF_DROP; } - if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) || - !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) + if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) || + !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; return NF_ACCEPT; } -static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) +static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff, + s16 off) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -241,37 +271,38 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0) return; - th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); + th = (struct tcphdr *)(skb->data + protoff); nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); } /* Handles expected signalling connections and media streams */ -static void ip_nat_sip_expected(struct nf_conn *ct, +static void nf_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = exp->saved_proto; - range.min_ip = range.max_ip = exp->saved_ip; + range.min_proto = range.max_proto = exp->saved_proto; + range.min_addr = range.max_addr = exp->saved_addr; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); /* Change src to where master sends to, but only if the connection * actually came from the same source. */ - if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == - ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { + if (nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, + &ct->master->tuplehash[exp->dir].tuple.src.u3)) { range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); } } -static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, +static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *exp, unsigned int matchoff, @@ -280,16 +311,17 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - __be32 newip; + union nf_inet_addr newaddr; u_int16_t port; - char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; /* Connection will come from reply */ - if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) - newip = exp->tuple.dst.u3.ip; + if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3)) + newaddr = exp->tuple.dst.u3; else - newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + newaddr = ct->tuplehash[!dir].tuple.dst.u3; /* If the signalling port matches the connection's source port in the * original direction, try to use the destination port in the opposite @@ -300,11 +332,11 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, else port = ntohs(exp->tuple.dst.u.udp.port); - exp->saved_ip = exp->tuple.dst.u3.ip; - exp->tuple.dst.u3.ip = newip; + exp->saved_addr = exp->tuple.dst.u3; + exp->tuple.dst.u3 = newaddr; exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; exp->dir = !dir; - exp->expectfn = ip_nat_sip_expected; + exp->expectfn = nf_nat_sip_expected; for (; port != 0; port++) { int ret; @@ -322,10 +354,10 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, if (port == 0) return NF_DROP; - if (exp->tuple.dst.u3.ip != exp->saved_ip || + if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { - buflen = sprintf(buffer, "%pI4:%u", &newip, port); - if (!mangle_packet(skb, dataoff, dptr, datalen, + buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port); + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen)) goto err; } @@ -336,7 +368,8 @@ err: return NF_DROP; } -static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, +static int mangle_content_len(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -358,11 +391,12 @@ static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, return 0; buflen = sprintf(buffer, "%u", c_len); - return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, - buffer, buflen); + return mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen); } -static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, +static int mangle_sdp_packet(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int sdpoff, enum sdp_header_types type, @@ -376,29 +410,33 @@ static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, &matchoff, &matchlen) <= 0) return -ENOENT; - return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, - buffer, buflen) ? 0 : -EINVAL; + return mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL; } -static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff, +static unsigned int nf_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, const union nf_inet_addr *addr) { - char buffer[sizeof("nnn.nnn.nnn.nnn")]; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + char buffer[INET6_ADDRSTRLEN]; unsigned int buflen; - buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term, - buffer, buflen)) + buflen = sip_sprintf_addr(ct, buffer, addr, false); + if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, + sdpoff, type, term, buffer, buflen)) return 0; - return mangle_content_len(skb, dataoff, dptr, datalen); + return mangle_content_len(skb, protoff, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, +static unsigned int nf_nat_sdp_port(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, @@ -408,30 +446,32 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, unsigned int buflen; buflen = sprintf(buffer, "%u", port); - if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, - buffer, buflen)) + if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen)) return 0; - return mangle_content_len(skb, dataoff, dptr, datalen); + return mangle_content_len(skb, protoff, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff, +static unsigned int nf_nat_sdp_session(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int sdpoff, const union nf_inet_addr *addr) { - char buffer[sizeof("nnn.nnn.nnn.nnn")]; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + char buffer[INET6_ADDRSTRLEN]; unsigned int buflen; /* Mangle session description owner and contact addresses */ - buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, - SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, - buffer, buflen)) + buflen = sip_sprintf_addr(ct, buffer, addr, false); + if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, + SDP_HDR_OWNER, SDP_HDR_MEDIA, buffer, buflen)) return 0; - switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, - SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, + switch (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff, + SDP_HDR_CONNECTION, SDP_HDR_MEDIA, buffer, buflen)) { case 0: /* @@ -447,12 +487,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff return 0; } - return mangle_content_len(skb, dataoff, dptr, datalen); + return mangle_content_len(skb, protoff, dataoff, dptr, datalen); } /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, +static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp, @@ -466,23 +507,23 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, u_int16_t port; /* Connection will come from reply */ - if (ct->tuplehash[dir].tuple.src.u3.ip == - ct->tuplehash[!dir].tuple.dst.u3.ip) - rtp_addr->ip = rtp_exp->tuple.dst.u3.ip; + if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3)) + *rtp_addr = rtp_exp->tuple.dst.u3; else - rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip; + *rtp_addr = ct->tuplehash[!dir].tuple.dst.u3; - rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip; - rtp_exp->tuple.dst.u3.ip = rtp_addr->ip; + rtp_exp->saved_addr = rtp_exp->tuple.dst.u3; + rtp_exp->tuple.dst.u3 = *rtp_addr; rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; rtp_exp->dir = !dir; - rtp_exp->expectfn = ip_nat_sip_expected; + rtp_exp->expectfn = nf_nat_sip_expected; - rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip; - rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip; + rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3; + rtcp_exp->tuple.dst.u3 = *rtp_addr; rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; rtcp_exp->dir = !dir; - rtcp_exp->expectfn = ip_nat_sip_expected; + rtcp_exp->expectfn = nf_nat_sip_expected; /* Try to get same pair of ports: if not, try to change them. */ for (port = ntohs(rtp_exp->tuple.dst.u.udp.port); @@ -501,7 +542,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, ret = nf_ct_expect_related(rtcp_exp); if (ret == 0) break; - else if (ret != -EBUSY) { + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { nf_ct_unexpect_related(rtp_exp); port = 0; break; @@ -513,7 +557,7 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, /* Update media port. */ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && - !ip_nat_sdp_port(skb, dataoff, dptr, datalen, + !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, mediaoff, medialen, port)) goto err2; @@ -527,8 +571,8 @@ err1: } static struct nf_ct_helper_expectfn sip_nat = { - .name = "sip", - .expectfn = ip_nat_sip_expected, + .name = "sip", + .expectfn = nf_nat_sip_expected, }; static void __exit nf_nat_sip_fini(void) @@ -553,13 +597,13 @@ static int __init nf_nat_sip_init(void) BUG_ON(nf_nat_sdp_port_hook != NULL); BUG_ON(nf_nat_sdp_session_hook != NULL); BUG_ON(nf_nat_sdp_media_hook != NULL); - RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media); + RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip); + RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust); + RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect); + RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr); + RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port); + RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session); + RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media); nf_ct_helper_expectfn_register(&sip_nat); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c index 9dbb8d284f99..ccabbda71a3e 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/netfilter/nf_nat_tftp.c @@ -11,7 +11,6 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_nat_helper.h> -#include <net/netfilter/nf_nat_rule.h> #include <linux/netfilter/nf_conntrack_tftp.h> MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ce60cf0f6c11..8d2cf9ec37a8 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -118,7 +118,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) * through nf_reinject(). */ static int __nf_queue(struct sk_buff *skb, - struct list_head *elem, + struct nf_hook_ops *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, @@ -155,7 +155,7 @@ static int __nf_queue(struct sk_buff *skb, *entry = (struct nf_queue_entry) { .skb = skb, - .elem = list_entry(elem, struct nf_hook_ops, list), + .elem = elem, .pf = pf, .hook = hook, .indev = indev, @@ -225,7 +225,7 @@ static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) #endif int nf_queue(struct sk_buff *skb, - struct list_head *elem, + struct nf_hook_ops *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, @@ -287,7 +287,7 @@ int nf_queue(struct sk_buff *skb, void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct sk_buff *skb = entry->skb; - struct list_head *elem = &entry->elem->list; + struct nf_hook_ops *elem = entry->elem; const struct nf_afinfo *afinfo; int err; @@ -297,7 +297,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) { - elem = elem->prev; + elem = list_entry(elem->list.prev, struct nf_hook_ops, list); verdict = NF_ACCEPT; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index a26503342e71..ffb92c03a358 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -241,7 +241,7 @@ static int __net_init nfnetlink_net_init(struct net *net) #endif }; - nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, THIS_MODULE, &cfg); + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg); if (!nfnl) return -ENOMEM; net->nfnl_stash = nfnl; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index b2e7310ca0b8..589d686f0b4c 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -79,11 +79,11 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, if (tb[NFACCT_BYTES]) { atomic64_set(&nfacct->bytes, - be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]))); + be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES]))); } if (tb[NFACCT_PKTS]) { atomic64_set(&nfacct->pkts, - be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]))); + be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); } atomic_set(&nfacct->refcnt, 1); list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); @@ -91,16 +91,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, } static int -nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_acct *acct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; event |= NFNL_SUBSYS_ACCT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -150,7 +150,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur) < 0) { @@ -195,7 +195,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur); @@ -203,7 +203,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d6836193d479..945950a8b1f1 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -74,7 +74,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; - tuple->src.l3num = ntohs(nla_get_u16(tb[NFCTH_TUPLE_L3PROTONUM])); + tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); return 0; @@ -85,6 +85,9 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) { const struct nf_conn_help *help = nfct_help(ct); + if (attr == NULL) + return -EINVAL; + if (help->helper->data_len == 0) return -EINVAL; @@ -395,16 +398,16 @@ nla_put_failure: } static int -nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event |= NFNL_SUBSYS_CTHELPER << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -468,7 +471,7 @@ restart: cb->args[1] = 0; } if (nfnl_cthelper_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur) < 0) { @@ -538,7 +541,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur); @@ -547,7 +550,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index cdecbc8fe965..8847b4d8be06 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -155,16 +155,16 @@ err_proto_put: } static int -ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; struct nf_conntrack_l4proto *l4proto = timeout->l4proto; event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -222,7 +222,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { @@ -268,7 +268,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, break; } - ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).pid, + ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur); @@ -276,7 +276,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 169ab59ed9d4..9f199f2e31fa 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -55,7 +55,8 @@ struct nfulnl_instance { unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; - int peer_pid; /* PID of the peer process */ + struct user_namespace *peer_user_ns; /* User namespace of the peer process */ + int peer_portid; /* PORTID of the peer process */ /* configurable parameters */ unsigned int flushtimeout; /* timeout until queue flush */ @@ -132,7 +133,7 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int pid) +instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; int err; @@ -162,7 +163,8 @@ instance_create(u_int16_t group_num, int pid) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); - inst->peer_pid = pid; + inst->peer_user_ns = user_ns; + inst->peer_portid = portid; inst->group_num = group_num; inst->qthreshold = NFULNL_QTHRESH_DEFAULT; @@ -334,7 +336,7 @@ __nfulnl_send(struct nfulnl_instance *inst) if (!nlh) goto out; } - status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, MSG_DONTWAIT); inst->qlen = 0; @@ -381,6 +383,7 @@ __build_packet_message(struct nfulnl_instance *inst, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; + struct sock *sk; nlh = nlmsg_put(inst->skb, 0, 0, NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, @@ -480,7 +483,7 @@ __build_packet_message(struct nfulnl_instance *inst, } if (indev && skb_mac_header_was_set(skb)) { - if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || + if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || nla_put_be16(inst->skb, NFULA_HWLEN, htons(skb->dev->hard_header_len)) || nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, @@ -499,18 +502,21 @@ __build_packet_message(struct nfulnl_instance *inst, } /* UID */ - if (skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) { - struct file *file = skb->sk->sk_socket->file; - __be32 uid = htonl(file->f_cred->fsuid); - __be32 gid = htonl(file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); + sk = skb->sk; + if (sk && sk->sk_state != TCP_TIME_WAIT) { + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + struct file *file = sk->sk_socket->file; + const struct cred *cred = file->f_cred; + struct user_namespace *user_ns = inst->peer_user_ns; + __be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid)); + __be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid)); + read_unlock_bh(&sk->sk_callback_lock); if (nla_put_be32(inst->skb, NFULA_UID, uid) || nla_put_be32(inst->skb, NFULA_GID, gid)) goto nla_put_failure; } else - read_unlock_bh(&skb->sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /* local sequence number */ @@ -698,7 +704,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -707,7 +713,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((net_eq(n->net, &init_net)) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -769,7 +775,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_lookup_get(group_num); - if (inst && inst->peer_pid != NETLINK_CB(skb).pid) { + if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto out_put; } @@ -783,7 +789,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_create(group_num, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid, + sk_user_ns(NETLINK_CB(skb).ssk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); goto out; @@ -941,7 +948,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", inst->group_num, - inst->peer_pid, inst->qlen, + inst->peer_portid, inst->qlen, inst->copy_mode, inst->copy_range, inst->flushtimeout, atomic_read(&inst->use)); } @@ -996,8 +1003,10 @@ static int __init nfnetlink_log_init(void) #ifdef CONFIG_PROC_FS if (!proc_create("nfnetlink_log", 0440, - proc_net_netfilter, &nful_file_ops)) + proc_net_netfilter, &nful_file_ops)) { + status = -ENOMEM; goto cleanup_logger; + } #endif return status; diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index c0496a55ad0c..e12d44e75b21 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -44,7 +44,7 @@ struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; - int peer_pid; + int peer_portid; unsigned int queue_maxlen; unsigned int copy_range; unsigned int queue_dropped; @@ -92,7 +92,7 @@ instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int pid) +instance_create(u_int16_t queue_num, int portid) { struct nfqnl_instance *inst; unsigned int h; @@ -111,7 +111,7 @@ instance_create(u_int16_t queue_num, int pid) } inst->queue_num = queue_num; - inst->peer_pid = pid; + inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; inst->copy_range = 0xfffff; inst->copy_mode = NFQNL_COPY_NONE; @@ -225,7 +225,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, { sk_buff_data_t old_tail; size_t size; - size_t data_len = 0; + size_t data_len = 0, cap_len = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -247,7 +247,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, #endif + nla_total_size(sizeof(u_int32_t)) /* mark */ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp) + + nla_total_size(sizeof(u_int32_t))); /* cap_len */ outdev = entry->outdev; @@ -266,6 +267,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, data_len = entskb->len; size += nla_total_size(data_len); + cap_len = entskb->len; break; } @@ -402,12 +404,14 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; + if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) + goto nla_put_failure; + nlh->nlmsg_len = skb->tail - old_tail; return skb; nla_put_failure: - if (skb) - kfree_skb(skb); + kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); return NULL; } @@ -440,7 +444,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } spin_lock_bh(&queue->lock); - if (!queue->peer_pid) { + if (!queue->peer_portid) { err = -EINVAL; goto err_out_free_nskb; } @@ -459,7 +463,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; @@ -527,9 +531,13 @@ nfqnl_set_mode(struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: queue->copy_mode = mode; - /* we're using struct nlattr which has 16bit nla_len */ - if (range > 0xffff) - queue->copy_range = 0xffff; + /* We're using struct nlattr which has 16bit nla_len. Note that + * nla_len includes the header length. Thus, the maximum packet + * length that we support is 65531 bytes. We send truncated + * packets if the specified length is larger than that. + */ + if (range > 0xffff - NLA_HDRLEN) + queue->copy_range = 0xffff - NLA_HDRLEN; else queue->copy_range = range; break; @@ -616,7 +624,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -625,7 +633,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((n->net == &init_net) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -650,7 +658,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { [NFQA_MARK] = { .type = NLA_U32 }, }; -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) +static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) { struct nfqnl_instance *queue; @@ -658,7 +666,7 @@ static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) if (!queue) return ERR_PTR(-ENODEV); - if (queue->peer_pid != nlpid) + if (queue->peer_portid != nlportid) return ERR_PTR(-EPERM); return queue; @@ -698,7 +706,7 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -749,7 +757,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, queue = instance_lookup(queue_num); if (!queue) - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -832,7 +840,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, rcu_read_lock(); queue = instance_lookup(queue_num); - if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { + if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto err_out_unlock; } @@ -844,7 +852,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EBUSY; goto err_out_unlock; } - queue = instance_create(queue_num, NETLINK_CB(skb).pid); + queue = instance_create(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) { ret = PTR_ERR(queue); goto err_out_unlock; @@ -1016,7 +1024,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", inst->queue_num, - inst->peer_pid, inst->queue_total, + inst->peer_portid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, inst->id_sequence, 1); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 116018560c60..16c712563860 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -72,14 +72,44 @@ static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) return 0; } +static int +xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, + const struct xt_tgchk_param *par) +{ + struct nf_conntrack_helper *helper; + struct nf_conn_help *help; + u8 proto; + + proto = xt_ct_find_proto(par); + if (!proto) { + pr_info("You must specify a L4 protocol, and not use " + "inversions on it.\n"); + return -ENOENT; + } + + helper = nf_conntrack_helper_try_module_get(helper_name, par->family, + proto); + if (helper == NULL) { + pr_info("No such helper \"%s\"\n", helper_name); + return -ENOENT; + } + + help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); + if (help == NULL) { + module_put(helper->me); + return -ENOMEM; + } + + help->helper = helper; + return 0; +} + static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) { struct xt_ct_target_info *info = par->targinfo; struct nf_conntrack_tuple t; - struct nf_conn_help *help; struct nf_conn *ct; - int ret = 0; - u8 proto; + int ret; if (info->flags & ~XT_CT_NOTRACK) return -EINVAL; @@ -112,31 +142,9 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { - struct nf_conntrack_helper *helper; - - ret = -ENOENT; - proto = xt_ct_find_proto(par); - if (!proto) { - pr_info("You must specify a L4 protocol, " - "and not use inversions on it.\n"); - goto err3; - } - - ret = -ENOENT; - helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (helper == NULL) { - pr_info("No such helper \"%s\"\n", info->helper); - goto err3; - } - - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); - if (help == NULL) + ret = xt_ct_set_helper(ct, info->helper, par); + if (ret < 0) goto err3; - - help->helper = helper; } __set_bit(IPS_TEMPLATE_BIT, &ct->status); @@ -164,17 +172,77 @@ static void __xt_ct_tg_timeout_put(struct ctnl_timeout *timeout) } #endif +static int +xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, + const char *timeout_name) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + typeof(nf_ct_timeout_find_get_hook) timeout_find_get; + struct ctnl_timeout *timeout; + struct nf_conn_timeout *timeout_ext; + const struct ipt_entry *e = par->entryinfo; + struct nf_conntrack_l4proto *l4proto; + int ret = 0; + + rcu_read_lock(); + timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); + if (timeout_find_get == NULL) { + ret = -ENOENT; + pr_info("Timeout policy base is empty\n"); + goto out; + } + + if (e->ip.invflags & IPT_INV_PROTO) { + ret = -EINVAL; + pr_info("You cannot use inversion on L4 protocol\n"); + goto out; + } + + timeout = timeout_find_get(timeout_name); + if (timeout == NULL) { + ret = -ENOENT; + pr_info("No such timeout policy \"%s\"\n", timeout_name); + goto out; + } + + if (timeout->l3num != par->family) { + ret = -EINVAL; + pr_info("Timeout policy `%s' can only be used by L3 protocol " + "number %d\n", timeout_name, timeout->l3num); + goto err_put_timeout; + } + /* Make sure the timeout policy matches any existing protocol tracker, + * otherwise default to generic. + */ + l4proto = __nf_ct_l4proto_find(par->family, e->ip.proto); + if (timeout->l4proto->l4proto != l4proto->l4proto) { + ret = -EINVAL; + pr_info("Timeout policy `%s' can only be used by L4 protocol " + "number %d\n", + timeout_name, timeout->l4proto->l4proto); + goto err_put_timeout; + } + timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); + if (timeout_ext == NULL) + ret = -ENOMEM; + +err_put_timeout: + __xt_ct_tg_timeout_put(timeout); +out: + rcu_read_unlock(); + return ret; +#else + return -EOPNOTSUPP; +#endif +} + static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) { struct xt_ct_target_info_v1 *info = par->targinfo; struct nf_conntrack_tuple t; - struct nf_conn_help *help; struct nf_conn *ct; - int ret = 0; - u8 proto; -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - struct ctnl_timeout *timeout; -#endif + int ret; + if (info->flags & ~XT_CT_NOTRACK) return -EINVAL; @@ -206,93 +274,16 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { - struct nf_conntrack_helper *helper; - - ret = -ENOENT; - proto = xt_ct_find_proto(par); - if (!proto) { - pr_info("You must specify a L4 protocol, " - "and not use inversions on it.\n"); - goto err3; - } - - ret = -ENOENT; - helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (helper == NULL) { - pr_info("No such helper \"%s\"\n", info->helper); - goto err3; - } - - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); - if (help == NULL) + ret = xt_ct_set_helper(ct, info->helper, par); + if (ret < 0) goto err3; - - help->helper = helper; } -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT if (info->timeout[0]) { - typeof(nf_ct_timeout_find_get_hook) timeout_find_get; - struct nf_conn_timeout *timeout_ext; - - rcu_read_lock(); - timeout_find_get = - rcu_dereference(nf_ct_timeout_find_get_hook); - - if (timeout_find_get) { - const struct ipt_entry *e = par->entryinfo; - struct nf_conntrack_l4proto *l4proto; - - if (e->ip.invflags & IPT_INV_PROTO) { - ret = -EINVAL; - pr_info("You cannot use inversion on " - "L4 protocol\n"); - goto err4; - } - timeout = timeout_find_get(info->timeout); - if (timeout == NULL) { - ret = -ENOENT; - pr_info("No such timeout policy \"%s\"\n", - info->timeout); - goto err4; - } - if (timeout->l3num != par->family) { - ret = -EINVAL; - pr_info("Timeout policy `%s' can only be " - "used by L3 protocol number %d\n", - info->timeout, timeout->l3num); - goto err5; - } - /* Make sure the timeout policy matches any existing - * protocol tracker, otherwise default to generic. - */ - l4proto = __nf_ct_l4proto_find(par->family, - e->ip.proto); - if (timeout->l4proto->l4proto != l4proto->l4proto) { - ret = -EINVAL; - pr_info("Timeout policy `%s' can only be " - "used by L4 protocol number %d\n", - info->timeout, - timeout->l4proto->l4proto); - goto err5; - } - timeout_ext = nf_ct_timeout_ext_add(ct, timeout, - GFP_ATOMIC); - if (timeout_ext == NULL) { - ret = -ENOMEM; - goto err5; - } - } else { - ret = -ENOENT; - pr_info("Timeout policy base is empty\n"); - goto err4; - } - rcu_read_unlock(); + ret = xt_ct_set_timeout(ct, par, info->timeout); + if (ret < 0) + goto err3; } -#endif __set_bit(IPS_TEMPLATE_BIT, &ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct->status); @@ -300,12 +291,6 @@ out: info->ct = ct; return 0; -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT -err5: - __xt_ct_tg_timeout_put(timeout); -err4: - rcu_read_unlock(); -#endif err3: nf_conntrack_free(ct); err2: @@ -330,15 +315,30 @@ static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) nf_ct_put(info->ct); } -static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +static void xt_ct_destroy_timeout(struct nf_conn *ct) { - struct xt_ct_target_info_v1 *info = par->targinfo; - struct nf_conn *ct = info->ct; - struct nf_conn_help *help; #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_conn_timeout *timeout_ext; typeof(nf_ct_timeout_put_hook) timeout_put; + + rcu_read_lock(); + timeout_put = rcu_dereference(nf_ct_timeout_put_hook); + + if (timeout_put) { + timeout_ext = nf_ct_timeout_find(ct); + if (timeout_ext) + timeout_put(timeout_ext->timeout); + } + rcu_read_unlock(); #endif +} + +static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + struct nf_conn *ct = info->ct; + struct nf_conn_help *help; + if (!nf_ct_is_untracked(ct)) { help = nfct_help(ct); if (help) @@ -346,17 +346,7 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) nf_ct_l3proto_module_put(par->family); -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - rcu_read_lock(); - timeout_put = rcu_dereference(nf_ct_timeout_put_hook); - - if (timeout_put) { - timeout_ext = nf_ct_timeout_find(ct); - if (timeout_ext) - timeout_put(timeout_ext->timeout); - } - rcu_read_unlock(); -#endif + xt_ct_destroy_timeout(ct); } nf_ct_put(info->ct); } diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index ff5f75fddb15..fa40096940a1 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -145,6 +145,21 @@ static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb, return 0; } +static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk) +{ + if (!sk || sk->sk_state == TCP_TIME_WAIT) + return; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + const struct cred *cred = sk->sk_socket->file->f_cred; + sb_add(m, "UID=%u GID=%u ", + from_kuid_munged(&init_user_ns, cred->fsuid), + from_kgid_munged(&init_user_ns, cred->fsgid)); + } + read_unlock_bh(&sk->sk_callback_lock); +} + /* One level of recursion won't kill us */ static void dump_ipv4_packet(struct sbuff *m, const struct nf_loginfo *info, @@ -361,14 +376,8 @@ static void dump_ipv4_packet(struct sbuff *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && !iphoff && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } + if ((logflags & XT_LOG_UID) && !iphoff) + dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (!iphoff && skb->mark) @@ -436,8 +445,8 @@ log_packet_common(struct sbuff *m, const struct nf_loginfo *loginfo, const char *prefix) { - sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, - prefix, + sb_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", + '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER @@ -717,14 +726,8 @@ static void dump_ipv6_packet(struct sbuff *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && recurse && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } + if ((logflags & XT_LOG_UID) && recurse) + dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (!recurse && skb->mark) diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c new file mode 100644 index 000000000000..b253e07cb1c5 --- /dev/null +++ b/net/netfilter/xt_NETMAP.c @@ -0,0 +1,165 @@ +/* + * (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk> + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ip.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/ipv6.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_nat.h> + +static unsigned int +netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + union nf_inet_addr new_addr, netmask; + unsigned int i; + + ct = nf_ct_get(skb, &ctinfo); + for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++) + netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ + range->max_addr.ip6[i]); + + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) + new_addr.in6 = ipv6_hdr(skb)->daddr; + else + new_addr.in6 = ipv6_hdr(skb)->saddr; + + for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) { + new_addr.ip6[i] &= ~netmask.ip6[i]; + new_addr.ip6[i] |= range->min_addr.ip6[i] & + netmask.ip6[i]; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr = new_addr; + newrange.max_addr = new_addr; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); +} + +static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) + return -EINVAL; + return 0; +} + +static unsigned int +netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be32 new_ip, netmask; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range newrange; + + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT || + par->hooknum == NF_INET_LOCAL_IN); + ct = nf_ct_get(skb, &ctinfo); + + netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); + + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) + new_ip = ip_hdr(skb)->daddr & ~netmask; + else + new_ip = ip_hdr(skb)->saddr & ~netmask; + new_ip |= mr->range[0].min_ip & netmask; + + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = new_ip; + newrange.max_addr.ip = new_ip; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); +} + +static int netmap_tg4_check(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { + pr_debug("bad MAP_IPS.\n"); + return -EINVAL; + } + if (mr->rangesize != 1) { + pr_debug("bad rangesize %u.\n", mr->rangesize); + return -EINVAL; + } + return 0; +} + +static struct xt_target netmap_tg_reg[] __read_mostly = { + { + .name = "NETMAP", + .family = NFPROTO_IPV6, + .revision = 0, + .target = netmap_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .checkentry = netmap_tg6_checkentry, + .me = THIS_MODULE, + }, + { + .name = "NETMAP", + .family = NFPROTO_IPV4, + .revision = 0, + .target = netmap_tg4, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .checkentry = netmap_tg4_check, + .me = THIS_MODULE, + }, +}; + +static int __init netmap_tg_init(void) +{ + return xt_register_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg)); +} + +static void netmap_tg_exit(void) +{ + xt_unregister_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg)); +} + +module_init(netmap_tg_init); +module_exit(netmap_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of subnets"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS("ip6t_NETMAP"); +MODULE_ALIAS("ipt_NETMAP"); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 7babe7d68716..817f9e9f2b16 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -43,7 +43,7 @@ static u32 hash_v4(const struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); /* packets in either direction go into same queue */ - if (iph->saddr < iph->daddr) + if ((__force u32)iph->saddr < (__force u32)iph->daddr) return jhash_3words((__force u32)iph->saddr, (__force u32)iph->daddr, iph->protocol, jhash_initval); @@ -57,7 +57,8 @@ static u32 hash_v6(const struct sk_buff *skb) const struct ipv6hdr *ip6h = ipv6_hdr(skb); u32 a, b, c; - if (ip6h->saddr.s6_addr32[3] < ip6h->daddr.s6_addr32[3]) { + if ((__force u32)ip6h->saddr.s6_addr32[3] < + (__force u32)ip6h->daddr.s6_addr32[3]) { a = (__force u32) ip6h->saddr.s6_addr32[3]; b = (__force u32) ip6h->daddr.s6_addr32[3]; } else { @@ -65,7 +66,8 @@ static u32 hash_v6(const struct sk_buff *skb) a = (__force u32) ip6h->daddr.s6_addr32[3]; } - if (ip6h->saddr.s6_addr32[1] < ip6h->daddr.s6_addr32[1]) + if ((__force u32)ip6h->saddr.s6_addr32[1] < + (__force u32)ip6h->daddr.s6_addr32[1]) c = (__force u32) ip6h->saddr.s6_addr32[1]; else c = (__force u32) ip6h->daddr.s6_addr32[1]; diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c deleted file mode 100644 index 9d782181b6c8..000000000000 --- a/net/netfilter/xt_NOTRACK.c +++ /dev/null @@ -1,53 +0,0 @@ -/* This is a module which is used for setting up fake conntracks - * on packets so that they are not seen by the conntrack/NAT code. - */ -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter/x_tables.h> -#include <net/netfilter/nf_conntrack.h> - -MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_NOTRACK"); -MODULE_ALIAS("ip6t_NOTRACK"); - -static unsigned int -notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) - return XT_CONTINUE; - - /* Attach fake conntrack entry. - If there is a real ct entry correspondig to this packet, - it'll hang aroun till timing out. We don't deal with it - for performance reasons. JK */ - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); - - return XT_CONTINUE; -} - -static struct xt_target notrack_tg_reg __read_mostly = { - .name = "NOTRACK", - .revision = 0, - .family = NFPROTO_UNSPEC, - .target = notrack_tg, - .table = "raw", - .me = THIS_MODULE, -}; - -static int __init notrack_tg_init(void) -{ - return xt_register_target(¬rack_tg_reg); -} - -static void __exit notrack_tg_exit(void) -{ - xt_unregister_target(¬rack_tg_reg); -} - -module_init(notrack_tg_init); -module_exit(notrack_tg_exit); diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c new file mode 100644 index 000000000000..22a10309297c --- /dev/null +++ b/net/netfilter/xt_REDIRECT.c @@ -0,0 +1,190 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include <linux/if.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/netfilter.h> +#include <linux/types.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/x_tables.h> +#include <net/addrconf.h> +#include <net/checksum.h> +#include <net/protocol.h> +#include <net/netfilter/nf_nat.h> + +static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + +static unsigned int +redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct in6_addr newdst; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (par->hooknum == NF_INET_LOCAL_OUT) + newdst = loopback_addr; + else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool addr = false; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { + newdst = ifa->addr; + addr = true; + break; + } + } + rcu_read_unlock(); + + if (!addr) + return NF_DROP; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = newdst; + newrange.max_addr.in6 = newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + +static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + return 0; +} + +/* FIXME: Take multiple ranges --RR */ +static int redirect_tg4_check(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { + pr_debug("bad MAP_IPS.\n"); + return -EINVAL; + } + if (mr->rangesize != 1) { + pr_debug("bad rangesize %u.\n", mr->rangesize); + return -EINVAL; + } + return 0; +} + +static unsigned int +redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be32 newdst; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range newrange; + + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + /* Local packets: make them go to loopback */ + if (par->hooknum == NF_INET_LOCAL_OUT) + newdst = htonl(0x7F000001); + else { + struct in_device *indev; + struct in_ifaddr *ifa; + + newdst = 0; + + rcu_read_lock(); + indev = __in_dev_get_rcu(skb->dev); + if (indev && (ifa = indev->ifa_list)) + newdst = ifa->ifa_local; + rcu_read_unlock(); + + if (!newdst) + return NF_DROP; + } + + /* Transfer from original range. */ + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newdst; + newrange.max_addr.ip = newdst; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + +static struct xt_target redirect_tg_reg[] __read_mostly = { + { + .name = "REDIRECT", + .family = NFPROTO_IPV6, + .revision = 0, + .table = "nat", + .checkentry = redirect_tg6_checkentry, + .target = redirect_tg6, + .targetsize = sizeof(struct nf_nat_range), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, + { + .name = "REDIRECT", + .family = NFPROTO_IPV4, + .revision = 0, + .table = "nat", + .target = redirect_tg4, + .checkentry = redirect_tg4_check, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, +}; + +static int __init redirect_tg_init(void) +{ + return xt_register_targets(redirect_tg_reg, + ARRAY_SIZE(redirect_tg_reg)); +} + +static void __exit redirect_tg_exit(void) +{ + xt_unregister_targets(redirect_tg_reg, ARRAY_SIZE(redirect_tg_reg)); +} + +module_init(redirect_tg_init); +module_exit(redirect_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); +MODULE_ALIAS("ip6t_REDIRECT"); +MODULE_ALIAS("ipt_REDIRECT"); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 5c22ce8ab309..a4c1e4528cac 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -117,11 +117,11 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* For SMP, we only want to use one set of state. */ r->master = priv; + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + priv->prev = jiffies; + priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ if (r->cost == 0) { - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - priv->prev = jiffies; - priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c new file mode 100644 index 000000000000..81aafa8e4fef --- /dev/null +++ b/net/netfilter/xt_nat.c @@ -0,0 +1,170 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_nat_core.h> + +static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (mr->rangesize != 1) { + pr_info("%s: multiple ranges no longer supported\n", + par->target->name); + return -EINVAL; + } + return 0; +} + +static void xt_nat_convert_range(struct nf_nat_range *dst, + const struct nf_nat_ipv4_range *src) +{ + memset(&dst->min_addr, 0, sizeof(dst->min_addr)); + memset(&dst->max_addr, 0, sizeof(dst->max_addr)); + + dst->flags = src->flags; + dst->min_addr.ip = src->min_ip; + dst->max_addr.ip = src->max_ip; + dst->min_proto = src->min; + dst->max_proto = src->max; +} + +static unsigned int +xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range range; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + xt_nat_convert_range(&range, &mr->range[0]); + return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); +} + +static unsigned int +xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range range; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + xt_nat_convert_range(&range, &mr->range[0]); + return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); +} + +static unsigned int +xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC); +} + +static unsigned int +xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST); +} + +static struct xt_target xt_nat_target_reg[] __read_mostly = { + { + .name = "SNAT", + .revision = 0, + .checkentry = xt_nat_checkentry_v0, + .target = xt_snat_target_v0, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .family = NFPROTO_IPV4, + .table = "nat", + .hooks = (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, + { + .name = "DNAT", + .revision = 0, + .checkentry = xt_nat_checkentry_v0, + .target = xt_dnat_target_v0, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .family = NFPROTO_IPV4, + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, + { + .name = "SNAT", + .revision = 1, + .target = xt_snat_target_v1, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, + { + .name = "DNAT", + .revision = 1, + .target = xt_dnat_target_v1, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +}; + +static int __init xt_nat_init(void) +{ + return xt_register_targets(xt_nat_target_reg, + ARRAY_SIZE(xt_nat_target_reg)); +} + +static void __exit xt_nat_exit(void) +{ + xt_unregister_targets(xt_nat_target_reg, ARRAY_SIZE(xt_nat_target_reg)); +} + +module_init(xt_nat_init); +module_exit(xt_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS("ipt_SNAT"); +MODULE_ALIAS("ipt_DNAT"); +MODULE_ALIAS("ip6t_SNAT"); +MODULE_ALIAS("ip6t_DNAT"); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 846f895cb656..a5e673d32bda 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -269,7 +269,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) mss <<= 8; mss |= optp[2]; - mss = ntohs(mss); + mss = ntohs((__force __be16)mss); break; case OSFOPT_TS: loop_cont = 1; diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 772d7389b337..ca2e577ed8ac 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -17,6 +17,17 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_owner.h> +static int owner_check(const struct xt_mtchk_param *par) +{ + struct xt_owner_match_info *info = par->matchinfo; + + /* For now only allow adding matches from the initial user namespace */ + if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) && + (current_user_ns() != &init_user_ns)) + return -EINVAL; + return 0; +} + static bool owner_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -37,17 +48,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; - if (info->match & XT_OWNER_UID) - if ((filp->f_cred->fsuid >= info->uid_min && - filp->f_cred->fsuid <= info->uid_max) ^ + if (info->match & XT_OWNER_UID) { + kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min); + kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max); + if ((uid_gte(filp->f_cred->fsuid, uid_min) && + uid_lte(filp->f_cred->fsuid, uid_max)) ^ !(info->invert & XT_OWNER_UID)) return false; + } - if (info->match & XT_OWNER_GID) - if ((filp->f_cred->fsgid >= info->gid_min && - filp->f_cred->fsgid <= info->gid_max) ^ + if (info->match & XT_OWNER_GID) { + kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min); + kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max); + if ((gid_gte(filp->f_cred->fsgid, gid_min) && + gid_lte(filp->f_cred->fsgid, gid_max)) ^ !(info->invert & XT_OWNER_GID)) return false; + } return true; } @@ -56,6 +73,7 @@ static struct xt_match owner_mt_reg __read_mostly = { .name = "owner", .revision = 1, .family = NFPROTO_UNSPEC, + .checkentry = owner_check, .match = owner_mt, .matchsize = sizeof(struct xt_owner_match_info), .hooks = (1 << NF_INET_LOCAL_OUT) | diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index ae2ad1eec8d0..4635c9b00459 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -317,6 +317,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, struct recent_table *t; #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; + kuid_t uid; + kgid_t gid; #endif unsigned int i; int ret = -EINVAL; @@ -372,6 +374,13 @@ static int recent_mt_check(const struct xt_mtchk_param *par, for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS + uid = make_kuid(&init_user_ns, ip_list_uid); + gid = make_kgid(&init_user_ns, ip_list_gid); + if (!uid_valid(uid) || !gid_valid(gid)) { + kfree(t); + ret = -EINVAL; + goto out; + } pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_fops, t); if (pde == NULL) { @@ -379,8 +388,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, ret = -ENOMEM; goto out; } - pde->uid = ip_list_uid; - pde->gid = ip_list_gid; + pde->uid = uid; + pde->gid = gid; #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index c6f7db720d84..865a9e54f3ad 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -356,6 +356,27 @@ static struct xt_match set_matches[] __read_mostly = { .destroy = set_match_v1_destroy, .me = THIS_MODULE }, + /* --return-nomatch flag support */ + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 2, + .match = set_match_v1, + .matchsize = sizeof(struct xt_set_info_match_v1), + .checkentry = set_match_v1_checkentry, + .destroy = set_match_v1_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV6, + .revision = 2, + .match = set_match_v1, + .matchsize = sizeof(struct xt_set_info_match_v1), + .checkentry = set_match_v1_checkentry, + .destroy = set_match_v1_destroy, + .me = THIS_MODULE + }, }; static struct xt_target set_targets[] __read_mostly = { @@ -389,6 +410,7 @@ static struct xt_target set_targets[] __read_mostly = { .destroy = set_target_v1_destroy, .me = THIS_MODULE }, + /* --timeout and --exist flags support */ { .name = "SET", .revision = 2, diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9ea482d08cf7..63b2bdb59e95 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -108,9 +108,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; struct sock *sk; - __be32 daddr, saddr; - __be16 dport, sport; - u8 protocol; + __be32 uninitialized_var(daddr), uninitialized_var(saddr); + __be16 uninitialized_var(dport), uninitialized_var(sport); + u8 uninitialized_var(protocol); #ifdef XT_SOCKET_HAVE_CONNTRACK struct nf_conn const *ct; enum ip_conntrack_info ctinfo; @@ -261,9 +261,9 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; struct sock *sk; - struct in6_addr *daddr, *saddr; - __be16 dport, sport; - int thoff = 0, tproto; + struct in6_addr *daddr = NULL, *saddr = NULL; + __be16 uninitialized_var(dport), uninitialized_var(sport); + int thoff = 0, uninitialized_var(tproto); const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index c48975ff8ea2..0ae55a36f492 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -42,6 +42,7 @@ static const u_int16_t days_since_leapyear[] = { */ enum { DSE_FIRST = 2039, + SECONDS_PER_DAY = 86400, }; static const u_int16_t days_since_epoch[] = { /* 2039 - 2030 */ @@ -78,7 +79,7 @@ static inline unsigned int localtime_1(struct xtm *r, time_t time) unsigned int v, w; /* Each day has 86400s, so finding the hour/minute is actually easy. */ - v = time % 86400; + v = time % SECONDS_PER_DAY; r->second = v % 60; w = v / 60; r->minute = w % 60; @@ -199,6 +200,18 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) if (packet_time < info->daytime_start && packet_time > info->daytime_stop) return false; + + /** if user asked to ignore 'next day', then e.g. + * '1 PM Wed, August 1st' should be treated + * like 'Tue 1 PM July 31st'. + * + * This also causes + * 'Monday, "23:00 to 01:00", to match for 2 hours, starting + * Monday 23:00 to Tuesday 01:00. + */ + if ((info->flags & XT_TIME_CONTIGUOUS) && + packet_time <= info->daytime_stop) + stamp -= SECONDS_PER_DAY; } localtime_2(¤t_time, stamp); @@ -227,6 +240,15 @@ static int time_mt_check(const struct xt_mtchk_param *par) return -EDOM; } + if (info->flags & ~XT_TIME_ALL_FLAGS) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS); + return -EINVAL; + } + + if ((info->flags & XT_TIME_CONTIGUOUS) && + info->daytime_start < info->daytime_stop) + return -EINVAL; + return 0; } diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 6bf878335d94..c15042f987bd 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -627,7 +627,7 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_cipsov4_gnl_family, NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL); if (data == NULL) diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 4809e2e48b02..c5384ffc6146 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -448,7 +448,7 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) struct netlbl_domhsh_walk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_LISTALL); if (data == NULL) @@ -613,7 +613,7 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, int ret_val = -ENOMEM; void *data; - data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + data = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_PROTOCOLS); if (data == NULL) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e7ff694f1049..847d495cd4de 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1096,7 +1096,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, char *secctx; u32 secctx_len; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_unlabel_gnl_family, NLM_F_MULTI, cmd); if (data == NULL) @@ -1541,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void) * it is called is at bootup before the audit subsystem is reporting * messages so don't worry to much about these values. */ security_task_getsecid(current, &audit_info.secid); - audit_info.loginuid = 0; + audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 9fae63f10298..9650c4ad5f88 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -109,7 +109,7 @@ struct audit_buffer *netlbl_audit_start_common(int type, return NULL; audit_log_format(audit_buf, "netlabel: auid=%u ses=%u", - audit_info->loginuid, + from_kuid(&init_user_ns, audit_info->loginuid), audit_info->sessionid); if (audit_info->secid != 0 && diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5463969da45b..0f2e3ad69c47 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -67,8 +67,8 @@ struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; - u32 pid; - u32 dst_pid; + u32 portid; + u32 dst_portid; u32 dst_group; u32 flags; u32 subscriptions; @@ -104,7 +104,7 @@ static inline int netlink_is_kernel(struct sock *sk) return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } -struct nl_pid_hash { +struct nl_portid_hash { struct hlist_head *table; unsigned long rehash_time; @@ -118,10 +118,10 @@ struct nl_pid_hash { }; struct netlink_table { - struct nl_pid_hash hash; + struct nl_portid_hash hash; struct hlist_head mc_list; struct listeners __rcu *listeners; - unsigned int nl_nonroot; + unsigned int flags; unsigned int groups; struct mutex *cb_mutex; struct module *module; @@ -145,9 +145,9 @@ static inline u32 netlink_group_mask(u32 group) return group ? 1 << (group - 1) : 0; } -static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) +static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid) { - return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; + return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } static void netlink_destroy_callback(struct netlink_callback *cb) @@ -239,17 +239,17 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) +static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - struct nl_pid_hash *hash = &nl_table[protocol].hash; + struct nl_portid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; struct hlist_node *node; read_lock(&nl_table_lock); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(sk, node, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) { + if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; } @@ -260,7 +260,7 @@ found: return sk; } -static struct hlist_head *nl_pid_hash_zalloc(size_t size) +static struct hlist_head *nl_portid_hash_zalloc(size_t size) { if (size <= PAGE_SIZE) return kzalloc(size, GFP_ATOMIC); @@ -270,7 +270,7 @@ static struct hlist_head *nl_pid_hash_zalloc(size_t size) get_order(size)); } -static void nl_pid_hash_free(struct hlist_head *table, size_t size) +static void nl_portid_hash_free(struct hlist_head *table, size_t size) { if (size <= PAGE_SIZE) kfree(table); @@ -278,7 +278,7 @@ static void nl_pid_hash_free(struct hlist_head *table, size_t size) free_pages((unsigned long)table, get_order(size)); } -static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) +static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) { unsigned int omask, mask, shift; size_t osize, size; @@ -296,7 +296,7 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) size *= 2; } - table = nl_pid_hash_zalloc(size); + table = nl_portid_hash_zalloc(size); if (!table) return 0; @@ -311,23 +311,23 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) struct hlist_node *node, *tmp; sk_for_each_safe(sk, node, tmp, &otable[i]) - __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid)); + __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); } - nl_pid_hash_free(otable, osize); + nl_portid_hash_free(otable, osize); hash->rehash_time = jiffies + 10 * 60 * HZ; return 1; } -static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) +static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len) { int avg = hash->entries >> hash->shift; - if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1)) + if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1)) return 1; if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { - nl_pid_hash_rehash(hash, 0); + nl_portid_hash_rehash(hash, 0); return 1; } @@ -356,9 +356,9 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, struct net *net, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; @@ -366,10 +366,10 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) int len; netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); len = 0; sk_for_each(osk, node, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid)) + if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) break; len++; } @@ -377,17 +377,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) goto err; err = -EBUSY; - if (nlk_sk(sk)->pid) + if (nlk_sk(sk)->portid) goto err; err = -ENOMEM; if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) goto err; - if (len && nl_pid_hash_dilute(hash, len)) - head = nl_pid_hashfn(hash, pid); + if (len && nl_portid_hash_dilute(hash, len)) + head = nl_portid_hashfn(hash, portid); hash->entries++; - nlk_sk(sk)->pid = pid; + nlk_sk(sk)->portid = portid; sk_add_node(sk, head); err = 0; @@ -518,11 +518,11 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->pid) { + if (nlk->portid) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, - .pid = nlk->pid, + .portid = nlk->portid, }; atomic_notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); @@ -536,6 +536,8 @@ static int netlink_release(struct socket *sock) if (--nl_table[sk->sk_protocol].registered == 0) { kfree(nl_table[sk->sk_protocol].listeners); nl_table[sk->sk_protocol].module = NULL; + nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } } else if (nlk->subscriptions) { @@ -557,24 +559,24 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; struct hlist_node *node; - s32 pid = task_tgid_vnr(current); + s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; retry: cond_resched(); netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(osk, node, head) { if (!net_eq(sock_net(osk), net)) continue; - if (nlk_sk(osk)->pid == pid) { - /* Bind collision, search negative pid values. */ - pid = rover--; + if (nlk_sk(osk)->portid == portid) { + /* Bind collision, search negative portid values. */ + portid = rover--; if (rover > -4097) rover = -4097; netlink_table_ungrab(); @@ -583,7 +585,7 @@ retry: } netlink_table_ungrab(); - err = netlink_insert(sk, net, pid); + err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) goto retry; @@ -596,7 +598,7 @@ retry: static inline int netlink_capable(const struct socket *sock, unsigned int flag) { - return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) || + return (nl_table[sock->sk->sk_protocol].flags & flag) || capable(CAP_NET_ADMIN); } @@ -659,15 +661,15 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, /* Only superuser is allowed to listen multicasts */ if (nladdr->nl_groups) { - if (!netlink_capable(sock, NL_NONROOT_RECV)) + if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) return err; } - if (nlk->pid) { - if (nladdr->nl_pid != nlk->pid) + if (nlk->portid) { + if (nladdr->nl_pid != nlk->portid) return -EINVAL; } else { err = nladdr->nl_pid ? @@ -713,7 +715,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_pid = 0; + nlk->dst_portid = 0; nlk->dst_group = 0; return 0; } @@ -721,15 +723,15 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; /* Only superuser is allowed to send multicasts */ - if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND)) + if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; - if (!nlk->pid) + if (!nlk->portid) err = netlink_autobind(sock); if (err == 0) { sk->sk_state = NETLINK_CONNECTED; - nlk->dst_pid = nladdr->nl_pid; + nlk->dst_portid = nladdr->nl_pid; nlk->dst_group = ffs(nladdr->nl_groups); } @@ -748,10 +750,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, *addr_len = sizeof(*nladdr); if (peer) { - nladdr->nl_pid = nlk->dst_pid; + nladdr->nl_pid = nlk->dst_portid; nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { - nladdr->nl_pid = nlk->pid; + nladdr->nl_pid = nlk->portid; nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; } return 0; @@ -770,19 +772,19 @@ static void netlink_overrun(struct sock *sk) atomic_inc(&sk->sk_drops); } -static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) +static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid); + sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid); if (!sock) return ERR_PTR(-ECONNREFUSED); /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_pid != nlk_sk(ssk)->pid) { + nlk->dst_portid != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -912,7 +914,8 @@ static void netlink_rcv_wake(struct sock *sk) wake_up_interruptible(&nlk->wait); } -static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) +static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, + struct sock *ssk) { int ret; struct netlink_sock *nlk = nlk_sk(sk); @@ -921,6 +924,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) if (nlk->netlink_rcv != NULL) { ret = skb->len; skb_set_owner_r(skb, sk); + NETLINK_CB(skb).ssk = ssk; nlk->netlink_rcv(skb); consume_skb(skb); } else { @@ -931,7 +935,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) } int netlink_unicast(struct sock *ssk, struct sk_buff *skb, - u32 pid, int nonblock) + u32 portid, int nonblock) { struct sock *sk; int err; @@ -941,13 +945,13 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb, timeo = sock_sndtimeo(ssk, nonblock); retry: - sk = netlink_getsockbypid(ssk, pid); + sk = netlink_getsockbyportid(ssk, portid); if (IS_ERR(sk)) { kfree_skb(skb); return PTR_ERR(sk); } if (netlink_is_kernel(sk)) - return netlink_unicast_kernel(sk, skb); + return netlink_unicast_kernel(sk, skb, ssk); if (sk_filter(sk, skb)) { err = skb->len; @@ -1001,7 +1005,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) struct netlink_broadcast_data { struct sock *exclude_sk; struct net *net; - u32 pid; + u32 portid; u32 group; int failure; int delivery_failure; @@ -1022,7 +1026,7 @@ static int do_one_broadcast(struct sock *sk, if (p->exclude_sk == sk) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1074,7 +1078,7 @@ out: return 0; } -int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data) @@ -1088,7 +1092,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, info.exclude_sk = ssk; info.net = net; - info.pid = pid; + info.portid = portid; info.group = group; info.failure = 0; info.delivery_failure = 0; @@ -1126,17 +1130,17 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, } EXPORT_SYMBOL(netlink_broadcast_filtered); -int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation) { - return netlink_broadcast_filtered(ssk, skb, pid, group, allocation, + return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, NULL, NULL); } EXPORT_SYMBOL(netlink_broadcast); struct netlink_set_err_data { struct sock *exclude_sk; - u32 pid; + u32 portid; u32 group; int code; }; @@ -1152,7 +1156,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1170,14 +1174,14 @@ out: /** * netlink_set_err - report error to broadcast listeners * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() - * @pid: the PID of a process that we want to skip (if any) + * @portid: the PORTID of a process that we want to skip (if any) * @groups: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ -int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) +int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; struct hlist_node *node; @@ -1185,7 +1189,7 @@ int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) int ret = 0; info.exclude_sk = ssk; - info.pid = pid; + info.portid = portid; info.group = group; /* sk->sk_err wants a positive error value */ info.code = -code; @@ -1242,7 +1246,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { - if (!netlink_capable(sock, NL_NONROOT_RECV)) + if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) @@ -1350,7 +1354,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *addr = msg->msg_name; - u32 dst_pid; + u32 dst_portid; u32 dst_group; struct sk_buff *skb; int err; @@ -1362,7 +1366,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &scm; - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, true); if (err < 0) return err; @@ -1370,17 +1374,18 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = -EINVAL; if (addr->nl_family != AF_NETLINK) goto out; - dst_pid = addr->nl_pid; + dst_portid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) + if ((dst_group || dst_portid) && + !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) goto out; } else { - dst_pid = nlk->dst_pid; + dst_portid = nlk->dst_portid; dst_group = nlk->dst_group; } - if (!nlk->pid) { + if (!nlk->portid) { err = netlink_autobind(sock); if (err) goto out; @@ -1394,9 +1399,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - NETLINK_CB(skb).pid = nlk->pid; + NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); + NETLINK_CB(skb).creds = siocb->scm->creds; err = -EFAULT; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { @@ -1412,9 +1417,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (dst_group) { atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); + netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); } - err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); + err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: scm_destroy(siocb->scm); @@ -1477,7 +1482,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; addr->nl_family = AF_NETLINK; addr->nl_pad = 0; - addr->nl_pid = NETLINK_CB(skb).pid; + addr->nl_pid = NETLINK_CB(skb).portid; addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); msg->msg_namelen = sizeof(*addr); } @@ -1521,9 +1526,8 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(struct net *net, int unit, - struct module *module, - struct netlink_kernel_cfg *cfg) +__netlink_kernel_create(struct net *net, int unit, struct module *module, + struct netlink_kernel_cfg *cfg) { struct socket *sock; struct sock *sk; @@ -1577,7 +1581,10 @@ netlink_kernel_create(struct net *net, int unit, rcu_assign_pointer(nl_table[unit].listeners, listeners); nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; - nl_table[unit].bind = cfg ? cfg->bind : NULL; + if (cfg) { + nl_table[unit].bind = cfg->bind; + nl_table[unit].flags = cfg->flags; + } nl_table[unit].registered = 1; } else { kfree(listeners); @@ -1595,8 +1602,7 @@ out_sock_release_nosk: sock_release(sock); return NULL; } -EXPORT_SYMBOL(netlink_kernel_create); - +EXPORT_SYMBOL(__netlink_kernel_create); void netlink_kernel_release(struct sock *sk) @@ -1676,15 +1682,8 @@ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) netlink_table_ungrab(); } -void netlink_set_nonroot(int protocol, unsigned int flags) -{ - if ((unsigned int)protocol < MAX_LINKS) - nl_table[protocol].nl_nonroot = flags; -} -EXPORT_SYMBOL(netlink_set_nonroot); - struct nlmsghdr * -__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) +__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; int size = NLMSG_LENGTH(len); @@ -1693,7 +1692,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) nlh->nlmsg_type = type; nlh->nlmsg_len = size; nlh->nlmsg_flags = flags; - nlh->nlmsg_pid = pid; + nlh->nlmsg_pid = portid; nlh->nlmsg_seq = seq; if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); @@ -1789,7 +1788,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1837,7 +1836,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) sk = netlink_lookup(sock_net(in_skb->sk), in_skb->sk->sk_protocol, - NETLINK_CB(in_skb).pid); + NETLINK_CB(in_skb).portid); if (sk) { sk->sk_err = ENOBUFS; sk->sk_error_report(sk); @@ -1846,12 +1845,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) return; } - rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = err; memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); - netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); } EXPORT_SYMBOL(netlink_ack); @@ -1901,33 +1900,33 @@ EXPORT_SYMBOL(netlink_rcv_skb); * nlmsg_notify - send a notification netlink message * @sk: netlink socket to use * @skb: notification message - * @pid: destination netlink pid for reports or 0 + * @portid: destination netlink portid for reports or 0 * @group: destination multicast group or 0 * @report: 1 to report back, 0 to disable * @flags: allocation flags */ -int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, +int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags) { int err = 0; if (group) { - int exclude_pid = 0; + int exclude_portid = 0; if (report) { atomic_inc(&skb->users); - exclude_pid = pid; + exclude_portid = portid; } /* errors reported via destination sk->sk_err, but propagate * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ - err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); + err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); } if (report) { int err2; - err2 = nlmsg_unicast(sk, skb, pid); + err2 = nlmsg_unicast(sk, skb, portid); if (!err || err == -ESRCH) err = err2; } @@ -1952,7 +1951,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { @@ -2000,7 +1999,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) j = iter->hash_idx + 1; do { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); @@ -2039,7 +2038,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", s, s->sk_protocol, - nlk->pid, + nlk->portid, nlk->groups ? (u32)nlk->groups[0] : 0, sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), @@ -2147,6 +2146,7 @@ static void __init netlink_add_usersock_entry(void) rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; + nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND; netlink_table_ungrab(); } @@ -2183,12 +2183,12 @@ static int __init netlink_proto_init(void) order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; - hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table)); + hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) - nl_pid_hash_free(nl_table[i].hash.table, + nl_portid_hash_free(nl_table[i].hash.table, 1 * sizeof(*hash->table)); kfree(nl_table); goto panic; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index fda497412fc3..f2aabb6f4105 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -501,7 +501,7 @@ EXPORT_SYMBOL(genl_unregister_family); /** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message - * @pid: netlink pid the message is addressed to + * @portid: netlink portid the message is addressed to * @seq: sequence number (usually the one of the sender) * @family: generic netlink family * @flags: netlink message flags @@ -509,13 +509,13 @@ EXPORT_SYMBOL(genl_unregister_family); * * Returns pointer to user specific header */ -void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd) { struct nlmsghdr *nlh; struct genlmsghdr *hdr; - nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN + + nlh = nlmsg_put(skb, portid, seq, family->id, GENL_HDRLEN + family->hdrsize, flags); if (nlh == NULL) return NULL; @@ -585,7 +585,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } info.snd_seq = nlh->nlmsg_seq; - info.snd_pid = NETLINK_CB(skb).pid; + info.snd_portid = NETLINK_CB(skb).portid; info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; @@ -626,12 +626,12 @@ static struct genl_family genl_ctrl = { .netnsok = true, }; -static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, +static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -701,7 +701,7 @@ nla_put_failure: return -EMSGSIZE; } -static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, +static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { @@ -709,7 +709,7 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, struct nlattr *nla_grps; struct nlattr *nest; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -756,7 +756,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++n < fams_to_skip) continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid, + if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, CTRL_CMD_NEWFAMILY) < 0) goto errout; @@ -773,7 +773,7 @@ errout: } static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -782,7 +782,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_info(family, pid, seq, 0, skb, cmd); + err = ctrl_fill_info(family, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -792,7 +792,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, } static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -801,7 +801,7 @@ static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd); + err = ctrl_fill_mcgrp_info(grp, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -853,7 +853,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) return -ENOENT; } - msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq, + msg = ctrl_build_family_msg(res, info->snd_portid, info->snd_seq, CTRL_CMD_NEWFAMILY); if (IS_ERR(msg)) return PTR_ERR(msg); @@ -918,11 +918,11 @@ static int __net_init genl_pernet_init(struct net *net) struct netlink_kernel_cfg cfg = { .input = genl_rcv, .cb_mutex = &genl_mutex, + .flags = NL_CFG_F_NONROOT_RECV, }; /* we'll bump the group number right afterwards */ - net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, - THIS_MODULE, &cfg); + net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, &cfg); if (!net->genl_sock && net_eq(net, &init_net)) panic("GENL: Cannot initialize generic netlink\n"); @@ -955,8 +955,6 @@ static int __init genl_init(void) if (err < 0) goto problem; - netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); - err = register_pernet_subsys(&genl_pernet_ops); if (err) goto problem; @@ -973,7 +971,7 @@ problem: subsys_initcall(genl_init); -static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, +static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { struct sk_buff *tmp; @@ -988,7 +986,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, goto error; } err = nlmsg_multicast(prev->genl_sock, tmp, - pid, group, flags); + portid, group, flags); if (err) goto error; } @@ -996,20 +994,20 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, prev = net; } - return nlmsg_multicast(prev->genl_sock, skb, pid, group, flags); + return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); error: kfree_skb(skb); return err; } -int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group, +int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - return genlmsg_mcast(skb, pid, group, flags); + return genlmsg_mcast(skb, portid, group, flags); } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, +void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *sk = net->genl_sock; @@ -1018,6 +1016,6 @@ void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, if (nlh) report = nlmsg_report(nlh); - nlmsg_notify(sk, skb, pid, group, report, flags); + nlmsg_notify(sk, skb, portid, group, report, flags); } EXPORT_SYMBOL(genl_notify); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 06592d8b4a2b..7261eb81974f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -601,7 +601,7 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); release_sock(sk); - return -EACCES; + return -EPERM; } nr->user_addr = addr->fsa_digipeater[0]; nr->source_addr = addr->fsa_ax25.sax25_call; @@ -1169,7 +1169,12 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_flags |= MSG_TRUNC; } - skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + er = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (er < 0) { + skb_free_datagram(sk, skb); + release_sock(sk); + return er; + } if (sax != NULL) { sax->sax25_family = AF_NETROM; diff --git a/net/nfc/core.c b/net/nfc/core.c index ff749794bc5b..479bee36dc3e 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -679,7 +679,7 @@ static void nfc_release(struct device *d) if (dev->ops->check_presence) { del_timer_sync(&dev->check_pres_timer); - destroy_workqueue(dev->check_pres_wq); + cancel_work_sync(&dev->check_pres_work); } nfc_genl_data_exit(&dev->genl_data); @@ -715,7 +715,7 @@ static void nfc_check_pres_timeout(unsigned long data) { struct nfc_dev *dev = (struct nfc_dev *)data; - queue_work(dev->check_pres_wq, &dev->check_pres_work); + schedule_work(&dev->check_pres_work); } struct class nfc_class = { @@ -784,20 +784,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->targets_generation = 1; if (ops->check_presence) { - char name[32]; init_timer(&dev->check_pres_timer); dev->check_pres_timer.data = (unsigned long)dev; dev->check_pres_timer.function = nfc_check_pres_timeout; INIT_WORK(&dev->check_pres_work, nfc_check_pres_work); - snprintf(name, sizeof(name), "nfc%d_check_pres_wq", dev->idx); - dev->check_pres_wq = alloc_workqueue(name, WQ_NON_REENTRANT | - WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); - if (dev->check_pres_wq == NULL) { - kfree(dev); - return NULL; - } } return dev; diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile index f9c44b2fb065..c5dbb6891b24 100644 --- a/net/nfc/hci/Makefile +++ b/net/nfc/hci/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_NFC_HCI) += hci.o -hci-y := core.o hcp.o command.o -hci-$(CONFIG_NFC_SHDLC) += shdlc.o +hci-y := core.o hcp.o command.o llc.o llc_nop.o +hci-$(CONFIG_NFC_SHDLC) += llc_shdlc.o diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 46362ef979db..71c6a7086b8f 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -28,10 +28,29 @@ #include "hci.h" -static void nfc_hci_execute_cb(struct nfc_hci_dev *hdev, int err, - struct sk_buff *skb, void *cb_data) +static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, + const u8 *param, size_t param_len, + data_exchange_cb_t cb, void *cb_context) { - struct hcp_exec_waiter *hcp_ew = (struct hcp_exec_waiter *)cb_data; + pr_debug("exec cmd async through pipe=%d, cmd=%d, plen=%zd\n", pipe, + cmd, param_len); + + /* TODO: Define hci cmd execution delay. Should it be the same + * for all commands? + */ + return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd, + param, param_len, cb, cb_context, 3000); +} + +/* + * HCI command execution completion callback. + * err will be a standard linux error (may be converted from HCI response) + * skb contains the response data and must be disposed, or may be NULL if + * an error occured + */ +static void nfc_hci_execute_cb(void *context, struct sk_buff *skb, int err) +{ + struct hcp_exec_waiter *hcp_ew = (struct hcp_exec_waiter *)context; pr_debug("HCI Cmd completed with result=%d\n", err); @@ -55,7 +74,8 @@ static int nfc_hci_execute_cmd(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, hcp_ew.exec_complete = false; hcp_ew.result_skb = NULL; - pr_debug("through pipe=%d, cmd=%d, plen=%zd\n", pipe, cmd, param_len); + pr_debug("exec cmd sync through pipe=%d, cmd=%d, plen=%zd\n", pipe, + cmd, param_len); /* TODO: Define hci cmd execution delay. Should it be the same * for all commands? @@ -133,6 +153,23 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, } EXPORT_SYMBOL(nfc_hci_send_cmd); +int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, + const u8 *param, size_t param_len, + data_exchange_cb_t cb, void *cb_context) +{ + u8 pipe; + + pr_debug("\n"); + + pipe = hdev->gate2pipe[gate]; + if (pipe == NFC_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + return nfc_hci_execute_cmd_async(hdev, pipe, cmd, param, param_len, + cb, cb_context); +} +EXPORT_SYMBOL(nfc_hci_send_cmd_async); + int nfc_hci_set_param(struct nfc_hci_dev *hdev, u8 gate, u8 idx, const u8 *param, size_t param_len) { diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 1ac7b3fac6c9..5fbb6e40793e 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -26,6 +26,7 @@ #include <net/nfc/nfc.h> #include <net/nfc/hci.h> +#include <net/nfc/llc.h> #include "hci.h" @@ -57,12 +58,11 @@ static void nfc_hci_msg_tx_work(struct work_struct *work) if (hdev->cmd_pending_msg) { if (timer_pending(&hdev->cmd_timer) == 0) { if (hdev->cmd_pending_msg->cb) - hdev->cmd_pending_msg->cb(hdev, - -ETIME, - NULL, - hdev-> + hdev->cmd_pending_msg->cb(hdev-> cmd_pending_msg-> - cb_context); + cb_context, + NULL, + -ETIME); kfree(hdev->cmd_pending_msg); hdev->cmd_pending_msg = NULL; } else @@ -78,12 +78,12 @@ next_msg: pr_debug("msg_tx_queue has a cmd to send\n"); while ((skb = skb_dequeue(&msg->msg_frags)) != NULL) { - r = hdev->ops->xmit(hdev, skb); + r = nfc_llc_xmit_from_hci(hdev->llc, skb); if (r < 0) { kfree_skb(skb); skb_queue_purge(&msg->msg_frags); if (msg->cb) - msg->cb(hdev, r, NULL, msg->cb_context); + msg->cb(msg->cb_context, NULL, r); kfree(msg); break; } @@ -133,15 +133,15 @@ static void __nfc_hci_cmd_completion(struct nfc_hci_dev *hdev, int err, del_timer_sync(&hdev->cmd_timer); if (hdev->cmd_pending_msg->cb) - hdev->cmd_pending_msg->cb(hdev, err, skb, - hdev->cmd_pending_msg->cb_context); + hdev->cmd_pending_msg->cb(hdev->cmd_pending_msg->cb_context, + skb, err); else kfree_skb(skb); kfree(hdev->cmd_pending_msg); hdev->cmd_pending_msg = NULL; - queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work); + schedule_work(&hdev->msg_tx_work); } void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result, @@ -326,7 +326,7 @@ static void nfc_hci_cmd_timeout(unsigned long data) { struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data; - queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work); + schedule_work(&hdev->msg_tx_work); } static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count, @@ -398,8 +398,7 @@ disconnect_all: nfc_hci_disconnect_all_gates(hdev); exit: - if (skb) - kfree_skb(skb); + kfree_skb(skb); return r; } @@ -470,29 +469,38 @@ static int hci_dev_up(struct nfc_dev *nfc_dev) return r; } + r = nfc_llc_start(hdev->llc); + if (r < 0) + goto exit_close; + r = hci_dev_session_init(hdev); if (r < 0) - goto exit; + goto exit_llc; r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, NFC_HCI_EVT_END_OPERATION, NULL, 0); if (r < 0) - goto exit; + goto exit_llc; if (hdev->ops->hci_ready) { r = hdev->ops->hci_ready(hdev); if (r < 0) - goto exit; + goto exit_llc; } r = hci_dev_version(hdev); if (r < 0) - goto exit; + goto exit_llc; + + return 0; + +exit_llc: + nfc_llc_stop(hdev->llc); + +exit_close: + if (hdev->ops->close) + hdev->ops->close(hdev); -exit: - if (r < 0) - if (hdev->ops->close) - hdev->ops->close(hdev); return r; } @@ -500,6 +508,8 @@ static int hci_dev_down(struct nfc_dev *nfc_dev) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + nfc_llc_stop(hdev->llc); + if (hdev->ops->close) hdev->ops->close(hdev); @@ -539,13 +549,37 @@ static void hci_deactivate_target(struct nfc_dev *nfc_dev, { } +#define HCI_CB_TYPE_TRANSCEIVE 1 + +static void hci_transceive_cb(void *context, struct sk_buff *skb, int err) +{ + struct nfc_hci_dev *hdev = context; + + switch (hdev->async_cb_type) { + case HCI_CB_TYPE_TRANSCEIVE: + /* + * TODO: Check RF Error indicator to make sure data is valid. + * It seems that HCI cmd can complete without error, but data + * can be invalid if an RF error occured? Ignore for now. + */ + if (err == 0) + skb_trim(skb, skb->len - 1); /* RF Err ind */ + + hdev->async_cb(hdev->async_cb_context, skb, err); + break; + default: + if (err == 0) + kfree_skb(skb); + break; + } +} + static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); int r; - struct sk_buff *res_skb = NULL; pr_debug("target_idx=%d\n", target->idx); @@ -553,40 +587,37 @@ static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, case NFC_HCI_RF_READER_A_GATE: case NFC_HCI_RF_READER_B_GATE: if (hdev->ops->data_exchange) { - r = hdev->ops->data_exchange(hdev, target, skb, - &res_skb); + r = hdev->ops->data_exchange(hdev, target, skb, cb, + cb_context); if (r <= 0) /* handled */ break; } *skb_push(skb, 1) = 0; /* CTR, see spec:10.2.2.1 */ - r = nfc_hci_send_cmd(hdev, target->hci_reader_gate, - NFC_HCI_WR_XCHG_DATA, - skb->data, skb->len, &res_skb); - /* - * TODO: Check RF Error indicator to make sure data is valid. - * It seems that HCI cmd can complete without error, but data - * can be invalid if an RF error occured? Ignore for now. - */ - if (r == 0) - skb_trim(res_skb, res_skb->len - 1); /* RF Err ind */ + + hdev->async_cb_type = HCI_CB_TYPE_TRANSCEIVE; + hdev->async_cb = cb; + hdev->async_cb_context = cb_context; + + r = nfc_hci_send_cmd_async(hdev, target->hci_reader_gate, + NFC_HCI_WR_XCHG_DATA, skb->data, + skb->len, hci_transceive_cb, hdev); break; default: if (hdev->ops->data_exchange) { - r = hdev->ops->data_exchange(hdev, target, skb, - &res_skb); + r = hdev->ops->data_exchange(hdev, target, skb, cb, + cb_context); if (r == 1) r = -ENOTSUPP; } else r = -ENOTSUPP; + break; } kfree_skb(skb); - cb(cb_context, res_skb, r); - - return 0; + return r; } static int hci_check_presence(struct nfc_dev *nfc_dev, @@ -600,6 +631,93 @@ static int hci_check_presence(struct nfc_dev *nfc_dev, return 0; } +static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err) +{ + mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->cmd_pending_msg == NULL) { + nfc_driver_failure(hdev->ndev, err); + goto exit; + } + + __nfc_hci_cmd_completion(hdev, err, NULL); + +exit: + mutex_unlock(&hdev->msg_tx_mutex); +} + +static void nfc_hci_llc_failure(struct nfc_hci_dev *hdev, int err) +{ + nfc_hci_failure(hdev, err); +} + +static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) +{ + struct hcp_packet *packet; + u8 type; + u8 instruction; + struct sk_buff *hcp_skb; + u8 pipe; + struct sk_buff *frag_skb; + int msg_len; + + packet = (struct hcp_packet *)skb->data; + if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) { + skb_queue_tail(&hdev->rx_hcp_frags, skb); + return; + } + + /* it's the last fragment. Does it need re-aggregation? */ + if (skb_queue_len(&hdev->rx_hcp_frags)) { + pipe = packet->header & NFC_HCI_FRAGMENT; + skb_queue_tail(&hdev->rx_hcp_frags, skb); + + msg_len = 0; + skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) { + msg_len += (frag_skb->len - + NFC_HCI_HCP_PACKET_HEADER_LEN); + } + + hcp_skb = nfc_alloc_recv_skb(NFC_HCI_HCP_PACKET_HEADER_LEN + + msg_len, GFP_KERNEL); + if (hcp_skb == NULL) { + nfc_hci_failure(hdev, -ENOMEM); + return; + } + + *skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe; + + skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) { + msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN; + memcpy(skb_put(hcp_skb, msg_len), + frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN, + msg_len); + } + + skb_queue_purge(&hdev->rx_hcp_frags); + } else { + packet->header &= NFC_HCI_FRAGMENT; + hcp_skb = skb; + } + + /* if this is a response, dispatch immediately to + * unblock waiting cmd context. Otherwise, enqueue to dispatch + * in separate context where handler can also execute command. + */ + packet = (struct hcp_packet *)hcp_skb->data; + type = HCP_MSG_GET_TYPE(packet->message.header); + if (type == NFC_HCI_HCP_RESPONSE) { + pipe = packet->header; + instruction = HCP_MSG_GET_CMD(packet->message.header); + skb_pull(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN + + NFC_HCI_HCP_MESSAGE_HEADER_LEN); + nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb); + } else { + skb_queue_tail(&hdev->msg_rx_queue, hcp_skb); + schedule_work(&hdev->msg_rx_work); + } +} + static struct nfc_ops hci_nfc_ops = { .dev_up = hci_dev_up, .dev_down = hci_dev_down, @@ -614,6 +732,7 @@ static struct nfc_ops hci_nfc_ops = { struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, u32 protocols, + const char *llc_name, int tx_headroom, int tx_tailroom, int max_link_payload) @@ -630,10 +749,19 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, if (hdev == NULL) return NULL; + hdev->llc = nfc_llc_allocate(llc_name, hdev, ops->xmit, + nfc_hci_recv_from_llc, tx_headroom, + tx_tailroom, nfc_hci_llc_failure); + if (hdev->llc == NULL) { + kfree(hdev); + return NULL; + } + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { + nfc_llc_free(hdev->llc); kfree(hdev); return NULL; } @@ -653,29 +781,18 @@ EXPORT_SYMBOL(nfc_hci_allocate_device); void nfc_hci_free_device(struct nfc_hci_dev *hdev) { nfc_free_device(hdev->ndev); + nfc_llc_free(hdev->llc); kfree(hdev); } EXPORT_SYMBOL(nfc_hci_free_device); int nfc_hci_register_device(struct nfc_hci_dev *hdev) { - struct device *dev = &hdev->ndev->dev; - const char *devname = dev_name(dev); - char name[32]; - int r = 0; - mutex_init(&hdev->msg_tx_mutex); INIT_LIST_HEAD(&hdev->msg_tx_queue); INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work); - snprintf(name, sizeof(name), "%s_hci_msg_tx_wq", devname); - hdev->msg_tx_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); - if (hdev->msg_tx_wq == NULL) { - r = -ENOMEM; - goto exit; - } init_timer(&hdev->cmd_timer); hdev->cmd_timer.data = (unsigned long)hdev; @@ -684,27 +801,10 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev) skb_queue_head_init(&hdev->rx_hcp_frags); INIT_WORK(&hdev->msg_rx_work, nfc_hci_msg_rx_work); - snprintf(name, sizeof(name), "%s_hci_msg_rx_wq", devname); - hdev->msg_rx_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); - if (hdev->msg_rx_wq == NULL) { - r = -ENOMEM; - goto exit; - } skb_queue_head_init(&hdev->msg_rx_queue); - r = nfc_register_device(hdev->ndev); - -exit: - if (r < 0) { - if (hdev->msg_tx_wq) - destroy_workqueue(hdev->msg_tx_wq); - if (hdev->msg_rx_wq) - destroy_workqueue(hdev->msg_rx_wq); - } - - return r; + return nfc_register_device(hdev->ndev); } EXPORT_SYMBOL(nfc_hci_register_device); @@ -725,9 +825,8 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) nfc_unregister_device(hdev->ndev); - destroy_workqueue(hdev->msg_tx_wq); - - destroy_workqueue(hdev->msg_rx_wq); + cancel_work_sync(&hdev->msg_tx_work); + cancel_work_sync(&hdev->msg_rx_work); } EXPORT_SYMBOL(nfc_hci_unregister_device); @@ -743,93 +842,30 @@ void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev) } EXPORT_SYMBOL(nfc_hci_get_clientdata); -static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err) -{ - mutex_lock(&hdev->msg_tx_mutex); - - if (hdev->cmd_pending_msg == NULL) { - nfc_driver_failure(hdev->ndev, err); - goto exit; - } - - __nfc_hci_cmd_completion(hdev, err, NULL); - -exit: - mutex_unlock(&hdev->msg_tx_mutex); -} - void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err) { nfc_hci_failure(hdev, err); } EXPORT_SYMBOL(nfc_hci_driver_failure); -void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb) +void inline nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb) { - struct hcp_packet *packet; - u8 type; - u8 instruction; - struct sk_buff *hcp_skb; - u8 pipe; - struct sk_buff *frag_skb; - int msg_len; - - packet = (struct hcp_packet *)skb->data; - if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) { - skb_queue_tail(&hdev->rx_hcp_frags, skb); - return; - } - - /* it's the last fragment. Does it need re-aggregation? */ - if (skb_queue_len(&hdev->rx_hcp_frags)) { - pipe = packet->header & NFC_HCI_FRAGMENT; - skb_queue_tail(&hdev->rx_hcp_frags, skb); - - msg_len = 0; - skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) { - msg_len += (frag_skb->len - - NFC_HCI_HCP_PACKET_HEADER_LEN); - } - - hcp_skb = nfc_alloc_recv_skb(NFC_HCI_HCP_PACKET_HEADER_LEN + - msg_len, GFP_KERNEL); - if (hcp_skb == NULL) { - nfc_hci_failure(hdev, -ENOMEM); - return; - } - - *skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe; - - skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) { - msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN; - memcpy(skb_put(hcp_skb, msg_len), - frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN, - msg_len); - } + nfc_llc_rcv_from_drv(hdev->llc, skb); +} +EXPORT_SYMBOL(nfc_hci_recv_frame); - skb_queue_purge(&hdev->rx_hcp_frags); - } else { - packet->header &= NFC_HCI_FRAGMENT; - hcp_skb = skb; - } +static int __init nfc_hci_init(void) +{ + return nfc_llc_init(); +} - /* if this is a response, dispatch immediately to - * unblock waiting cmd context. Otherwise, enqueue to dispatch - * in separate context where handler can also execute command. - */ - packet = (struct hcp_packet *)hcp_skb->data; - type = HCP_MSG_GET_TYPE(packet->message.header); - if (type == NFC_HCI_HCP_RESPONSE) { - pipe = packet->header; - instruction = HCP_MSG_GET_CMD(packet->message.header); - skb_pull(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN + - NFC_HCI_HCP_MESSAGE_HEADER_LEN); - nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb); - } else { - skb_queue_tail(&hdev->msg_rx_queue, hcp_skb); - queue_work(hdev->msg_rx_wq, &hdev->msg_rx_work); - } +static void __exit nfc_hci_exit(void) +{ + nfc_llc_exit(); } -EXPORT_SYMBOL(nfc_hci_recv_frame); + +subsys_initcall(nfc_hci_init); +module_exit(nfc_hci_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("NFC HCI Core"); diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index fa9a21e92239..b274d12c18ac 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -20,6 +20,8 @@ #ifndef __LOCAL_HCI_H #define __LOCAL_HCI_H +#include <net/nfc/hci.h> + struct gate_pipe_map { u8 gate; u8 pipe; @@ -35,15 +37,6 @@ struct hcp_packet { struct hcp_message message; } __packed; -/* - * HCI command execution completion callback. - * result will be a standard linux error (may be converted from HCI response) - * skb contains the response data and must be disposed, or may be NULL if - * an error occured - */ -typedef void (*hci_cmd_cb_t) (struct nfc_hci_dev *hdev, int result, - struct sk_buff *skb, void *cb_data); - struct hcp_exec_waiter { wait_queue_head_t *wq; bool exec_complete; @@ -55,7 +48,7 @@ struct hci_msg { struct list_head msg_l; struct sk_buff_head msg_frags; bool wait_response; - hci_cmd_cb_t cb; + data_exchange_cb_t cb; void *cb_context; unsigned long completion_delay; }; @@ -83,7 +76,7 @@ struct hci_create_pipe_resp { int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, u8 type, u8 instruction, const u8 *payload, size_t payload_len, - hci_cmd_cb_t cb, void *cb_data, + data_exchange_cb_t cb, void *cb_context, unsigned long completion_delay); u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index f4dad1a89740..bc308a7ca609 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -35,7 +35,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, u8 type, u8 instruction, const u8 *payload, size_t payload_len, - hci_cmd_cb_t cb, void *cb_data, + data_exchange_cb_t cb, void *cb_context, unsigned long completion_delay) { struct nfc_dev *ndev = hdev->ndev; @@ -52,7 +52,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, skb_queue_head_init(&cmd->msg_frags); cmd->wait_response = (type == NFC_HCI_HCP_COMMAND) ? true : false; cmd->cb = cb; - cmd->cb_context = cb_data; + cmd->cb_context = cb_context; cmd->completion_delay = completion_delay; hci_len = payload_len + 1; @@ -108,7 +108,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue); mutex_unlock(&hdev->msg_tx_mutex); - queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work); + schedule_work(&hdev->msg_tx_work); return 0; diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c new file mode 100644 index 000000000000..ae1205ded87f --- /dev/null +++ b/net/nfc/hci/llc.c @@ -0,0 +1,170 @@ +/* + * Link Layer Control manager + * + * Copyright (C) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <net/nfc/llc.h> + +#include "llc.h" + +static struct list_head llc_engines; + +int nfc_llc_init(void) +{ + int r; + + INIT_LIST_HEAD(&llc_engines); + + r = nfc_llc_nop_register(); + if (r) + goto exit; + + r = nfc_llc_shdlc_register(); + if (r) + goto exit; + + return 0; + +exit: + nfc_llc_exit(); + return r; +} + +void nfc_llc_exit(void) +{ + struct nfc_llc_engine *llc_engine, *n; + + list_for_each_entry_safe(llc_engine, n, &llc_engines, entry) { + list_del(&llc_engine->entry); + kfree(llc_engine->name); + kfree(llc_engine); + } +} + +int nfc_llc_register(const char *name, struct nfc_llc_ops *ops) +{ + struct nfc_llc_engine *llc_engine; + + llc_engine = kzalloc(sizeof(struct nfc_llc_engine), GFP_KERNEL); + if (llc_engine == NULL) + return -ENOMEM; + + llc_engine->name = kstrdup(name, GFP_KERNEL); + if (llc_engine->name == NULL) { + kfree(llc_engine); + return -ENOMEM; + } + llc_engine->ops = ops; + + INIT_LIST_HEAD(&llc_engine->entry); + list_add_tail (&llc_engine->entry, &llc_engines); + + return 0; +} + +static struct nfc_llc_engine *nfc_llc_name_to_engine(const char *name) +{ + struct nfc_llc_engine *llc_engine; + + list_for_each_entry(llc_engine, &llc_engines, entry) { + if (strcmp(llc_engine->name, name) == 0) + return llc_engine; + } + + return NULL; +} + +void nfc_llc_unregister(const char *name) +{ + struct nfc_llc_engine *llc_engine; + + llc_engine = nfc_llc_name_to_engine(name); + if (llc_engine == NULL) + return; + + list_del(&llc_engine->entry); + kfree(llc_engine->name); + kfree(llc_engine); +} + +struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, + xmit_to_drv_t xmit_to_drv, + rcv_to_hci_t rcv_to_hci, int tx_headroom, + int tx_tailroom, llc_failure_t llc_failure) +{ + struct nfc_llc_engine *llc_engine; + struct nfc_llc *llc; + + llc_engine = nfc_llc_name_to_engine(name); + if (llc_engine == NULL) + return NULL; + + llc = kzalloc(sizeof(struct nfc_llc), GFP_KERNEL); + if (llc == NULL) + return NULL; + + llc->data = llc_engine->ops->init(hdev, xmit_to_drv, rcv_to_hci, + tx_headroom, tx_tailroom, + &llc->rx_headroom, &llc->rx_tailroom, + llc_failure); + if (llc->data == NULL) { + kfree(llc); + return NULL; + } + llc->ops = llc_engine->ops; + + return llc; +} + +void nfc_llc_free(struct nfc_llc *llc) +{ + llc->ops->deinit(llc); + kfree(llc); +} + +inline void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom, + int *rx_tailroom) +{ + *rx_headroom = llc->rx_headroom; + *rx_tailroom = llc->rx_tailroom; +} + +inline int nfc_llc_start(struct nfc_llc *llc) +{ + return llc->ops->start(llc); +} + +inline int nfc_llc_stop(struct nfc_llc *llc) +{ + return llc->ops->stop(llc); +} + +inline void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) +{ + llc->ops->rcv_from_drv(llc, skb); +} + +inline int nfc_llc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) +{ + return llc->ops->xmit_from_hci(llc, skb); +} + +inline void *nfc_llc_get_data(struct nfc_llc *llc) +{ + return llc->data; +} diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h new file mode 100644 index 000000000000..7be0b7f3ceb6 --- /dev/null +++ b/net/nfc/hci/llc.h @@ -0,0 +1,69 @@ +/* + * Link Layer Control manager + * + * Copyright (C) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LOCAL_LLC_H_ +#define __LOCAL_LLC_H_ + +#include <net/nfc/hci.h> +#include <net/nfc/llc.h> +#include <linux/skbuff.h> + +struct nfc_llc_ops { + void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, + rcv_to_hci_t rcv_to_hci, int tx_headroom, + int tx_tailroom, int *rx_headroom, int *rx_tailroom, + llc_failure_t llc_failure); + void (*deinit) (struct nfc_llc *llc); + int (*start) (struct nfc_llc *llc); + int (*stop) (struct nfc_llc *llc); + void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb); + int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb); +}; + +struct nfc_llc_engine { + const char *name; + struct nfc_llc_ops *ops; + struct list_head entry; +}; + +struct nfc_llc { + void *data; + struct nfc_llc_ops *ops; + int rx_headroom; + int rx_tailroom; +}; + +void *nfc_llc_get_data(struct nfc_llc *llc); + +int nfc_llc_register(const char *name, struct nfc_llc_ops *ops); +void nfc_llc_unregister(const char *name); + +int nfc_llc_nop_register(void); + +#if defined(CONFIG_NFC_SHDLC) +int nfc_llc_shdlc_register(void); +#else +static inline int nfc_llc_shdlc_register(void) +{ + return 0; +} +#endif + +#endif /* __LOCAL_LLC_H_ */ diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c new file mode 100644 index 000000000000..87b10291b40f --- /dev/null +++ b/net/nfc/hci/llc_nop.c @@ -0,0 +1,99 @@ +/* + * nop (passthrough) Link Layer Control + * + * Copyright (C) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <linux/types.h> + +#include "llc.h" + +struct llc_nop { + struct nfc_hci_dev *hdev; + xmit_to_drv_t xmit_to_drv; + rcv_to_hci_t rcv_to_hci; + int tx_headroom; + int tx_tailroom; + llc_failure_t llc_failure; +}; + +static void *llc_nop_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, + rcv_to_hci_t rcv_to_hci, int tx_headroom, + int tx_tailroom, int *rx_headroom, int *rx_tailroom, + llc_failure_t llc_failure) +{ + struct llc_nop *llc_nop; + + *rx_headroom = 0; + *rx_tailroom = 0; + + llc_nop = kzalloc(sizeof(struct llc_nop), GFP_KERNEL); + if (llc_nop == NULL) + return NULL; + + llc_nop->hdev = hdev; + llc_nop->xmit_to_drv = xmit_to_drv; + llc_nop->rcv_to_hci = rcv_to_hci; + llc_nop->tx_headroom = tx_headroom; + llc_nop->tx_tailroom = tx_tailroom; + llc_nop->llc_failure = llc_failure; + + return llc_nop; +} + +static void llc_nop_deinit(struct nfc_llc *llc) +{ + kfree(nfc_llc_get_data(llc)); +} + +static int llc_nop_start(struct nfc_llc *llc) +{ + return 0; +} + +static int llc_nop_stop(struct nfc_llc *llc) +{ + return 0; +} + +static void llc_nop_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) +{ + struct llc_nop *llc_nop = nfc_llc_get_data(llc); + + llc_nop->rcv_to_hci(llc_nop->hdev, skb); +} + +static int llc_nop_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) +{ + struct llc_nop *llc_nop = nfc_llc_get_data(llc); + + return llc_nop->xmit_to_drv(llc_nop->hdev, skb); +} + +static struct nfc_llc_ops llc_nop_ops = { + .init = llc_nop_init, + .deinit = llc_nop_deinit, + .start = llc_nop_start, + .stop = llc_nop_stop, + .rcv_from_drv = llc_nop_rcv_from_drv, + .xmit_from_hci = llc_nop_xmit_from_hci, +}; + +int nfc_llc_nop_register(void) +{ + return nfc_llc_register(LLC_NOP_NAME, &llc_nop_ops); +} diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/llc_shdlc.c index 6f840c18c892..01cbc72943cd 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -1,10 +1,11 @@ /* + * shdlc Link Layer Control + * * Copyright (C) 2012 Intel Corporation. All rights reserved. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -19,18 +20,65 @@ #define pr_fmt(fmt) "shdlc: %s: " fmt, __func__ +#include <linux/types.h> #include <linux/sched.h> -#include <linux/export.h> #include <linux/wait.h> -#include <linux/crc-ccitt.h> #include <linux/slab.h> #include <linux/skbuff.h> -#include <net/nfc/hci.h> -#include <net/nfc/shdlc.h> +#include "llc.h" + +enum shdlc_state { + SHDLC_DISCONNECTED = 0, + SHDLC_CONNECTING = 1, + SHDLC_NEGOTIATING = 2, + SHDLC_HALF_CONNECTED = 3, + SHDLC_CONNECTED = 4 +}; + +struct llc_shdlc { + struct nfc_hci_dev *hdev; + xmit_to_drv_t xmit_to_drv; + rcv_to_hci_t rcv_to_hci; + + struct mutex state_mutex; + enum shdlc_state state; + int hard_fault; + + wait_queue_head_t *connect_wq; + int connect_tries; + int connect_result; + struct timer_list connect_timer;/* aka T3 in spec 10.6.1 */ + + u8 w; /* window size */ + bool srej_support; + + struct timer_list t1_timer; /* send ack timeout */ + bool t1_active; + + struct timer_list t2_timer; /* guard/retransmit timeout */ + bool t2_active; + + int ns; /* next seq num for send */ + int nr; /* next expected seq num for receive */ + int dnr; /* oldest sent unacked seq num */ + + struct sk_buff_head rcv_q; + + struct sk_buff_head send_q; + bool rnr; /* other side is not ready to receive */ + + struct sk_buff_head ack_pending_q; + + struct work_struct sm_work; + + int tx_headroom; + int tx_tailroom; + + llc_failure_t llc_failure; +}; #define SHDLC_LLC_HEAD_ROOM 2 -#define SHDLC_LLC_TAIL_ROOM 2 #define SHDLC_MAX_WINDOW 4 #define SHDLC_SREJ_SUPPORT false @@ -71,7 +119,7 @@ do { \ } while (0) /* checks x < y <= z modulo 8 */ -static bool nfc_shdlc_x_lt_y_lteq_z(int x, int y, int z) +static bool llc_shdlc_x_lt_y_lteq_z(int x, int y, int z) { if (x < z) return ((x < y) && (y <= z)) ? true : false; @@ -80,7 +128,7 @@ static bool nfc_shdlc_x_lt_y_lteq_z(int x, int y, int z) } /* checks x <= y < z modulo 8 */ -static bool nfc_shdlc_x_lteq_y_lt_z(int x, int y, int z) +static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z) { if (x <= z) return ((x <= y) && (y < z)) ? true : false; @@ -88,36 +136,21 @@ static bool nfc_shdlc_x_lteq_y_lt_z(int x, int y, int z) return ((y >= x) || (y < z)) ? true : false; } -static struct sk_buff *nfc_shdlc_alloc_skb(struct nfc_shdlc *shdlc, +static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, int payload_len) { struct sk_buff *skb; - skb = alloc_skb(shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM + - shdlc->client_tailroom + SHDLC_LLC_TAIL_ROOM + - payload_len, GFP_KERNEL); + skb = alloc_skb(shdlc->tx_headroom + SHDLC_LLC_HEAD_ROOM + + shdlc->tx_tailroom + payload_len, GFP_KERNEL); if (skb) - skb_reserve(skb, shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM); + skb_reserve(skb, shdlc->tx_headroom + SHDLC_LLC_HEAD_ROOM); return skb; } -static void nfc_shdlc_add_len_crc(struct sk_buff *skb) -{ - u16 crc; - int len; - - len = skb->len + 2; - *skb_push(skb, 1) = len; - - crc = crc_ccitt(0xffff, skb->data, skb->len); - crc = ~crc; - *skb_put(skb, 1) = crc & 0xff; - *skb_put(skb, 1) = crc >> 8; -} - /* immediately sends an S frame. */ -static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc, +static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, enum sframe_type sframe_type, int nr) { int r; @@ -125,15 +158,13 @@ static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc, pr_debug("sframe_type=%d nr=%d\n", sframe_type, nr); - skb = nfc_shdlc_alloc_skb(shdlc, 0); + skb = llc_shdlc_alloc_skb(shdlc, 0); if (skb == NULL) return -ENOMEM; *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr; - nfc_shdlc_add_len_crc(skb); - - r = shdlc->ops->xmit(shdlc, skb); + r = shdlc->xmit_to_drv(shdlc->hdev, skb); kfree_skb(skb); @@ -141,7 +172,7 @@ static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc, } /* immediately sends an U frame. skb may contain optional payload */ -static int nfc_shdlc_send_u_frame(struct nfc_shdlc *shdlc, +static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc, struct sk_buff *skb, enum uframe_modifier uframe_modifier) { @@ -151,9 +182,7 @@ static int nfc_shdlc_send_u_frame(struct nfc_shdlc *shdlc, *skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier; - nfc_shdlc_add_len_crc(skb); - - r = shdlc->ops->xmit(shdlc, skb); + r = shdlc->xmit_to_drv(shdlc->hdev, skb); kfree_skb(skb); @@ -164,7 +193,7 @@ static int nfc_shdlc_send_u_frame(struct nfc_shdlc *shdlc, * Free ack_pending frames until y_nr - 1, and reset t2 according to * the remaining oldest ack_pending frame sent time */ -static void nfc_shdlc_reset_t2(struct nfc_shdlc *shdlc, int y_nr) +static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr) { struct sk_buff *skb; int dnr = shdlc->dnr; /* MUST initially be < y_nr */ @@ -204,7 +233,7 @@ static void nfc_shdlc_reset_t2(struct nfc_shdlc *shdlc, int y_nr) * Receive validated frames from lower layer. skb contains HCI payload only. * Handle according to algorithm at spec:10.8.2 */ -static void nfc_shdlc_rcv_i_frame(struct nfc_shdlc *shdlc, +static void llc_shdlc_rcv_i_frame(struct llc_shdlc *shdlc, struct sk_buff *skb, int ns, int nr) { int x_ns = ns; @@ -216,66 +245,64 @@ static void nfc_shdlc_rcv_i_frame(struct nfc_shdlc *shdlc, goto exit; if (x_ns != shdlc->nr) { - nfc_shdlc_send_s_frame(shdlc, S_FRAME_REJ, shdlc->nr); + llc_shdlc_send_s_frame(shdlc, S_FRAME_REJ, shdlc->nr); goto exit; } if (shdlc->t1_active == false) { shdlc->t1_active = true; - mod_timer(&shdlc->t1_timer, + mod_timer(&shdlc->t1_timer, jiffies + msecs_to_jiffies(SHDLC_T1_VALUE_MS(shdlc->w))); pr_debug("(re)Start T1(send ack)\n"); } if (skb->len) { - nfc_hci_recv_frame(shdlc->hdev, skb); + shdlc->rcv_to_hci(shdlc->hdev, skb); skb = NULL; } shdlc->nr = (shdlc->nr + 1) % 8; - if (nfc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) { - nfc_shdlc_reset_t2(shdlc, y_nr); + if (llc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) { + llc_shdlc_reset_t2(shdlc, y_nr); shdlc->dnr = y_nr; } exit: - if (skb) - kfree_skb(skb); + kfree_skb(skb); } -static void nfc_shdlc_rcv_ack(struct nfc_shdlc *shdlc, int y_nr) +static void llc_shdlc_rcv_ack(struct llc_shdlc *shdlc, int y_nr) { pr_debug("remote acked up to frame %d excluded\n", y_nr); - if (nfc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) { - nfc_shdlc_reset_t2(shdlc, y_nr); + if (llc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) { + llc_shdlc_reset_t2(shdlc, y_nr); shdlc->dnr = y_nr; } } -static void nfc_shdlc_requeue_ack_pending(struct nfc_shdlc *shdlc) +static void llc_shdlc_requeue_ack_pending(struct llc_shdlc *shdlc) { struct sk_buff *skb; pr_debug("ns reset to %d\n", shdlc->dnr); while ((skb = skb_dequeue_tail(&shdlc->ack_pending_q))) { - skb_pull(skb, 2); /* remove len+control */ - skb_trim(skb, skb->len - 2); /* remove crc */ + skb_pull(skb, 1); /* remove control field */ skb_queue_head(&shdlc->send_q, skb); } shdlc->ns = shdlc->dnr; } -static void nfc_shdlc_rcv_rej(struct nfc_shdlc *shdlc, int y_nr) +static void llc_shdlc_rcv_rej(struct llc_shdlc *shdlc, int y_nr) { struct sk_buff *skb; pr_debug("remote asks retransmition from frame %d\n", y_nr); - if (nfc_shdlc_x_lteq_y_lt_z(shdlc->dnr, y_nr, shdlc->ns)) { + if (llc_shdlc_x_lteq_y_lt_z(shdlc->dnr, y_nr, shdlc->ns)) { if (shdlc->t2_active) { del_timer_sync(&shdlc->t2_timer); shdlc->t2_active = false; @@ -289,12 +316,12 @@ static void nfc_shdlc_rcv_rej(struct nfc_shdlc *shdlc, int y_nr) } } - nfc_shdlc_requeue_ack_pending(shdlc); + llc_shdlc_requeue_ack_pending(shdlc); } } /* See spec RR:10.8.3 REJ:10.8.4 */ -static void nfc_shdlc_rcv_s_frame(struct nfc_shdlc *shdlc, +static void llc_shdlc_rcv_s_frame(struct llc_shdlc *shdlc, enum sframe_type s_frame_type, int nr) { struct sk_buff *skb; @@ -304,21 +331,21 @@ static void nfc_shdlc_rcv_s_frame(struct nfc_shdlc *shdlc, switch (s_frame_type) { case S_FRAME_RR: - nfc_shdlc_rcv_ack(shdlc, nr); + llc_shdlc_rcv_ack(shdlc, nr); if (shdlc->rnr == true) { /* see SHDLC 10.7.7 */ shdlc->rnr = false; if (shdlc->send_q.qlen == 0) { - skb = nfc_shdlc_alloc_skb(shdlc, 0); + skb = llc_shdlc_alloc_skb(shdlc, 0); if (skb) skb_queue_tail(&shdlc->send_q, skb); } } break; case S_FRAME_REJ: - nfc_shdlc_rcv_rej(shdlc, nr); + llc_shdlc_rcv_rej(shdlc, nr); break; case S_FRAME_RNR: - nfc_shdlc_rcv_ack(shdlc, nr); + llc_shdlc_rcv_ack(shdlc, nr); shdlc->rnr = true; break; default: @@ -326,7 +353,7 @@ static void nfc_shdlc_rcv_s_frame(struct nfc_shdlc *shdlc, } } -static void nfc_shdlc_connect_complete(struct nfc_shdlc *shdlc, int r) +static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r) { pr_debug("result=%d\n", r); @@ -337,7 +364,7 @@ static void nfc_shdlc_connect_complete(struct nfc_shdlc *shdlc, int r) shdlc->nr = 0; shdlc->dnr = 0; - shdlc->state = SHDLC_CONNECTED; + shdlc->state = SHDLC_HALF_CONNECTED; } else { shdlc->state = SHDLC_DISCONNECTED; } @@ -347,36 +374,36 @@ static void nfc_shdlc_connect_complete(struct nfc_shdlc *shdlc, int r) wake_up(shdlc->connect_wq); } -static int nfc_shdlc_connect_initiate(struct nfc_shdlc *shdlc) +static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) { struct sk_buff *skb; pr_debug("\n"); - skb = nfc_shdlc_alloc_skb(shdlc, 2); + skb = llc_shdlc_alloc_skb(shdlc, 2); if (skb == NULL) return -ENOMEM; *skb_put(skb, 1) = SHDLC_MAX_WINDOW; *skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 1 : 0; - return nfc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET); + return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET); } -static int nfc_shdlc_connect_send_ua(struct nfc_shdlc *shdlc) +static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc) { struct sk_buff *skb; pr_debug("\n"); - skb = nfc_shdlc_alloc_skb(shdlc, 0); + skb = llc_shdlc_alloc_skb(shdlc, 0); if (skb == NULL) return -ENOMEM; - return nfc_shdlc_send_u_frame(shdlc, skb, U_FRAME_UA); + return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_UA); } -static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc, +static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc, struct sk_buff *skb, enum uframe_modifier u_frame_modifier) { @@ -388,8 +415,13 @@ static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc, switch (u_frame_modifier) { case U_FRAME_RSET: - if (shdlc->state == SHDLC_NEGOCIATING) { - /* we sent RSET, but chip wants to negociate */ + switch (shdlc->state) { + case SHDLC_NEGOTIATING: + case SHDLC_CONNECTING: + /* + * We sent RSET, but chip wants to negociate or we + * got RSET before we managed to send out our. + */ if (skb->len > 0) w = skb->data[0]; @@ -401,22 +433,34 @@ static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc, (SHDLC_SREJ_SUPPORT || (srej_support == false))) { shdlc->w = w; shdlc->srej_support = srej_support; - r = nfc_shdlc_connect_send_ua(shdlc); - nfc_shdlc_connect_complete(shdlc, r); + r = llc_shdlc_connect_send_ua(shdlc); + llc_shdlc_connect_complete(shdlc, r); } - } else if (shdlc->state == SHDLC_CONNECTED) { + break; + case SHDLC_HALF_CONNECTED: + /* + * Chip resent RSET due to its timeout - Ignote it + * as we already sent UA. + */ + break; + case SHDLC_CONNECTED: /* * Chip wants to reset link. This is unexpected and * unsupported. */ shdlc->hard_fault = -ECONNRESET; + break; + default: + break; } break; case U_FRAME_UA: if ((shdlc->state == SHDLC_CONNECTING && shdlc->connect_tries > 0) || - (shdlc->state == SHDLC_NEGOCIATING)) - nfc_shdlc_connect_complete(shdlc, 0); + (shdlc->state == SHDLC_NEGOTIATING)) { + llc_shdlc_connect_complete(shdlc, 0); + shdlc->state = SHDLC_CONNECTED; + } break; default: break; @@ -425,7 +469,7 @@ static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc, kfree_skb(skb); } -static void nfc_shdlc_handle_rcv_queue(struct nfc_shdlc *shdlc) +static void llc_shdlc_handle_rcv_queue(struct llc_shdlc *shdlc) { struct sk_buff *skb; u8 control; @@ -443,19 +487,25 @@ static void nfc_shdlc_handle_rcv_queue(struct nfc_shdlc *shdlc) switch (control & SHDLC_CONTROL_HEAD_MASK) { case SHDLC_CONTROL_HEAD_I: case SHDLC_CONTROL_HEAD_I2: + if (shdlc->state == SHDLC_HALF_CONNECTED) + shdlc->state = SHDLC_CONNECTED; + ns = (control & SHDLC_CONTROL_NS_MASK) >> 3; nr = control & SHDLC_CONTROL_NR_MASK; - nfc_shdlc_rcv_i_frame(shdlc, skb, ns, nr); + llc_shdlc_rcv_i_frame(shdlc, skb, ns, nr); break; case SHDLC_CONTROL_HEAD_S: + if (shdlc->state == SHDLC_HALF_CONNECTED) + shdlc->state = SHDLC_CONNECTED; + s_frame_type = (control & SHDLC_CONTROL_TYPE_MASK) >> 3; nr = control & SHDLC_CONTROL_NR_MASK; - nfc_shdlc_rcv_s_frame(shdlc, s_frame_type, nr); + llc_shdlc_rcv_s_frame(shdlc, s_frame_type, nr); kfree_skb(skb); break; case SHDLC_CONTROL_HEAD_U: u_frame_modifier = control & SHDLC_CONTROL_M_MASK; - nfc_shdlc_rcv_u_frame(shdlc, skb, u_frame_modifier); + llc_shdlc_rcv_u_frame(shdlc, skb, u_frame_modifier); break; default: pr_err("UNKNOWN Control=%d\n", control); @@ -465,7 +515,7 @@ static void nfc_shdlc_handle_rcv_queue(struct nfc_shdlc *shdlc) } } -static int nfc_shdlc_w_used(int ns, int dnr) +static int llc_shdlc_w_used(int ns, int dnr) { int unack_count; @@ -478,7 +528,7 @@ static int nfc_shdlc_w_used(int ns, int dnr) } /* Send frames according to algorithm at spec:10.8.1 */ -static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc) +static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc) { struct sk_buff *skb; int r; @@ -489,7 +539,7 @@ static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc) ("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n", shdlc->send_q.qlen, shdlc->ns, shdlc->dnr, shdlc->rnr == false ? "false" : "true", - shdlc->w - nfc_shdlc_w_used(shdlc->ns, shdlc->dnr), + shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr), shdlc->ack_pending_q.qlen); while (shdlc->send_q.qlen && shdlc->ack_pending_q.qlen < shdlc->w && @@ -508,11 +558,9 @@ static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc) pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns, shdlc->nr); - /* SHDLC_DUMP_SKB("shdlc frame written", skb); */ - - nfc_shdlc_add_len_crc(skb); + SHDLC_DUMP_SKB("shdlc frame written", skb); - r = shdlc->ops->xmit(shdlc, skb); + r = shdlc->xmit_to_drv(shdlc->hdev, skb); if (r < 0) { shdlc->hard_fault = r; break; @@ -534,36 +582,36 @@ static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc) } } -static void nfc_shdlc_connect_timeout(unsigned long data) +static void llc_shdlc_connect_timeout(unsigned long data) { - struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data; + struct llc_shdlc *shdlc = (struct llc_shdlc *)data; pr_debug("\n"); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } -static void nfc_shdlc_t1_timeout(unsigned long data) +static void llc_shdlc_t1_timeout(unsigned long data) { - struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data; + struct llc_shdlc *shdlc = (struct llc_shdlc *)data; pr_debug("SoftIRQ: need to send ack\n"); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } -static void nfc_shdlc_t2_timeout(unsigned long data) +static void llc_shdlc_t2_timeout(unsigned long data) { - struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data; + struct llc_shdlc *shdlc = (struct llc_shdlc *)data; pr_debug("SoftIRQ: need to retransmit\n"); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } -static void nfc_shdlc_sm_work(struct work_struct *work) +static void llc_shdlc_sm_work(struct work_struct *work) { - struct nfc_shdlc *shdlc = container_of(work, struct nfc_shdlc, sm_work); + struct llc_shdlc *shdlc = container_of(work, struct llc_shdlc, sm_work); int r; pr_debug("\n"); @@ -578,46 +626,47 @@ static void nfc_shdlc_sm_work(struct work_struct *work) break; case SHDLC_CONNECTING: if (shdlc->hard_fault) { - nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault); + llc_shdlc_connect_complete(shdlc, shdlc->hard_fault); break; } if (shdlc->connect_tries++ < 5) - r = nfc_shdlc_connect_initiate(shdlc); + r = llc_shdlc_connect_initiate(shdlc); else r = -ETIME; if (r < 0) - nfc_shdlc_connect_complete(shdlc, r); + llc_shdlc_connect_complete(shdlc, r); else { mod_timer(&shdlc->connect_timer, jiffies + msecs_to_jiffies(SHDLC_CONNECT_VALUE_MS)); - shdlc->state = SHDLC_NEGOCIATING; + shdlc->state = SHDLC_NEGOTIATING; } break; - case SHDLC_NEGOCIATING: + case SHDLC_NEGOTIATING: if (timer_pending(&shdlc->connect_timer) == 0) { shdlc->state = SHDLC_CONNECTING; - queue_work(shdlc->sm_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } - nfc_shdlc_handle_rcv_queue(shdlc); + llc_shdlc_handle_rcv_queue(shdlc); if (shdlc->hard_fault) { - nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault); + llc_shdlc_connect_complete(shdlc, shdlc->hard_fault); break; } break; + case SHDLC_HALF_CONNECTED: case SHDLC_CONNECTED: - nfc_shdlc_handle_rcv_queue(shdlc); - nfc_shdlc_handle_send_queue(shdlc); + llc_shdlc_handle_rcv_queue(shdlc); + llc_shdlc_handle_send_queue(shdlc); if (shdlc->t1_active && timer_pending(&shdlc->t1_timer) == 0) { pr_debug ("Handle T1(send ack) elapsed (T1 now inactive)\n"); shdlc->t1_active = false; - r = nfc_shdlc_send_s_frame(shdlc, S_FRAME_RR, + r = llc_shdlc_send_s_frame(shdlc, S_FRAME_RR, shdlc->nr); if (r < 0) shdlc->hard_fault = r; @@ -629,12 +678,12 @@ static void nfc_shdlc_sm_work(struct work_struct *work) shdlc->t2_active = false; - nfc_shdlc_requeue_ack_pending(shdlc); - nfc_shdlc_handle_send_queue(shdlc); + llc_shdlc_requeue_ack_pending(shdlc); + llc_shdlc_handle_send_queue(shdlc); } if (shdlc->hard_fault) { - nfc_hci_driver_failure(shdlc->hdev, shdlc->hard_fault); + shdlc->llc_failure(shdlc->hdev, shdlc->hard_fault); } break; default: @@ -647,7 +696,7 @@ static void nfc_shdlc_sm_work(struct work_struct *work) * Called from syscall context to establish shdlc link. Sleeps until * link is ready or failure. */ -static int nfc_shdlc_connect(struct nfc_shdlc *shdlc) +static int llc_shdlc_connect(struct llc_shdlc *shdlc) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq); @@ -662,14 +711,14 @@ static int nfc_shdlc_connect(struct nfc_shdlc *shdlc) mutex_unlock(&shdlc->state_mutex); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); wait_event(connect_wq, shdlc->connect_result != 1); return shdlc->connect_result; } -static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc) +static void llc_shdlc_disconnect(struct llc_shdlc *shdlc) { pr_debug("\n"); @@ -679,7 +728,7 @@ static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc) mutex_unlock(&shdlc->state_mutex); - queue_work(shdlc->sm_wq, &shdlc->sm_work); + schedule_work(&shdlc->sm_work); } /* @@ -687,7 +736,7 @@ static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc) * skb contains only LLC header and payload. * If skb == NULL, it is a notification that the link below is dead. */ -void nfc_shdlc_recv_frame(struct nfc_shdlc *shdlc, struct sk_buff *skb) +static void llc_shdlc_recv_frame(struct llc_shdlc *shdlc, struct sk_buff *skb) { if (skb == NULL) { pr_err("NULL Frame -> link is dead\n"); @@ -697,176 +746,37 @@ void nfc_shdlc_recv_frame(struct nfc_shdlc *shdlc, struct sk_buff *skb) skb_queue_tail(&shdlc->rcv_q, skb); } - queue_work(shdlc->sm_wq, &shdlc->sm_work); -} -EXPORT_SYMBOL(nfc_shdlc_recv_frame); - -static int nfc_shdlc_open(struct nfc_hci_dev *hdev) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - int r; - - pr_debug("\n"); - - if (shdlc->ops->open) { - r = shdlc->ops->open(shdlc); - if (r < 0) - return r; - } - - r = nfc_shdlc_connect(shdlc); - if (r < 0 && shdlc->ops->close) - shdlc->ops->close(shdlc); - - return r; -} - -static void nfc_shdlc_close(struct nfc_hci_dev *hdev) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - pr_debug("\n"); - - nfc_shdlc_disconnect(shdlc); - - if (shdlc->ops->close) - shdlc->ops->close(shdlc); + schedule_work(&shdlc->sm_work); } -static int nfc_shdlc_hci_ready(struct nfc_hci_dev *hdev) +static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, + rcv_to_hci_t rcv_to_hci, int tx_headroom, + int tx_tailroom, int *rx_headroom, int *rx_tailroom, + llc_failure_t llc_failure) { - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - int r = 0; - - pr_debug("\n"); + struct llc_shdlc *shdlc; - if (shdlc->ops->hci_ready) - r = shdlc->ops->hci_ready(shdlc); - - return r; -} - -static int nfc_shdlc_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - SHDLC_DUMP_SKB("queuing HCP packet to shdlc", skb); - - skb_queue_tail(&shdlc->send_q, skb); + *rx_headroom = SHDLC_LLC_HEAD_ROOM; + *rx_tailroom = 0; - queue_work(shdlc->sm_wq, &shdlc->sm_work); - - return 0; -} - -static int nfc_shdlc_start_poll(struct nfc_hci_dev *hdev, - u32 im_protocols, u32 tm_protocols) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - pr_debug("\n"); - - if (shdlc->ops->start_poll) - return shdlc->ops->start_poll(shdlc, - im_protocols, tm_protocols); - - return 0; -} - -static int nfc_shdlc_target_from_gate(struct nfc_hci_dev *hdev, u8 gate, - struct nfc_target *target) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - if (shdlc->ops->target_from_gate) - return shdlc->ops->target_from_gate(shdlc, gate, target); - - return -EPERM; -} - -static int nfc_shdlc_complete_target_discovered(struct nfc_hci_dev *hdev, - u8 gate, - struct nfc_target *target) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - pr_debug("\n"); - - if (shdlc->ops->complete_target_discovered) - return shdlc->ops->complete_target_discovered(shdlc, gate, - target); - - return 0; -} - -static int nfc_shdlc_data_exchange(struct nfc_hci_dev *hdev, - struct nfc_target *target, - struct sk_buff *skb, - struct sk_buff **res_skb) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - if (shdlc->ops->data_exchange) - return shdlc->ops->data_exchange(shdlc, target, skb, res_skb); - - return -EPERM; -} - -static int nfc_shdlc_check_presence(struct nfc_hci_dev *hdev, - struct nfc_target *target) -{ - struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); - - if (shdlc->ops->check_presence) - return shdlc->ops->check_presence(shdlc, target); - - return 0; -} - -static struct nfc_hci_ops shdlc_ops = { - .open = nfc_shdlc_open, - .close = nfc_shdlc_close, - .hci_ready = nfc_shdlc_hci_ready, - .xmit = nfc_shdlc_xmit, - .start_poll = nfc_shdlc_start_poll, - .target_from_gate = nfc_shdlc_target_from_gate, - .complete_target_discovered = nfc_shdlc_complete_target_discovered, - .data_exchange = nfc_shdlc_data_exchange, - .check_presence = nfc_shdlc_check_presence, -}; - -struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops, - struct nfc_hci_init_data *init_data, - u32 protocols, - int tx_headroom, int tx_tailroom, - int max_link_payload, const char *devname) -{ - struct nfc_shdlc *shdlc; - int r; - char name[32]; - - if (ops->xmit == NULL) - return NULL; - - shdlc = kzalloc(sizeof(struct nfc_shdlc), GFP_KERNEL); + shdlc = kzalloc(sizeof(struct llc_shdlc), GFP_KERNEL); if (shdlc == NULL) return NULL; mutex_init(&shdlc->state_mutex); - shdlc->ops = ops; shdlc->state = SHDLC_DISCONNECTED; init_timer(&shdlc->connect_timer); shdlc->connect_timer.data = (unsigned long)shdlc; - shdlc->connect_timer.function = nfc_shdlc_connect_timeout; + shdlc->connect_timer.function = llc_shdlc_connect_timeout; init_timer(&shdlc->t1_timer); shdlc->t1_timer.data = (unsigned long)shdlc; - shdlc->t1_timer.function = nfc_shdlc_t1_timeout; + shdlc->t1_timer.function = llc_shdlc_t1_timeout; init_timer(&shdlc->t2_timer); shdlc->t2_timer.data = (unsigned long)shdlc; - shdlc->t2_timer.function = nfc_shdlc_t2_timeout; + shdlc->t2_timer.function = llc_shdlc_t2_timeout; shdlc->w = SHDLC_MAX_WINDOW; shdlc->srej_support = SHDLC_SREJ_SUPPORT; @@ -875,77 +785,73 @@ struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops, skb_queue_head_init(&shdlc->send_q); skb_queue_head_init(&shdlc->ack_pending_q); - INIT_WORK(&shdlc->sm_work, nfc_shdlc_sm_work); - snprintf(name, sizeof(name), "%s_shdlc_sm_wq", devname); - shdlc->sm_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); - if (shdlc->sm_wq == NULL) - goto err_allocwq; + INIT_WORK(&shdlc->sm_work, llc_shdlc_sm_work); - shdlc->client_headroom = tx_headroom; - shdlc->client_tailroom = tx_tailroom; - - shdlc->hdev = nfc_hci_allocate_device(&shdlc_ops, init_data, protocols, - tx_headroom + SHDLC_LLC_HEAD_ROOM, - tx_tailroom + SHDLC_LLC_TAIL_ROOM, - max_link_payload); - if (shdlc->hdev == NULL) - goto err_allocdev; - - nfc_hci_set_clientdata(shdlc->hdev, shdlc); - - r = nfc_hci_register_device(shdlc->hdev); - if (r < 0) - goto err_regdev; + shdlc->hdev = hdev; + shdlc->xmit_to_drv = xmit_to_drv; + shdlc->rcv_to_hci = rcv_to_hci; + shdlc->tx_headroom = tx_headroom; + shdlc->tx_tailroom = tx_tailroom; + shdlc->llc_failure = llc_failure; return shdlc; +} -err_regdev: - nfc_hci_free_device(shdlc->hdev); +static void llc_shdlc_deinit(struct nfc_llc *llc) +{ + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); -err_allocdev: - destroy_workqueue(shdlc->sm_wq); + skb_queue_purge(&shdlc->rcv_q); + skb_queue_purge(&shdlc->send_q); + skb_queue_purge(&shdlc->ack_pending_q); -err_allocwq: kfree(shdlc); - - return NULL; } -EXPORT_SYMBOL(nfc_shdlc_allocate); -void nfc_shdlc_free(struct nfc_shdlc *shdlc) +static int llc_shdlc_start(struct nfc_llc *llc) { - pr_debug("\n"); + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); - nfc_hci_unregister_device(shdlc->hdev); - nfc_hci_free_device(shdlc->hdev); + return llc_shdlc_connect(shdlc); +} - destroy_workqueue(shdlc->sm_wq); +static int llc_shdlc_stop(struct nfc_llc *llc) +{ + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); - skb_queue_purge(&shdlc->rcv_q); - skb_queue_purge(&shdlc->send_q); - skb_queue_purge(&shdlc->ack_pending_q); + llc_shdlc_disconnect(shdlc); - kfree(shdlc); + return 0; } -EXPORT_SYMBOL(nfc_shdlc_free); -void nfc_shdlc_set_clientdata(struct nfc_shdlc *shdlc, void *clientdata) +static void llc_shdlc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) { - pr_debug("\n"); + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); - shdlc->clientdata = clientdata; + llc_shdlc_recv_frame(shdlc, skb); } -EXPORT_SYMBOL(nfc_shdlc_set_clientdata); -void *nfc_shdlc_get_clientdata(struct nfc_shdlc *shdlc) +static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) { - return shdlc->clientdata; + struct llc_shdlc *shdlc = nfc_llc_get_data(llc); + + skb_queue_tail(&shdlc->send_q, skb); + + schedule_work(&shdlc->sm_work); + + return 0; } -EXPORT_SYMBOL(nfc_shdlc_get_clientdata); -struct nfc_hci_dev *nfc_shdlc_get_hci_dev(struct nfc_shdlc *shdlc) +static struct nfc_llc_ops llc_shdlc_ops = { + .init = llc_shdlc_init, + .deinit = llc_shdlc_deinit, + .start = llc_shdlc_start, + .stop = llc_shdlc_stop, + .rcv_from_drv = llc_shdlc_rcv_from_drv, + .xmit_from_hci = llc_shdlc_xmit_from_hci, +}; + +int nfc_llc_shdlc_register(void) { - return shdlc->hdev; + return nfc_llc_register(LLC_SHDLC_NAME, &llc_shdlc_ops); } -EXPORT_SYMBOL(nfc_shdlc_get_hci_dev); diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c index b982b5b890d7..c45ccd6c094c 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp/commands.c @@ -312,6 +312,8 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); + return nfc_data_exchange(dev, local->target_idx, skb, nfc_llcp_recv, local); } diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 82f0f7588b46..cc10d073c338 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -56,7 +56,7 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) sk_for_each_safe(sk, node, tmp, &local->sockets.head) { llcp_sock = nfc_llcp_sock(sk); - lock_sock(sk); + bh_lock_sock(sk); if (sk->sk_state == LLCP_CONNECTED) nfc_put_device(llcp_sock->dev); @@ -68,26 +68,26 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) list_for_each_entry_safe(lsk, n, &llcp_sock->accept_queue, accept_queue) { accept_sk = &lsk->sk; - lock_sock(accept_sk); + bh_lock_sock(accept_sk); nfc_llcp_accept_unlink(accept_sk); accept_sk->sk_state = LLCP_CLOSED; - release_sock(accept_sk); + bh_unlock_sock(accept_sk); sock_orphan(accept_sk); } if (listen == true) { - release_sock(sk); + bh_unlock_sock(sk); continue; } } sk->sk_state = LLCP_CLOSED; - release_sock(sk); + bh_unlock_sock(sk); sock_orphan(sk); @@ -114,9 +114,9 @@ static void local_release(struct kref *ref) nfc_llcp_socket_release(local, false); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); - destroy_workqueue(local->tx_wq); - destroy_workqueue(local->rx_wq); - destroy_workqueue(local->timeout_wq); + cancel_work_sync(&local->tx_work); + cancel_work_sync(&local->rx_work); + cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); kfree(local); } @@ -181,7 +181,7 @@ static void nfc_llcp_symm_timer(unsigned long data) pr_err("SYMM timeout\n"); - queue_work(local->timeout_wq, &local->timeout_work); + schedule_work(&local->timeout_work); } struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) @@ -426,6 +426,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) u8 *miux_tlv, miux_length; __be16 miux; u8 gb_len = 0; + int ret = 0; version = LLCP_VERSION_11; version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version, @@ -450,8 +451,8 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) gb_len += ARRAY_SIZE(llcp_magic); if (gb_len > NFC_MAX_GT_LEN) { - kfree(version_tlv); - return -EINVAL; + ret = -EINVAL; + goto out; } gb_cur = local->gb; @@ -471,12 +472,15 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) memcpy(gb_cur, miux_tlv, miux_length); gb_cur += miux_length; + local->gb_len = gb_len; + +out: kfree(version_tlv); kfree(lto_tlv); + kfree(wks_tlv); + kfree(miux_tlv); - local->gb_len = gb_len; - - return 0; + return ret; } u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) @@ -554,6 +558,46 @@ static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu) sock->recv_ack_n = (sock->recv_n - 1) % 16; } +void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, + struct sk_buff *skb, u8 direction) +{ + struct hlist_node *node; + struct sk_buff *skb_copy = NULL, *nskb; + struct sock *sk; + u8 *data; + + read_lock(&local->raw_sockets.lock); + + sk_for_each(sk, node, &local->raw_sockets.head) { + if (sk->sk_state != LLCP_BOUND) + continue; + + if (skb_copy == NULL) { + skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE, + GFP_ATOMIC); + + if (skb_copy == NULL) + continue; + + data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE); + + data[0] = local->dev ? local->dev->idx : 0xFF; + data[1] = direction; + } + + nskb = skb_clone(skb_copy, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&local->raw_sockets.lock); + + kfree_skb(skb_copy); +} + static void nfc_llcp_tx_work(struct work_struct *work) { struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, @@ -574,6 +618,9 @@ static void nfc_llcp_tx_work(struct work_struct *work) DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + nfc_llcp_send_to_raw_sock(local, skb, + NFC_LLCP_DIRECTION_TX); + ret = nfc_data_exchange(local->dev, local->target_idx, skb, nfc_llcp_recv, local); @@ -1018,6 +1065,8 @@ static void nfc_llcp_rx_work(struct work_struct *work) print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); + switch (ptype) { case LLCP_PDU_SYMM: pr_debug("SYMM\n"); @@ -1052,7 +1101,7 @@ static void nfc_llcp_rx_work(struct work_struct *work) } - queue_work(local->tx_wq, &local->tx_work); + schedule_work(&local->tx_work); kfree_skb(local->rx_pending); local->rx_pending = NULL; @@ -1071,7 +1120,7 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) local->rx_pending = skb_get(skb); del_timer(&local->link_timer); - queue_work(local->rx_wq, &local->rx_work); + schedule_work(&local->rx_work); return; } @@ -1086,7 +1135,7 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) local->rx_pending = skb_get(skb); del_timer(&local->link_timer); - queue_work(local->rx_wq, &local->rx_work); + schedule_work(&local->rx_work); return 0; } @@ -1121,7 +1170,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, if (rf_mode == NFC_RF_INITIATOR) { pr_debug("Queueing Tx work\n"); - queue_work(local->tx_wq, &local->tx_work); + schedule_work(&local->tx_work); } else { mod_timer(&local->link_timer, jiffies + msecs_to_jiffies(local->remote_lto)); @@ -1130,10 +1179,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, int nfc_llcp_register_device(struct nfc_dev *ndev) { - struct device *dev = &ndev->dev; struct nfc_llcp_local *local; - char name[32]; - int err; local = kzalloc(sizeof(struct nfc_llcp_local), GFP_KERNEL); if (local == NULL) @@ -1149,41 +1195,15 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) skb_queue_head_init(&local->tx_queue); INIT_WORK(&local->tx_work, nfc_llcp_tx_work); - snprintf(name, sizeof(name), "%s_llcp_tx_wq", dev_name(dev)); - local->tx_wq = - alloc_workqueue(name, - WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, - 1); - if (local->tx_wq == NULL) { - err = -ENOMEM; - goto err_local; - } local->rx_pending = NULL; INIT_WORK(&local->rx_work, nfc_llcp_rx_work); - snprintf(name, sizeof(name), "%s_llcp_rx_wq", dev_name(dev)); - local->rx_wq = - alloc_workqueue(name, - WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, - 1); - if (local->rx_wq == NULL) { - err = -ENOMEM; - goto err_tx_wq; - } INIT_WORK(&local->timeout_work, nfc_llcp_timeout_work); - snprintf(name, sizeof(name), "%s_llcp_timeout_wq", dev_name(dev)); - local->timeout_wq = - alloc_workqueue(name, - WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, - 1); - if (local->timeout_wq == NULL) { - err = -ENOMEM; - goto err_rx_wq; - } - local->sockets.lock = __RW_LOCK_UNLOCKED(local->sockets.lock); - local->connecting_sockets.lock = __RW_LOCK_UNLOCKED(local->connecting_sockets.lock); + rwlock_init(&local->sockets.lock); + rwlock_init(&local->connecting_sockets.lock); + rwlock_init(&local->raw_sockets.lock); nfc_llcp_build_gb(local); @@ -1193,17 +1213,6 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) list_add(&llcp_devices, &local->list); return 0; - -err_rx_wq: - destroy_workqueue(local->rx_wq); - -err_tx_wq: - destroy_workqueue(local->tx_wq); - -err_local: - kfree(local); - - return 0; } void nfc_llcp_unregister_device(struct nfc_dev *dev) diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 83b8bba5a280..fdb2d24e60bd 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -56,12 +56,9 @@ struct nfc_llcp_local { struct timer_list link_timer; struct sk_buff_head tx_queue; - struct workqueue_struct *tx_wq; struct work_struct tx_work; - struct workqueue_struct *rx_wq; struct work_struct rx_work; struct sk_buff *rx_pending; - struct workqueue_struct *timeout_wq; struct work_struct timeout_work; u32 target_idx; @@ -89,6 +86,7 @@ struct nfc_llcp_local { /* sockets array */ struct llcp_sock_list sockets; struct llcp_sock_list connecting_sockets; + struct llcp_sock_list raw_sockets; }; struct nfc_llcp_sock { @@ -187,6 +185,8 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local); void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap); int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock); +void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, + struct sk_buff *skb, u8 direction); /* Sock API */ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp); diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index ddeb9aa398f0..63e4cdc92376 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -142,6 +142,60 @@ error: return ret; } +static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr, + int alen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + struct nfc_llcp_local *local; + struct nfc_dev *dev; + struct sockaddr_nfc_llcp llcp_addr; + int len, ret = 0; + + if (!addr || addr->sa_family != AF_NFC) + return -EINVAL; + + pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); + + memset(&llcp_addr, 0, sizeof(llcp_addr)); + len = min_t(unsigned int, sizeof(llcp_addr), alen); + memcpy(&llcp_addr, addr, len); + + lock_sock(sk); + + if (sk->sk_state != LLCP_CLOSED) { + ret = -EBADFD; + goto error; + } + + dev = nfc_get_device(llcp_addr.dev_idx); + if (dev == NULL) { + ret = -ENODEV; + goto error; + } + + local = nfc_llcp_find_local(dev); + if (local == NULL) { + ret = -ENODEV; + goto put_dev; + } + + llcp_sock->dev = dev; + llcp_sock->local = nfc_llcp_local_get(local); + llcp_sock->nfc_protocol = llcp_addr.nfc_protocol; + + nfc_llcp_sock_link(&local->raw_sockets, sk); + + sk->sk_state = LLCP_BOUND; + +put_dev: + nfc_put_device(dev); + +error: + release_sock(sk); + return ret; +} + static int llcp_sock_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; @@ -300,9 +354,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, pr_debug("%p %d %d %d\n", sk, llcp_sock->target_idx, llcp_sock->dsap, llcp_sock->ssap); - if (llcp_sock == NULL || llcp_sock->dev == NULL) - return -EBADFD; - uaddr->sa_family = AF_NFC; *len = sizeof(struct sockaddr_nfc_llcp); @@ -421,7 +472,10 @@ static int llcp_sock_release(struct socket *sock) release_sock(sk); - nfc_llcp_sock_unlink(&local->sockets, sk); + if (sock->type == SOCK_RAW) + nfc_llcp_sock_unlink(&local->raw_sockets, sk); + else + nfc_llcp_sock_unlink(&local->sockets, sk); out: sock_orphan(sk); @@ -443,15 +497,11 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags); if (!addr || len < sizeof(struct sockaddr_nfc) || - addr->sa_family != AF_NFC) { - pr_err("Invalid socket\n"); + addr->sa_family != AF_NFC) return -EINVAL; - } - if (addr->service_name_len == 0 && addr->dsap == 0) { - pr_err("Missing service name or dsap\n"); + if (addr->service_name_len == 0 && addr->dsap == 0) return -EINVAL; - } pr_debug("addr dev_idx=%u target_idx=%u protocol=%u\n", addr->dev_idx, addr->target_idx, addr->nfc_protocol); @@ -617,7 +667,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!(flags & MSG_PEEK)) { /* SOCK_STREAM: re-queue skb if it contains unreceived data */ - if (sk->sk_type == SOCK_STREAM) { + if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_RAW) { skb_pull(skb, copied); if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); @@ -658,6 +708,26 @@ static const struct proto_ops llcp_sock_ops = { .mmap = sock_no_mmap, }; +static const struct proto_ops llcp_rawsock_ops = { + .family = PF_NFC, + .owner = THIS_MODULE, + .bind = llcp_raw_sock_bind, + .connect = sock_no_connect, + .release = llcp_sock_release, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = llcp_sock_getname, + .poll = llcp_sock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = llcp_sock_recvmsg, + .mmap = sock_no_mmap, +}; + static void llcp_sock_destruct(struct sock *sk) { struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -735,10 +805,15 @@ static int llcp_sock_create(struct net *net, struct socket *sock, pr_debug("%p\n", sock); - if (sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM) + if (sock->type != SOCK_STREAM && + sock->type != SOCK_DGRAM && + sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sock->ops = &llcp_sock_ops; + if (sock->type == SOCK_RAW) + sock->ops = &llcp_rawsock_ops; + else + sock->ops = &llcp_sock_ops; sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC); if (sk == NULL) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index f81efe13985a..acf9abb7d99b 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -176,6 +176,27 @@ static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt) (1 + ((*num) * sizeof(struct disc_map_config))), &cmd); } +struct nci_set_config_param { + __u8 id; + size_t len; + __u8 *val; +}; + +static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_set_config_param *param = (struct nci_set_config_param *)opt; + struct nci_core_set_config_cmd cmd; + + BUG_ON(param->len > NCI_MAX_PARAM_LEN); + + cmd.num_params = 1; + cmd.param.id = param->id; + cmd.param.len = param->len; + memcpy(cmd.param.val, param->val, param->len); + + nci_send_cmd(ndev, NCI_OP_CORE_SET_CONFIG_CMD, (3 + param->len), &cmd); +} + static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt) { struct nci_rf_disc_cmd cmd; @@ -388,6 +409,32 @@ static int nci_dev_down(struct nfc_dev *nfc_dev) return nci_close_device(ndev); } +static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + struct nci_set_config_param param; + __u8 local_gb[NFC_MAX_GT_LEN]; + int i, rc = 0; + + param.val = nfc_get_local_general_bytes(nfc_dev, ¶m.len); + if ((param.val == NULL) || (param.len == 0)) + return rc; + + if (param.len > NCI_MAX_PARAM_LEN) + return -EINVAL; + + for (i = 0; i < param.len; i++) + local_gb[param.len-1-i] = param.val[i]; + + param.id = NCI_PN_ATR_REQ_GEN_BYTES; + param.val = local_gb; + + rc = nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); + + return rc; +} + static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, __u32 tm_protocols) { @@ -415,6 +462,14 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, return -EBUSY; } + if (im_protocols & NFC_PROTO_NFC_DEP_MASK) { + rc = nci_set_local_general_bytes(nfc_dev); + if (rc) { + pr_err("failed to set local general bytes\n"); + return rc; + } + } + rc = nci_request(ndev, nci_rf_discover_req, im_protocols, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); @@ -509,7 +564,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); - pr_debug("target_idx %d\n", target->idx); + pr_debug("entry\n"); if (!ndev->target_active_prot) { pr_err("unable to deactivate target, no active target\n"); @@ -524,6 +579,38 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, } } + +static int nci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, + __u8 comm_mode, __u8 *gb, size_t gb_len) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + int rc; + + pr_debug("target_idx %d, comm_mode %d\n", target->idx, comm_mode); + + rc = nci_activate_target(nfc_dev, target, NFC_PROTO_NFC_DEP); + if (rc) + return rc; + + rc = nfc_set_remote_general_bytes(nfc_dev, ndev->remote_gb, + ndev->remote_gb_len); + if (!rc) + rc = nfc_dep_link_is_up(nfc_dev, target->idx, NFC_COMM_PASSIVE, + NFC_RF_INITIATOR); + + return rc; +} + +static int nci_dep_link_down(struct nfc_dev *nfc_dev) +{ + pr_debug("entry\n"); + + nci_deactivate_target(nfc_dev, NULL); + + return 0; +} + + static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) @@ -557,6 +644,8 @@ static struct nfc_ops nci_nfc_ops = { .dev_down = nci_dev_down, .start_poll = nci_start_poll, .stop_poll = nci_stop_poll, + .dep_link_up = nci_dep_link_up, + .dep_link_down = nci_dep_link_down, .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, .im_transceive = nci_transceive, diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index af7a93b04393..b2aa98ef0927 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -176,6 +176,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, protocol = NFC_PROTO_ISO14443_B_MASK; else if (rf_protocol == NCI_RF_PROTOCOL_T3T) protocol = NFC_PROTO_FELICA_MASK; + else if (rf_protocol == NCI_RF_PROTOCOL_NFC_DEP) + protocol = NFC_PROTO_NFC_DEP_MASK; else protocol = 0; @@ -361,6 +363,33 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, return NCI_STATUS_OK; } +static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, + struct nci_rf_intf_activated_ntf *ntf, __u8 *data) +{ + struct activation_params_poll_nfc_dep *poll; + int i; + + switch (ntf->activation_rf_tech_and_mode) { + case NCI_NFC_A_PASSIVE_POLL_MODE: + case NCI_NFC_F_PASSIVE_POLL_MODE: + poll = &ntf->activation_params.poll_nfc_dep; + poll->atr_res_len = min_t(__u8, *data++, 63); + pr_debug("atr_res_len %d\n", poll->atr_res_len); + if (poll->atr_res_len > 0) { + for (i = 0; i < poll->atr_res_len; i++) + poll->atr_res[poll->atr_res_len-1-i] = data[i]; + } + break; + + default: + pr_err("unsupported activation_rf_tech_and_mode 0x%x\n", + ntf->activation_rf_tech_and_mode); + return NCI_STATUS_RF_PROTOCOL_ERROR; + } + + return NCI_STATUS_OK; +} + static void nci_target_auto_activated(struct nci_dev *ndev, struct nci_rf_intf_activated_ntf *ntf) { @@ -454,6 +483,11 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, &ntf, data); break; + case NCI_RF_INTERFACE_NFC_DEP: + err = nci_extract_activation_params_nfc_dep(ndev, + &ntf, data); + break; + case NCI_RF_INTERFACE_FRAME: /* no activation params */ break; @@ -473,6 +507,24 @@ exit: /* set the available credits to initial value */ atomic_set(&ndev->credits_cnt, ndev->initial_num_credits); + + /* store general bytes to be reported later in dep_link_up */ + if (ntf.rf_interface == NCI_RF_INTERFACE_NFC_DEP) { + ndev->remote_gb_len = 0; + + if (ntf.activation_params_len > 0) { + /* ATR_RES general bytes at offset 15 */ + ndev->remote_gb_len = min_t(__u8, + (ntf.activation_params + .poll_nfc_dep.atr_res_len + - NFC_ATR_RES_GT_OFFSET), + NFC_MAX_GT_LEN); + memcpy(ndev->remote_gb, + (ntf.activation_params.poll_nfc_dep + .atr_res + NFC_ATR_RES_GT_OFFSET), + ndev->remote_gb_len); + } + } } if (atomic_read(&ndev->state) == NCI_DISCOVERY) { diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 3003c3390e49..dd072f38ad00 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -119,6 +119,16 @@ exit: nci_req_complete(ndev, rsp_1->status); } +static void nci_core_set_config_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_core_set_config_rsp *rsp = (void *) skb->data; + + pr_debug("status 0x%x\n", rsp->status); + + nci_req_complete(ndev, rsp->status); +} + static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { @@ -194,6 +204,10 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_core_init_rsp_packet(ndev, skb); break; + case NCI_OP_CORE_SET_CONFIG_RSP: + nci_core_set_config_rsp_packet(ndev, skb); + break; + case NCI_OP_RF_DISCOVER_MAP_RSP: nci_rf_disc_map_rsp_packet(ndev, skb); break; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 4c51714ee741..c1b5285cbde7 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -58,7 +58,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, { void *hdr; - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &nfc_genl_family, flags, NFC_CMD_GET_TARGET); if (!hdr) return -EMSGSIZE; @@ -165,7 +165,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) @@ -347,13 +347,13 @@ free_msg: } static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, - u32 pid, u32 seq, + u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; - hdr = genlmsg_put(msg, pid, seq, &nfc_genl_family, flags, + hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_DEVICE); if (!hdr) return -EMSGSIZE; @@ -401,7 +401,7 @@ static int nfc_genl_dump_devices(struct sk_buff *skb, while (dev) { int rc; - rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).pid, + rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; @@ -520,7 +520,7 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) goto out_putdev; } - rc = nfc_genl_send_device(msg, dev, info->snd_pid, info->snd_seq, + rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq, NULL, 0); if (rc < 0) goto out_free; @@ -611,7 +611,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) - dev->genl_data.poll_req_pid = info->snd_pid; + dev->genl_data.poll_req_portid = info->snd_portid; mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -645,13 +645,13 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) mutex_lock(&dev->genl_data.genl_data_mutex); - if (dev->genl_data.poll_req_pid != info->snd_pid) { + if (dev->genl_data.poll_req_portid != info->snd_portid) { rc = -EBUSY; goto out; } rc = nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; out: mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -761,38 +761,70 @@ static struct genl_ops nfc_genl_ops[] = { }, }; -static int nfc_genl_rcv_nl_event(struct notifier_block *this, - unsigned long event, void *ptr) + +struct urelease_work { + struct work_struct w; + int portid; +}; + +static void nfc_urelease_event_work(struct work_struct *work) { - struct netlink_notify *n = ptr; + struct urelease_work *w = container_of(work, struct urelease_work, w); struct class_dev_iter iter; struct nfc_dev *dev; - if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) - goto out; + pr_debug("portid %d\n", w->portid); - pr_debug("NETLINK_URELEASE event from id %d\n", n->pid); + mutex_lock(&nfc_devlist_mutex); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { - if (dev->genl_data.poll_req_pid == n->pid) { + mutex_lock(&dev->genl_data.genl_data_mutex); + + if (dev->genl_data.poll_req_portid == w->portid) { nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; } + + mutex_unlock(&dev->genl_data.genl_data_mutex); + dev = nfc_device_iter_next(&iter); } nfc_device_iter_exit(&iter); + mutex_unlock(&nfc_devlist_mutex); + + kfree(w); +} + +static int nfc_genl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + struct urelease_work *w; + + if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) + goto out; + + pr_debug("NETLINK_URELEASE event from id %d\n", n->portid); + + w = kmalloc(sizeof(*w), GFP_ATOMIC); + if (w) { + INIT_WORK((struct work_struct *) w, nfc_urelease_event_work); + w->portid = n->portid; + schedule_work((struct work_struct *) w); + } + out: return NOTIFY_DONE; } void nfc_genl_data_init(struct nfc_genl_data *genl_data) { - genl_data->poll_req_pid = 0; + genl_data->poll_req_portid = 0; mutex_init(&genl_data->genl_data_mutex); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 320fa0e6951a..08114478cb85 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -45,7 +45,7 @@ static int make_writable(struct sk_buff *skb, int write_len) return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } -/* remove VLAN header from packet and update csum accrodingly. */ +/* remove VLAN header from packet and update csum accordingly. */ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) { struct vlan_hdr *vhdr; @@ -266,7 +266,7 @@ static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) if (unlikely(!skb)) return -ENOMEM; - vport = rcu_dereference(dp->ports[out_port]); + vport = ovs_vport_rcu(dp, out_port); if (unlikely(!vport)) { kfree_skb(skb); return -ENODEV; @@ -286,7 +286,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.key = &OVS_CB(skb)->flow->key; upcall.userdata = NULL; - upcall.pid = 0; + upcall.portid = 0; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -296,7 +296,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - upcall.pid = nla_get_u32(a); + upcall.portid = nla_get_u32(a); break; } } @@ -325,9 +325,6 @@ static int sample(struct datapath *dp, struct sk_buff *skb, } } - if (!acts_list) - return 0; - return do_execute_actions(dp, skb, nla_data(acts_list), nla_len(acts_list), true); } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d8277d29e710..4c4b62ccc7d7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -49,12 +49,29 @@ #include <linux/dmi.h> #include <linux/workqueue.h> #include <net/genetlink.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include "datapath.h" #include "flow.h" #include "vport-internal_dev.h" /** + * struct ovs_net - Per net-namespace data for ovs. + * @dps: List of datapaths to enable dumping them all out. + * Protected by genl_mutex. + */ +struct ovs_net { + struct list_head dps; +}; + +static int ovs_net_id __read_mostly; + +#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) +static void rehash_flow_table(struct work_struct *work); +static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); + +/** * DOC: Locking: * * Writes to device state (add/remove datapath, port, set operations on vports, @@ -71,29 +88,21 @@ * each other. */ -/* Global list of datapaths to enable dumping them all out. - * Protected by genl_mutex. - */ -static LIST_HEAD(dps); - -#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) -static void rehash_flow_table(struct work_struct *work); -static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); - static struct vport *new_vport(const struct vport_parms *); -static int queue_gso_packets(int dp_ifindex, struct sk_buff *, +static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, const struct dp_upcall_info *); -static int queue_userspace_packet(int dp_ifindex, struct sk_buff *, +static int queue_userspace_packet(struct net *, int dp_ifindex, + struct sk_buff *, const struct dp_upcall_info *); /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ -static struct datapath *get_dp(int dp_ifindex) +static struct datapath *get_dp(struct net *net, int dp_ifindex) { struct datapath *dp = NULL; struct net_device *dev; rcu_read_lock(); - dev = dev_get_by_index_rcu(&init_net, dp_ifindex); + dev = dev_get_by_index_rcu(net, dp_ifindex); if (dev) { struct vport *vport = ovs_internal_dev_get_vport(dev); if (vport) @@ -107,7 +116,7 @@ static struct datapath *get_dp(int dp_ifindex) /* Must be called with rcu_read_lock or RTNL lock. */ const char *ovs_dp_name(const struct datapath *dp) { - struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]); + struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); return vport->ops->get_name(vport); } @@ -118,7 +127,7 @@ static int get_dpifindex(struct datapath *dp) rcu_read_lock(); - local = rcu_dereference(dp->ports[OVSP_LOCAL]); + local = ovs_vport_rcu(dp, OVSP_LOCAL); if (local) ifindex = local->ops->get_ifindex(local); else @@ -135,9 +144,31 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); free_percpu(dp->stats_percpu); + release_net(ovs_dp_get_net(dp)); + kfree(dp->ports); kfree(dp); } +static struct hlist_head *vport_hash_bucket(const struct datapath *dp, + u16 port_no) +{ + return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; +} + +struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) +{ + struct vport *vport; + struct hlist_node *n; + struct hlist_head *head; + + head = vport_hash_bucket(dp, port_no); + hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { + if (vport->port_no == port_no) + return vport; + } + return NULL; +} + /* Called with RTNL lock and genl_lock. */ static struct vport *new_vport(const struct vport_parms *parms) { @@ -146,9 +177,9 @@ static struct vport *new_vport(const struct vport_parms *parms) vport = ovs_vport_add(parms); if (!IS_ERR(vport)) { struct datapath *dp = parms->dp; + struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); - rcu_assign_pointer(dp->ports[parms->port_no], vport); - list_add(&vport->node, &dp->port_list); + hlist_add_head_rcu(&vport->dp_hash_node, head); } return vport; @@ -160,8 +191,7 @@ void ovs_dp_detach_port(struct vport *p) ASSERT_RTNL(); /* First drop references to device. */ - list_del(&p->node); - rcu_assign_pointer(p->dp->ports[p->port_no], NULL); + hlist_del_rcu(&p->dp_hash_node); /* Then destroy it. */ ovs_vport_del(p); @@ -195,7 +225,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; upcall.userdata = NULL; - upcall.pid = p->upcall_pid; + upcall.portid = p->upcall_portid; ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; @@ -220,17 +250,18 @@ static struct genl_family dp_packet_genl_family = { .hdrsize = sizeof(struct ovs_header), .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, - .maxattr = OVS_PACKET_ATTR_MAX + .maxattr = OVS_PACKET_ATTR_MAX, + .netnsok = true }; int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, - const struct dp_upcall_info *upcall_info) + const struct dp_upcall_info *upcall_info) { struct dp_stats_percpu *stats; int dp_ifindex; int err; - if (upcall_info->pid == 0) { + if (upcall_info->portid == 0) { err = -ENOTCONN; goto err; } @@ -242,9 +273,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, } if (!skb_is_gso(skb)) - err = queue_userspace_packet(dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); else - err = queue_gso_packets(dp_ifindex, skb, upcall_info); + err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); if (err) goto err; @@ -260,7 +291,8 @@ err: return err; } -static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, +static int queue_gso_packets(struct net *net, int dp_ifindex, + struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { unsigned short gso_type = skb_shinfo(skb)->gso_type; @@ -276,7 +308,7 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, /* Queue all of the segments. */ skb = segs; do { - err = queue_userspace_packet(dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); if (err) break; @@ -306,7 +338,8 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, return err; } -static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb, +static int queue_userspace_packet(struct net *net, int dp_ifindex, + struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct ovs_header *upcall; @@ -362,7 +395,7 @@ static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb, skb_copy_and_csum_dev(skb, nla_data(nla)); - err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid); + err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: kfree_skb(nskb); @@ -370,15 +403,10 @@ out: } /* Called with genl_mutex. */ -static int flush_flows(int dp_ifindex) +static int flush_flows(struct datapath *dp) { struct flow_table *old_table; struct flow_table *new_table; - struct datapath *dp; - - dp = get_dp(dp_ifindex); - if (!dp) - return -ENODEV; old_table = genl_dereference(dp->table); new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); @@ -425,10 +453,10 @@ static int validate_sample(const struct nlattr *attr, static int validate_tp_port(const struct sw_flow_key *flow_key) { if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src && flow_key->ipv4.tp.dst) + if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) return 0; } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src && flow_key->ipv6.tp.dst) + if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) return 0; } @@ -460,7 +488,7 @@ static int validate_set(const struct nlattr *a, if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; - if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst) + if (!flow_key->ip.proto) return -EINVAL; ipv4_key = nla_data(ovs_key); @@ -668,7 +696,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->priority = flow->key.phy.priority; rcu_read_lock(); - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto err_unlock; @@ -742,7 +770,8 @@ static struct genl_family dp_flow_genl_family = { .hdrsize = sizeof(struct ovs_header), .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, - .maxattr = OVS_FLOW_ATTR_MAX + .maxattr = OVS_FLOW_ATTR_MAX, + .netnsok = true }; static struct genl_multicast_group ovs_dp_flow_multicast_group = { @@ -751,7 +780,7 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { /* Called with genl_lock. */ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, - struct sk_buff *skb, u32 pid, + struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { const int skb_orig_len = skb->len; @@ -766,7 +795,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, sf_acts = rcu_dereference_protected(flow->sf_acts, lockdep_genl_is_held()); - ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd); + ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -850,7 +879,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, - u32 pid, u32 seq, u8 cmd) + u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; @@ -859,7 +888,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; } @@ -894,7 +923,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; } - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); error = -ENODEV; if (!dp) goto error; @@ -941,7 +970,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->hash = ovs_flow_hash(&key, key_len); ovs_flow_tbl_insert(table, flow); - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); } else { @@ -979,7 +1008,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_flow_deferred_free_acts(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); /* Clear stats. */ @@ -991,11 +1020,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } if (!IS_ERR(reply)) - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); else - netlink_set_err(init_net.genl_sock, 0, + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); return 0; @@ -1023,7 +1052,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) if (err) return err; - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) return -ENODEV; @@ -1032,7 +1061,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!flow) return -ENOENT; - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1052,16 +1081,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) int err; int key_len; + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) + return -ENODEV; + if (!a[OVS_FLOW_ATTR_KEY]) - return flush_flows(ovs_header->dp_ifindex); + return flush_flows(dp); + err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); if (err) return err; - dp = get_dp(ovs_header->dp_ifindex); - if (!dp) - return -ENODEV; - table = genl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); if (!flow) @@ -1073,13 +1103,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_flow_tbl_remove(table, flow); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid, + err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); BUG_ON(err < 0); ovs_flow_deferred_free(flow); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; } @@ -1090,7 +1120,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) struct datapath *dp; struct flow_table *table; - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) return -ENODEV; @@ -1107,7 +1137,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; if (ovs_flow_cmd_fill_info(flow, dp, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) break; @@ -1152,7 +1182,8 @@ static struct genl_family dp_datapath_genl_family = { .hdrsize = sizeof(struct ovs_header), .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, - .maxattr = OVS_DP_ATTR_MAX + .maxattr = OVS_DP_ATTR_MAX, + .netnsok = true }; static struct genl_multicast_group ovs_dp_datapath_multicast_group = { @@ -1160,13 +1191,13 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = { }; static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_dp_stats dp_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, flags, cmd); if (!ovs_header) goto error; @@ -1191,7 +1222,7 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1201,7 +1232,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd); + retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1210,18 +1241,19 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, } /* Called with genl_mutex and optionally with RTNL lock also. */ -static struct datapath *lookup_datapath(struct ovs_header *ovs_header, +static struct datapath *lookup_datapath(struct net *net, + struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) { struct datapath *dp; if (!a[OVS_DP_ATTR_NAME]) - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(net, ovs_header->dp_ifindex); else { struct vport *vport; rcu_read_lock(); - vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME])); + vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; rcu_read_unlock(); } @@ -1235,22 +1267,21 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct datapath *dp; struct vport *vport; - int err; + struct ovs_net *ovs_net; + int err, i; err = -EINVAL; if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; rtnl_lock(); - err = -ENODEV; - if (!try_module_get(THIS_MODULE)) - goto err_unlock_rtnl; err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_put_module; - INIT_LIST_HEAD(&dp->port_list); + goto err_unlock_rtnl; + + ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); /* Allocate table. */ err = -ENOMEM; @@ -1264,13 +1295,23 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_table; } + dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), + GFP_KERNEL); + if (!dp->ports) { + err = -ENOMEM; + goto err_destroy_percpu; + } + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->ports[i]); + /* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.type = OVS_VPORT_TYPE_INTERNAL; parms.options = NULL; parms.dp = dp; parms.port_no = OVSP_LOCAL; - parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); vport = new_vport(&parms); if (IS_ERR(vport)) { @@ -1278,64 +1319,59 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (err == -EBUSY) err = -EEXIST; - goto err_destroy_percpu; + goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) goto err_destroy_local_port; - list_add_tail(&dp->list_node, &dps); + ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); + list_add_tail(&dp->list_node, &ovs_net->dps); rtnl_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; err_destroy_local_port: - ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); + ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); +err_destroy_ports_array: + kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: ovs_flow_tbl_destroy(genl_dereference(dp->table)); err_free_dp: + release_net(ovs_dp_get_net(dp)); kfree(dp); -err_put_module: - module_put(THIS_MODULE); err_unlock_rtnl: rtnl_unlock(); err: return err; } -static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) +/* Called with genl_mutex. */ +static void __dp_destroy(struct datapath *dp) { - struct vport *vport, *next_vport; - struct sk_buff *reply; - struct datapath *dp; - int err; + int i; rtnl_lock(); - dp = lookup_datapath(info->userhdr, info->attrs); - err = PTR_ERR(dp); - if (IS_ERR(dp)) - goto exit_unlock; - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, - info->snd_seq, OVS_DP_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + struct vport *vport; + struct hlist_node *node, *n; - list_for_each_entry_safe(vport, next_vport, &dp->port_list, node) - if (vport->port_no != OVSP_LOCAL) - ovs_dp_detach_port(vport); + hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node) + if (vport->port_no != OVSP_LOCAL) + ovs_dp_detach_port(vport); + } list_del(&dp->list_node); - ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); + ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); /* rtnl_unlock() will wait until all the references to devices that * are pending unregistration have been dropped. We do it here to @@ -1345,17 +1381,32 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) rtnl_unlock(); call_rcu(&dp->rcu, destroy_dp_rcu); - module_put(THIS_MODULE); +} + +static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *reply; + struct datapath *dp; + int err; + + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); + err = PTR_ERR(dp); + if (IS_ERR(dp)) + return err; - genl_notify(reply, genl_info_net(info), info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, + info->snd_seq, OVS_DP_CMD_DEL); + err = PTR_ERR(reply); + if (IS_ERR(reply)) + return err; + + __dp_destroy(dp); + + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; - -exit_unlock: - rtnl_unlock(); - return err; } static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) @@ -1364,20 +1415,20 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - dp = lookup_datapath(info->userhdr, info->attrs); + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); - netlink_set_err(init_net.genl_sock, 0, + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_datapath_multicast_group.id, err); return 0; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); @@ -1389,11 +1440,11 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct datapath *dp; - dp = lookup_datapath(info->userhdr, info->attrs); + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1403,13 +1454,14 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); struct datapath *dp; int skip = cb->args[0]; int i = 0; - list_for_each_entry(dp, &dps, list_node) { + list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && - ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid, + ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_DP_CMD_NEW) < 0) break; @@ -1459,7 +1511,8 @@ static struct genl_family dp_vport_genl_family = { .hdrsize = sizeof(struct ovs_header), .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, - .maxattr = OVS_VPORT_ATTR_MAX + .maxattr = OVS_VPORT_ATTR_MAX, + .netnsok = true }; struct genl_multicast_group ovs_dp_vport_multicast_group = { @@ -1468,13 +1521,13 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = { /* Called with RTNL lock or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -1484,7 +1537,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || - nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid)) + nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); @@ -1506,7 +1559,7 @@ error: } /* Called with RTNL lock or RCU read lock. */ -struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1516,7 +1569,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1525,14 +1578,15 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, } /* Called with RTNL lock or RCU read lock. */ -static struct vport *lookup_vport(struct ovs_header *ovs_header, +static struct vport *lookup_vport(struct net *net, + struct ovs_header *ovs_header, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) { struct datapath *dp; struct vport *vport; if (a[OVS_VPORT_ATTR_NAME]) { - vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME])); + vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); if (!vport) return ERR_PTR(-ENODEV); if (ovs_header->dp_ifindex && @@ -1545,11 +1599,11 @@ static struct vport *lookup_vport(struct ovs_header *ovs_header, if (port_no >= DP_MAX_PORTS) return ERR_PTR(-EFBIG); - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(net, ovs_header->dp_ifindex); if (!dp) return ERR_PTR(-ENODEV); - vport = rcu_dereference_rtnl(dp->ports[port_no]); + vport = ovs_vport_rtnl_rcu(dp, port_no); if (!vport) return ERR_PTR(-ENOENT); return vport; @@ -1574,7 +1628,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) goto exit; rtnl_lock(); - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto exit_unlock; @@ -1586,7 +1640,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (port_no >= DP_MAX_PORTS) goto exit_unlock; - vport = rtnl_dereference(dp->ports[port_no]); + vport = ovs_vport_rtnl_rcu(dp, port_no); err = -EBUSY; if (vport) goto exit_unlock; @@ -1596,7 +1650,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) err = -EFBIG; goto exit_unlock; } - vport = rtnl_dereference(dp->ports[port_no]); + vport = ovs_vport_rtnl(dp, port_no); if (!vport) break; } @@ -1607,21 +1661,21 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = a[OVS_VPORT_ATTR_OPTIONS]; parms.dp = dp; parms.port_no = port_no; - parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); ovs_dp_detach_port(vport); goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1638,7 +1692,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) int err; rtnl_lock(); - vport = lookup_vport(info->userhdr, a); + vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; @@ -1653,17 +1707,17 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (err) goto exit_unlock; if (a[OVS_VPORT_ATTR_UPCALL_PID]) - vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { - netlink_set_err(init_net.genl_sock, 0, + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1679,7 +1733,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) int err; rtnl_lock(); - vport = lookup_vport(info->userhdr, a); + vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; @@ -1689,7 +1743,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1697,7 +1751,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_dp_detach_port(vport); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1714,12 +1768,12 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) int err; rcu_read_lock(); - vport = lookup_vport(ovs_header, a); + vport = lookup_vport(sock_net(skb->sk), ovs_header, a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1738,54 +1792,39 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct datapath *dp; - u32 port_no; - int retval; + int bucket = cb->args[0], skip = cb->args[1]; + int i, j = 0; - dp = get_dp(ovs_header->dp_ifindex); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) return -ENODEV; rcu_read_lock(); - for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) { + for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; - - vport = rcu_dereference(dp->ports[port_no]); - if (!vport) - continue; - - if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_VPORT_CMD_NEW) < 0) - break; - } - rcu_read_unlock(); - - cb->args[0] = port_no; - retval = skb->len; - - return retval; -} - -static void rehash_flow_table(struct work_struct *work) -{ - struct datapath *dp; - - genl_lock(); - - list_for_each_entry(dp, &dps, list_node) { - struct flow_table *old_table = genl_dereference(dp->table); - struct flow_table *new_table; - - new_table = ovs_flow_tbl_rehash(old_table); - if (!IS_ERR(new_table)) { - rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_deferred_destroy(old_table); + struct hlist_node *n; + + j = 0; + hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { + if (j >= skip && + ovs_vport_cmd_fill_info(vport, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + OVS_VPORT_CMD_NEW) < 0) + goto out; + + j++; } + skip = 0; } +out: + rcu_read_unlock(); - genl_unlock(); + cb->args[0] = i; + cb->args[1] = j; - schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); + return skb->len; } static struct genl_ops dp_vport_genl_ops[] = { @@ -1872,6 +1911,59 @@ error: return err; } +static void rehash_flow_table(struct work_struct *work) +{ + struct datapath *dp; + struct net *net; + + genl_lock(); + rtnl_lock(); + for_each_net(net) { + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + list_for_each_entry(dp, &ovs_net->dps, list_node) { + struct flow_table *old_table = genl_dereference(dp->table); + struct flow_table *new_table; + + new_table = ovs_flow_tbl_rehash(old_table); + if (!IS_ERR(new_table)) { + rcu_assign_pointer(dp->table, new_table); + ovs_flow_tbl_deferred_destroy(old_table); + } + } + } + rtnl_unlock(); + genl_unlock(); + + schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); +} + +static int __net_init ovs_init_net(struct net *net) +{ + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + INIT_LIST_HEAD(&ovs_net->dps); + return 0; +} + +static void __net_exit ovs_exit_net(struct net *net) +{ + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + struct datapath *dp, *dp_next; + + genl_lock(); + list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) + __dp_destroy(dp); + genl_unlock(); +} + +static struct pernet_operations ovs_net_ops = { + .init = ovs_init_net, + .exit = ovs_exit_net, + .id = &ovs_net_id, + .size = sizeof(struct ovs_net), +}; + static int __init dp_init(void) { struct sk_buff *dummy_skb; @@ -1889,10 +1981,14 @@ static int __init dp_init(void) if (err) goto error_flow_exit; - err = register_netdevice_notifier(&ovs_dp_device_notifier); + err = register_pernet_device(&ovs_net_ops); if (err) goto error_vport_exit; + err = register_netdevice_notifier(&ovs_dp_device_notifier); + if (err) + goto error_netns_exit; + err = dp_register_genl(); if (err < 0) goto error_unreg_notifier; @@ -1903,6 +1999,8 @@ static int __init dp_init(void) error_unreg_notifier: unregister_netdevice_notifier(&ovs_dp_device_notifier); +error_netns_exit: + unregister_pernet_device(&ovs_net_ops); error_vport_exit: ovs_vport_exit(); error_flow_exit: @@ -1914,9 +2012,10 @@ error: static void dp_cleanup(void) { cancel_delayed_work_sync(&rehash_flow_wq); - rcu_barrier(); dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); unregister_netdevice_notifier(&ovs_dp_device_notifier); + unregister_pernet_device(&ovs_net_ops); + rcu_barrier(); ovs_vport_exit(); ovs_flow_exit(); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index c1105c147531..031dfbf37c93 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -27,10 +27,11 @@ #include <linux/u64_stats_sync.h> #include "flow.h" +#include "vport.h" -struct vport; +#define DP_MAX_PORTS USHRT_MAX +#define DP_VPORT_HASH_BUCKETS 1024 -#define DP_MAX_PORTS 1024 #define SAMPLE_ACTION_DEPTH 3 /** @@ -58,11 +59,10 @@ struct dp_stats_percpu { * @list_node: Element in global 'dps' list. * @n_flows: Number of flows currently in flow table. * @table: Current flow table. Protected by genl_lock and RCU. - * @ports: Map from port number to &struct vport. %OVSP_LOCAL port - * always exists, other ports may be %NULL. Protected by RTNL and RCU. - * @port_list: List of all ports in @ports in arbitrary order. RTNL required - * to iterate or modify. + * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by + * RTNL and RCU. * @stats_percpu: Per-CPU datapath statistics. + * @net: Reference to net namespace. * * Context: See the comment on locking at the top of datapath.c for additional * locking information. @@ -75,13 +75,37 @@ struct datapath { struct flow_table __rcu *table; /* Switch ports. */ - struct vport __rcu *ports[DP_MAX_PORTS]; - struct list_head port_list; + struct hlist_head *ports; /* Stats. */ struct dp_stats_percpu __percpu *stats_percpu; + +#ifdef CONFIG_NET_NS + /* Network namespace ref. */ + struct net *net; +#endif }; +struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); + +static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no) +{ + ASSERT_RTNL(); + return ovs_lookup_vport(dp, port_no); +} + /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. @@ -105,9 +129,19 @@ struct dp_upcall_info { u8 cmd; const struct sw_flow_key *key; const struct nlattr *userdata; - u32 pid; + u32 portid; }; +static inline struct net *ovs_dp_get_net(struct datapath *dp) +{ + return read_pnet(&dp->net); +} + +static inline void ovs_dp_set_net(struct datapath *dp, struct net *net) +{ + write_pnet(&dp->net, net); +} + extern struct notifier_block ovs_dp_device_notifier; extern struct genl_multicast_group ovs_dp_vport_multicast_group; diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index 36dcee8fc84a..5558350e0d33 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -41,19 +41,21 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_UNREGISTER: if (!ovs_is_internal_dev(dev)) { struct sk_buff *notify; + struct datapath *dp = vport->dp; notify = ovs_vport_cmd_build_info(vport, 0, 0, OVS_VPORT_CMD_DEL); ovs_dp_detach_port(vport); if (IS_ERR(notify)) { - netlink_set_err(init_net.genl_sock, 0, + netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, ovs_dp_vport_multicast_group.id, PTR_ERR(notify)); break; } - genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id, - GFP_KERNEL); + genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, + ovs_dp_vport_multicast_group.id, + GFP_KERNEL); } break; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b7f38b161909..98c70630ad06 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -203,10 +203,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) int actions_len = nla_len(actions); struct sw_flow_actions *sfa; - /* At least DP_MAX_PORTS actions are required to be able to flood a - * packet to every port. Factor of 2 allows for setting VLAN tags, - * etc. */ - if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4)) + if (actions_len > MAX_ACTIONS_BUFSIZE) return ERR_PTR(-EINVAL); sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); @@ -427,19 +424,11 @@ void ovs_flow_deferred_free(struct sw_flow *flow) call_rcu(&flow->rcu, rcu_free_flow_callback); } -/* RCU callback used by ovs_flow_deferred_free_acts. */ -static void rcu_free_acts_callback(struct rcu_head *rcu) -{ - struct sw_flow_actions *sf_acts = container_of(rcu, - struct sw_flow_actions, rcu); - kfree(sf_acts); -} - /* Schedules 'sf_acts' to be freed after the next RCU grace period. * The caller must hold rcu_read_lock for this to be sensible. */ void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) { - call_rcu(&sf_acts->rcu, rcu_free_acts_callback); + kfree_rcu(sf_acts, rcu); } static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) @@ -1000,7 +989,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, swkey->phy.in_port = in_port; attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); } else { - swkey->phy.in_port = USHRT_MAX; + swkey->phy.in_port = DP_MAX_PORTS; } /* Data attributes. */ @@ -1143,7 +1132,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, const struct nlattr *nla; int rem; - *in_port = USHRT_MAX; + *in_port = DP_MAX_PORTS; *priority = 0; nla_for_each_nested(nla, attr, rem) { @@ -1180,7 +1169,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) goto nla_put_failure; - if (swkey->phy.in_port != USHRT_MAX && + if (swkey->phy.in_port != DP_MAX_PORTS && nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) goto nla_put_failure; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 9b75617ca4e0..14a324eb017b 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -43,7 +43,7 @@ struct sw_flow_actions { struct sw_flow_key { struct { u32 priority; /* Packet QoS priority. */ - u16 in_port; /* Input switch port (or USHRT_MAX). */ + u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } phy; struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ @@ -145,15 +145,17 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 + * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) * OVS_KEY_ATTR_8021Q 4 -- 4 8 - * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 + * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation) + * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype) * OVS_KEY_ATTR_IPV6 40 -- 4 44 * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 132 + * total 144 */ -#define FLOW_BUFSIZE 132 +#define FLOW_BUFSIZE 144 int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, @@ -161,6 +163,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, const struct nlattr *); +#define MAX_ACTIONS_BUFSIZE (16 * 1024) #define TBL_MIN_BUCKETS 1024 struct flow_table { diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 4061b9ee07f7..5d460c37df07 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -144,7 +144,7 @@ static void do_setup(struct net_device *netdev) netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; + NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; netdev->vlan_features = netdev->features; netdev->features |= NETIF_F_HW_VLAN_TX; @@ -175,9 +175,14 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) goto error_free_vport; } + dev_net_set(netdev_vport->dev, ovs_dp_get_net(vport->dp)); internal_dev = internal_dev_priv(netdev_vport->dev); internal_dev->vport = vport; + /* Restrict bridge port to current netns. */ + if (vport->port_no == OVSP_LOCAL) + netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; + err = register_netdevice(netdev_vport->dev); if (err) goto error_free_netdev; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6ea3551cc78c..3c1e58ba714b 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -83,7 +83,7 @@ static struct vport *netdev_create(const struct vport_parms *parms) netdev_vport = netdev_vport_priv(vport); - netdev_vport->dev = dev_get_by_name(&init_net, parms->name); + netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name); if (!netdev_vport->dev) { err = -ENODEV; goto error_free_vport; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6140336e79d7..03779e8a2622 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -16,10 +16,10 @@ * 02110-1301, USA */ -#include <linux/dcache.h> #include <linux/etherdevice.h> #include <linux/if.h> #include <linux/if_vlan.h> +#include <linux/jhash.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mutex.h> @@ -27,7 +27,9 @@ #include <linux/rcupdate.h> #include <linux/rtnetlink.h> #include <linux/compat.h> +#include <net/net_namespace.h> +#include "datapath.h" #include "vport.h" #include "vport-internal_dev.h" @@ -67,9 +69,9 @@ void ovs_vport_exit(void) kfree(dev_table); } -static struct hlist_head *hash_bucket(const char *name) +static struct hlist_head *hash_bucket(struct net *net, const char *name) { - unsigned int hash = full_name_hash(name, strlen(name)); + unsigned int hash = jhash(name, strlen(name), (unsigned long) net); return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; } @@ -80,14 +82,15 @@ static struct hlist_head *hash_bucket(const char *name) * * Must be called with RTNL or RCU read lock. */ -struct vport *ovs_vport_locate(const char *name) +struct vport *ovs_vport_locate(struct net *net, const char *name) { - struct hlist_head *bucket = hash_bucket(name); + struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; struct hlist_node *node; hlist_for_each_entry_rcu(vport, node, bucket, hash_node) - if (!strcmp(name, vport->ops->get_name(vport))) + if (!strcmp(name, vport->ops->get_name(vport)) && + net_eq(ovs_dp_get_net(vport->dp), net)) return vport; return NULL; @@ -122,8 +125,9 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->dp = parms->dp; vport->port_no = parms->port_no; - vport->upcall_pid = parms->upcall_pid; + vport->upcall_portid = parms->upcall_portid; vport->ops = ops; + INIT_HLIST_NODE(&vport->dp_hash_node); vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); if (!vport->percpu_stats) { @@ -170,14 +174,17 @@ struct vport *ovs_vport_add(const struct vport_parms *parms) for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { if (vport_ops_list[i]->type == parms->type) { + struct hlist_head *bucket; + vport = vport_ops_list[i]->create(parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); goto out; } - hlist_add_head_rcu(&vport->hash_node, - hash_bucket(vport->ops->get_name(vport))); + bucket = hash_bucket(ovs_dp_get_net(vport->dp), + vport->ops->get_name(vport)); + hlist_add_head_rcu(&vport->hash_node, bucket); return vport; } } @@ -391,7 +398,7 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) case VPORT_E_TX_ERROR: vport->err_stats.tx_errors++; break; - }; + } spin_unlock(&vport->stats_lock); } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index aac680ca2b06..3f7961ea3c56 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -20,6 +20,7 @@ #define VPORT_H 1 #include <linux/list.h> +#include <linux/netlink.h> #include <linux/openvswitch.h> #include <linux/skbuff.h> #include <linux/spinlock.h> @@ -38,7 +39,7 @@ void ovs_vport_exit(void); struct vport *ovs_vport_add(const struct vport_parms *); void ovs_vport_del(struct vport *); -struct vport *ovs_vport_locate(const char *name); +struct vport *ovs_vport_locate(struct net *net, const char *name); void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); @@ -69,10 +70,10 @@ struct vport_err_stats { * @rcu: RCU callback head for deferred destruction. * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. - * @node: Element in @dp's @port_list. - * @upcall_pid: The Netlink port to use for packets received on this port that + * @upcall_portid: The Netlink port to use for packets received on this port that * miss the flow table. * @hash_node: Element in @dev_table hash table in vport.c. + * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. * @ops: Class structure. * @percpu_stats: Points to per-CPU statistics used and maintained by vport * @stats_lock: Protects @err_stats; @@ -82,10 +83,10 @@ struct vport { struct rcu_head rcu; u16 port_no; struct datapath *dp; - struct list_head node; - u32 upcall_pid; + u32 upcall_portid; struct hlist_node hash_node; + struct hlist_node dp_hash_node; const struct vport_ops *ops; struct vport_percpu_stats __percpu *percpu_stats; @@ -112,7 +113,7 @@ struct vport_parms { /* For ovs_vport_alloc(). */ struct datapath *dp; u16 port_no; - u32 upcall_pid; + u32 upcall_portid; }; /** diff --git a/net/packet/Kconfig b/net/packet/Kconfig index 0060e3b396b7..cc55b35f80e5 100644 --- a/net/packet/Kconfig +++ b/net/packet/Kconfig @@ -14,3 +14,11 @@ config PACKET be called af_packet. If unsure, say Y. + +config PACKET_DIAG + tristate "Packet: sockets monitoring interface" + depends on PACKET + default n + ---help--- + Support for PF_PACKET sockets monitoring interface used by the ss tool. + If unsure, say Y. diff --git a/net/packet/Makefile b/net/packet/Makefile index 81183eabfdec..9df61347a3c3 100644 --- a/net/packet/Makefile +++ b/net/packet/Makefile @@ -3,3 +3,5 @@ # obj-$(CONFIG_PACKET) += af_packet.o +obj-$(CONFIG_PACKET_DIAG) += af_packet_diag.o +af_packet_diag-y += diag.o diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ceaca7c134a0..94060edbbd70 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,6 +93,8 @@ #include <net/inet_common.h> #endif +#include "internal.h" + /* Assumptions: - if device has no dev->hard_header routine, it adds and removes ll header @@ -146,14 +148,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it) /* Private packet socket structures. */ -struct packet_mclist { - struct packet_mclist *next; - int ifindex; - int count; - unsigned short type; - unsigned short alen; - unsigned char addr[MAX_ADDR_LEN]; -}; /* identical to struct packet_mreq except it has * a longer address field. */ @@ -175,63 +169,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) -/* kbdq - kernel block descriptor queue */ -struct tpacket_kbdq_core { - struct pgv *pkbdq; - unsigned int feature_req_word; - unsigned int hdrlen; - unsigned char reset_pending_on_curr_blk; - unsigned char delete_blk_timer; - unsigned short kactive_blk_num; - unsigned short blk_sizeof_priv; - - /* last_kactive_blk_num: - * trick to see if user-space has caught up - * in order to avoid refreshing timer when every single pkt arrives. - */ - unsigned short last_kactive_blk_num; - - char *pkblk_start; - char *pkblk_end; - int kblk_size; - unsigned int knum_blocks; - uint64_t knxt_seq_num; - char *prev; - char *nxt_offset; - struct sk_buff *skb; - - atomic_t blk_fill_in_prog; - - /* Default is set to 8ms */ -#define DEFAULT_PRB_RETIRE_TOV (8) - - unsigned short retire_blk_tov; - unsigned short version; - unsigned long tov_in_jiffies; - - /* timer to retire an outstanding block */ - struct timer_list retire_blk_timer; -}; - #define PGV_FROM_VMALLOC 1 -struct pgv { - char *buffer; -}; - -struct packet_ring_buffer { - struct pgv *pg_vec; - unsigned int head; - unsigned int frames_per_block; - unsigned int frame_size; - unsigned int frame_max; - - unsigned int pg_vec_order; - unsigned int pg_vec_pages; - unsigned int pg_vec_len; - - struct tpacket_kbdq_core prb_bdqc; - atomic_t pending; -}; #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) @@ -269,52 +207,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); -struct packet_fanout; -struct packet_sock { - /* struct sock has to be the first member of packet_sock */ - struct sock sk; - struct packet_fanout *fanout; - struct tpacket_stats stats; - union tpacket_stats_u stats_u; - struct packet_ring_buffer rx_ring; - struct packet_ring_buffer tx_ring; - int copy_thresh; - spinlock_t bind_lock; - struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, - origdev:1, - has_vnet_hdr:1; - int ifindex; /* bound device */ - __be16 num; - struct packet_mclist *mclist; - atomic_t mapped; - enum tpacket_versions tp_version; - unsigned int tp_hdrlen; - unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tstamp; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - -#define PACKET_FANOUT_MAX 256 - -struct packet_fanout { -#ifdef CONFIG_NET_NS - struct net *net; -#endif - unsigned int num_members; - u16 id; - u8 type; - u8 defrag; - atomic_t rr_cur; - struct list_head list; - struct sock *arr[PACKET_FANOUT_MAX]; - spinlock_t lock; - atomic_t sk_ref; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - struct packet_skb_cb { unsigned int origlen; union { @@ -334,11 +226,6 @@ struct packet_skb_cb { (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \ ((x)->kactive_blk_num+1) : 0) -static struct packet_sock *pkt_sk(struct sock *sk) -{ - return (struct packet_sock *)sk; -} - static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); @@ -968,7 +855,8 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); ppd->tp_status = TP_STATUS_VLAN_VALID; } else { - ppd->hv1.tp_vlan_tci = ppd->tp_status = 0; + ppd->hv1.tp_vlan_tci = 0; + ppd->tp_status = TP_STATUS_AVAILABLE; } } @@ -1079,7 +967,7 @@ static void *packet_current_rx_frame(struct packet_sock *po, default: WARN(1, "TPACKET version not supported\n"); BUG(); - return 0; + return NULL; } } @@ -1243,7 +1131,8 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } -static DEFINE_MUTEX(fanout_mutex); +DEFINE_MUTEX(fanout_mutex); +EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); static void __fanout_link(struct sock *sk, struct packet_sock *po) @@ -1273,6 +1162,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } +static bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +{ + if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) + return true; + + return false; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_sock *po = pkt_sk(sk); @@ -1325,6 +1222,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.id_match = match_fanout_group; dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } @@ -1355,9 +1253,9 @@ static void fanout_release(struct sock *sk) if (!f) return; + mutex_lock(&fanout_mutex); po->fanout = NULL; - mutex_lock(&fanout_mutex); if (atomic_dec_and_test(&f->sk_ref)) { list_del(&f->list); dev_remove_pack(&f->prot_hook); @@ -1936,7 +1834,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) if (likely(po->tx_ring.pg_vec)) { ph = skb_shinfo(skb)->destructor_arg; - BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); __packet_set_status(po, ph, TP_STATUS_AVAILABLE); @@ -2055,7 +1952,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int tp_len, size_max; unsigned char *addr; int len_sum = 0; - int status = 0; + int status = TP_STATUS_AVAILABLE; int hlen, tlen; mutex_lock(&po->pg_vec_lock); @@ -2420,10 +2317,13 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_del_node_init_rcu(sk); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); @@ -2622,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, register_prot_hook(sk); } - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_add_node_rcu(sk, &net->packet.sklist); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, &packet_proto, 1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); return 0; out: @@ -3846,7 +3749,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) po->ifindex, po->running, atomic_read(&s->sk_rmem_alloc), - sock_i_uid(s), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); } @@ -3878,7 +3781,7 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - spin_lock_init(&net->packet.sklist_lock); + mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) diff --git a/net/packet/diag.c b/net/packet/diag.c new file mode 100644 index 000000000000..8db6e21c46bd --- /dev/null +++ b/net/packet/diag.c @@ -0,0 +1,242 @@ +#include <linux/module.h> +#include <linux/sock_diag.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/packet_diag.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +#include "internal.h" + +static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) +{ + struct packet_diag_info pinfo; + + pinfo.pdi_index = po->ifindex; + pinfo.pdi_version = po->tp_version; + pinfo.pdi_reserve = po->tp_reserve; + pinfo.pdi_copy_thresh = po->copy_thresh; + pinfo.pdi_tstamp = po->tp_tstamp; + + pinfo.pdi_flags = 0; + if (po->running) + pinfo.pdi_flags |= PDI_RUNNING; + if (po->auxdata) + pinfo.pdi_flags |= PDI_AUXDATA; + if (po->origdev) + pinfo.pdi_flags |= PDI_ORIGDEV; + if (po->has_vnet_hdr) + pinfo.pdi_flags |= PDI_VNETHDR; + if (po->tp_loss) + pinfo.pdi_flags |= PDI_LOSS; + + return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo); +} + +static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb) +{ + struct nlattr *mca; + struct packet_mclist *ml; + + mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST); + if (!mca) + return -EMSGSIZE; + + rtnl_lock(); + for (ml = po->mclist; ml; ml = ml->next) { + struct packet_diag_mclist *dml; + + dml = nla_reserve_nohdr(nlskb, sizeof(*dml)); + if (!dml) { + rtnl_unlock(); + nla_nest_cancel(nlskb, mca); + return -EMSGSIZE; + } + + dml->pdmc_index = ml->ifindex; + dml->pdmc_type = ml->type; + dml->pdmc_alen = ml->alen; + dml->pdmc_count = ml->count; + BUILD_BUG_ON(sizeof(dml->pdmc_addr) != sizeof(ml->addr)); + memcpy(dml->pdmc_addr, ml->addr, sizeof(ml->addr)); + } + + rtnl_unlock(); + nla_nest_end(nlskb, mca); + + return 0; +} + +static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type, + struct sk_buff *nlskb) +{ + struct packet_diag_ring pdr; + + if (!ring->pg_vec || ((ver > TPACKET_V2) && + (nl_type == PACKET_DIAG_TX_RING))) + return 0; + + pdr.pdr_block_size = ring->pg_vec_pages << PAGE_SHIFT; + pdr.pdr_block_nr = ring->pg_vec_len; + pdr.pdr_frame_size = ring->frame_size; + pdr.pdr_frame_nr = ring->frame_max + 1; + + if (ver > TPACKET_V2) { + pdr.pdr_retire_tmo = ring->prb_bdqc.retire_blk_tov; + pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv; + pdr.pdr_features = ring->prb_bdqc.feature_req_word; + } else { + pdr.pdr_retire_tmo = 0; + pdr.pdr_sizeof_priv = 0; + pdr.pdr_features = 0; + } + + return nla_put(nlskb, nl_type, sizeof(pdr), &pdr); +} + +static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb) +{ + int ret; + + mutex_lock(&po->pg_vec_lock); + ret = pdiag_put_ring(&po->rx_ring, po->tp_version, + PACKET_DIAG_RX_RING, skb); + if (!ret) + ret = pdiag_put_ring(&po->tx_ring, po->tp_version, + PACKET_DIAG_TX_RING, skb); + mutex_unlock(&po->pg_vec_lock); + + return ret; +} + +static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) +{ + int ret = 0; + + mutex_lock(&fanout_mutex); + if (po->fanout) { + u32 val; + + val = (u32)po->fanout->id | ((u32)po->fanout->type << 16); + ret = nla_put_u32(nlskb, PACKET_DIAG_FANOUT, val); + } + mutex_unlock(&fanout_mutex); + + return ret; +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, + u32 portid, u32 seq, u32 flags, int sk_ino) +{ + struct nlmsghdr *nlh; + struct packet_diag_msg *rp; + struct packet_sock *po = pkt_sk(sk); + + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); + if (!nlh) + return -EMSGSIZE; + + rp = nlmsg_data(nlh); + rp->pdiag_family = AF_PACKET; + rp->pdiag_type = sk->sk_type; + rp->pdiag_num = ntohs(po->num); + rp->pdiag_ino = sk_ino; + sock_diag_save_cookie(sk, rp->pdiag_cookie); + + if ((req->pdiag_show & PACKET_SHOW_INFO) && + pdiag_put_info(po, skb)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_MCLIST) && + pdiag_put_mclist(po, skb)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_RING_CFG) && + pdiag_put_rings_cfg(po, skb)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_FANOUT) && + pdiag_put_fanout(po, skb)) + goto out_nlmsg_trim; + + return nlmsg_end(skb, nlh); + +out_nlmsg_trim: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int num = 0, s_num = cb->args[0]; + struct packet_diag_req *req; + struct net *net; + struct sock *sk; + struct hlist_node *node; + + net = sock_net(skb->sk); + req = nlmsg_data(cb->nlh); + + mutex_lock(&net->packet.sklist_lock); + sk_for_each(sk, node, &net->packet.sklist) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) + goto next; + + if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + sock_i_ino(sk)) < 0) + goto done; +next: + num++; + } +done: + mutex_unlock(&net->packet.sklist_lock); + cb->args[0] = num; + + return skb->len; +} + +static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct packet_diag_req); + struct net *net = sock_net(skb->sk); + struct packet_diag_req *req; + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + req = nlmsg_data(h); + /* Make it possible to support protocol filtering later */ + if (req->sdiag_protocol) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = packet_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } else + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler packet_diag_handler = { + .family = AF_PACKET, + .dump = packet_diag_handler_dump, +}; + +static int __init packet_diag_init(void) +{ + return sock_diag_register(&packet_diag_handler); +} + +static void __exit packet_diag_exit(void) +{ + sock_diag_unregister(&packet_diag_handler); +} + +module_init(packet_diag_init); +module_exit(packet_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */); diff --git a/net/packet/internal.h b/net/packet/internal.h new file mode 100644 index 000000000000..44945f6b7252 --- /dev/null +++ b/net/packet/internal.h @@ -0,0 +1,121 @@ +#ifndef __PACKET_INTERNAL_H__ +#define __PACKET_INTERNAL_H__ + +struct packet_mclist { + struct packet_mclist *next; + int ifindex; + int count; + unsigned short type; + unsigned short alen; + unsigned char addr[MAX_ADDR_LEN]; +}; + +/* kbdq - kernel block descriptor queue */ +struct tpacket_kbdq_core { + struct pgv *pkbdq; + unsigned int feature_req_word; + unsigned int hdrlen; + unsigned char reset_pending_on_curr_blk; + unsigned char delete_blk_timer; + unsigned short kactive_blk_num; + unsigned short blk_sizeof_priv; + + /* last_kactive_blk_num: + * trick to see if user-space has caught up + * in order to avoid refreshing timer when every single pkt arrives. + */ + unsigned short last_kactive_blk_num; + + char *pkblk_start; + char *pkblk_end; + int kblk_size; + unsigned int knum_blocks; + uint64_t knxt_seq_num; + char *prev; + char *nxt_offset; + struct sk_buff *skb; + + atomic_t blk_fill_in_prog; + + /* Default is set to 8ms */ +#define DEFAULT_PRB_RETIRE_TOV (8) + + unsigned short retire_blk_tov; + unsigned short version; + unsigned long tov_in_jiffies; + + /* timer to retire an outstanding block */ + struct timer_list retire_blk_timer; +}; + +struct pgv { + char *buffer; +}; + +struct packet_ring_buffer { + struct pgv *pg_vec; + unsigned int head; + unsigned int frames_per_block; + unsigned int frame_size; + unsigned int frame_max; + + unsigned int pg_vec_order; + unsigned int pg_vec_pages; + unsigned int pg_vec_len; + + struct tpacket_kbdq_core prb_bdqc; + atomic_t pending; +}; + +extern struct mutex fanout_mutex; +#define PACKET_FANOUT_MAX 256 + +struct packet_fanout { +#ifdef CONFIG_NET_NS + struct net *net; +#endif + unsigned int num_members; + u16 id; + u8 type; + u8 defrag; + atomic_t rr_cur; + struct list_head list; + struct sock *arr[PACKET_FANOUT_MAX]; + spinlock_t lock; + atomic_t sk_ref; + struct packet_type prot_hook ____cacheline_aligned_in_smp; +}; + +struct packet_sock { + /* struct sock has to be the first member of packet_sock */ + struct sock sk; + struct packet_fanout *fanout; + struct tpacket_stats stats; + union tpacket_stats_u stats_u; + struct packet_ring_buffer rx_ring; + struct packet_ring_buffer tx_ring; + int copy_thresh; + spinlock_t bind_lock; + struct mutex pg_vec_lock; + unsigned int running:1, /* prot_hook is attached*/ + auxdata:1, + origdev:1, + has_vnet_hdr:1; + int ifindex; /* bound device */ + __be16 num; + struct packet_mclist *mclist; + atomic_t mapped; + enum tpacket_versions tp_version; + unsigned int tp_hdrlen; + unsigned int tp_reserve; + unsigned int tp_loss:1; + unsigned int tp_tstamp; + struct packet_type prot_hook ____cacheline_aligned_in_smp; +}; + +static struct packet_sock *pkt_sk(struct sock *sk) +{ + return (struct packet_sock *)sk; +} + +#endif diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 7dd762a464e5..83a8389619aa 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -33,7 +33,7 @@ /* Device address handling */ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event); + u32 portid, u32 seq, int event); void phonet_address_notify(int event, struct net_device *dev, u8 addr) { @@ -101,12 +101,12 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) } static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -148,7 +148,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) continue; if (fill_addr(skb, pnd->netdev, addr << 2, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0) goto out; } @@ -165,12 +165,12 @@ out: /* Routes handling */ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -276,7 +276,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (addr_idx++ < addr_start_idx) continue; - if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid, + if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE)) goto out; } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 0acc943f713a..b7e982782255 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -612,7 +612,8 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) sk->sk_protocol, pn->sobject, pn->dobject, pn->resource, sk->sk_state, sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), - sock_i_uid(sk), sock_i_ino(sk), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, atomic_read(&sk->sk_drops), &len); } @@ -796,7 +797,8 @@ static int pn_res_seq_show(struct seq_file *seq, void *v) struct sock *sk = *psk; seq_printf(seq, "%02X %5d %lu%n", - (int) (psk - pnres.sk), sock_i_uid(sk), + (int) (psk - pnres.sk), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), &len); } seq_printf(seq, "%*s\n", 63 - len, ""); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index af95c8e058fc..a65ee78db0c5 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { state_change = sk->sk_state_change; @@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk) break; } out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); state_change(sk); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 72981375f47c..7787537e9c2e 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) rdsdebug("listen data ready sk %p\n", sk); - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); ready = sk->sk_user_data; if (!ready) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) queue_work(rds_wq, &rds_tcp_listen_work); out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 6243258f840f..4fac4f2bb9dc 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -322,7 +322,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) rdsdebug("data ready sk %p bytes %d\n", sk, bytes); - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -336,7 +336,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 1b4fd68f0c7c..81cf5a4c5e40 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -174,7 +174,7 @@ void rds_tcp_write_space(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { write_space = sk->sk_write_space; @@ -194,7 +194,7 @@ void rds_tcp_write_space(struct sock *sk) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); /* * write_space is only called when data leaves tcp's send queue if diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 752b72360ebc..a5c952741279 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -150,6 +150,20 @@ static void rfkill_led_trigger_activate(struct led_classdev *led) rfkill_led_trigger_event(rfkill); } +const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) +{ + return rfkill->led_trigger.name; +} +EXPORT_SYMBOL(rfkill_get_led_trigger_name); + +void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) +{ + BUG_ON(!rfkill); + + rfkill->ledtrigname = name; +} +EXPORT_SYMBOL(rfkill_set_led_trigger_name); + static int rfkill_led_trigger_register(struct rfkill *rfkill) { rfkill->led_trigger.name = rfkill->ledtrigname @@ -256,6 +270,7 @@ static bool __rfkill_set_hw_state(struct rfkill *rfkill, static void rfkill_set_block(struct rfkill *rfkill, bool blocked) { unsigned long flags; + bool prev, curr; int err; if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) @@ -270,6 +285,8 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) rfkill->ops->query(rfkill, rfkill->data); spin_lock_irqsave(&rfkill->lock, flags); + prev = rfkill->state & RFKILL_BLOCK_SW; + if (rfkill->state & RFKILL_BLOCK_SW) rfkill->state |= RFKILL_BLOCK_SW_PREV; else @@ -299,10 +316,13 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) } rfkill->state &= ~RFKILL_BLOCK_SW_SETCALL; rfkill->state &= ~RFKILL_BLOCK_SW_PREV; + curr = rfkill->state & RFKILL_BLOCK_SW; spin_unlock_irqrestore(&rfkill->lock, flags); rfkill_led_trigger_event(rfkill); - rfkill_event(rfkill); + + if (prev != curr) + rfkill_event(rfkill); } #ifdef CONFIG_RFKILL_INPUT diff --git a/net/rfkill/input.c b/net/rfkill/input.c index 24c55c53e6a2..c9d931e7ffec 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -164,8 +164,7 @@ static void rfkill_schedule_global_op(enum rfkill_sched_op op) rfkill_op_pending = true; if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) { /* bypass the limiter for EPO */ - cancel_delayed_work(&rfkill_op_work); - schedule_delayed_work(&rfkill_op_work, 0); + mod_delayed_work(system_wq, &rfkill_op_work, 0); rfkill_last_scheduled = jiffies; } else rfkill_schedule_ratelimited(); diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 8b1f9f49960f..011d2384b115 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -948,7 +948,8 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, _enter(""); - key = key_alloc(&key_type_rxrpc, "x", 0, 0, cred, 0, + key = key_alloc(&key_type_rxrpc, "x", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, 0, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) { _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); @@ -994,7 +995,8 @@ struct key *rxrpc_get_null_key(const char *keyname) struct key *key; int ret; - key = key_alloc(&key_type_rxrpc, keyname, 0, 0, cred, + key = key_alloc(&key_type_rxrpc, keyname, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) return key; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index e3d2c78cb52c..102761d294cb 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -644,7 +644,7 @@ errout: } static int -tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, +tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq, u16 flags, int event, int bind, int ref) { struct tcamsg *t; @@ -652,7 +652,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_nlmsg_trim; t = nlmsg_data(nlh); @@ -678,7 +678,7 @@ out_nlmsg_trim: } static int -act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, +act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, struct tc_action *a, int event) { struct sk_buff *skb; @@ -686,16 +686,16 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnl_unicast(skb, net, pid); + return rtnl_unicast(skb, net, portid); } static struct tc_action * -tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) +tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) { struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action *a; @@ -762,7 +762,7 @@ static struct tc_action *create_a(int i) } static int tca_action_flush(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 pid) + struct nlmsghdr *n, u32 portid) { struct sk_buff *skb; unsigned char *b; @@ -799,7 +799,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, if (a->ops == NULL) goto err_out; - nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); + nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); if (!nlh) goto out_module_put; t = nlmsg_data(nlh); @@ -823,7 +823,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err > 0) return 0; @@ -841,7 +841,7 @@ noflush_out: static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int event) + u32 portid, int event) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -853,13 +853,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { if (tb[1] != NULL) - return tca_action_flush(net, tb[1], n, pid); + return tca_action_flush(net, tb[1], n, portid); else return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_get_1(tb[i], n, pid); + act = tcf_action_get_1(tb[i], n, portid); if (IS_ERR(act)) { ret = PTR_ERR(act); goto err; @@ -874,7 +874,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, } if (event == RTM_GETACTION) - ret = act_get_notify(net, pid, n, head, event); + ret = act_get_notify(net, portid, n, head, event); else { /* delete */ struct sk_buff *skb; @@ -884,7 +884,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } - if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event, + if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event, 0, 1) <= 0) { kfree_skb(skb); ret = -EINVAL; @@ -893,7 +893,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (ret > 0) return 0; @@ -905,7 +905,7 @@ err: } static int tcf_add_notify(struct net *net, struct tc_action *a, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -920,7 +920,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_kfree_skb; t = nlmsg_data(nlh); @@ -940,7 +940,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -953,7 +953,7 @@ out_kfree_skb: static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int ovr) + u32 portid, int ovr) { int ret = 0; struct tc_action *act; @@ -971,7 +971,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* dump then free all the actions after update; inserted policy * stays intact */ - ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); + ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags); for (a = act; a; a = act) { act = a->next; kfree(a); @@ -984,7 +984,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); @@ -1008,17 +1008,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_DELACTION); + portid, RTM_DELACTION); break; case RTM_GETACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_GETACTION); + portid, RTM_GETACTION); break; default: BUG(); @@ -1085,7 +1085,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) goto out_module_put; } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; @@ -1109,7 +1109,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_cancel(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; - if (NETLINK_CB(cb->skb).pid && ret) + if (NETLINK_CB(cb->skb).portid && ret) nlh->nlmsg_flags |= NLM_F_MULTI; module_put(a_o->owner); return skb->len; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index f10fb8256442..05d60859d8e3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, struct tcf_common *pc; int ret = 0; int err; +#ifdef CONFIG_GACT_PROB + struct tc_gact_p *p_parm = NULL; +#endif if (nla == NULL) return -EINVAL; @@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, #ifndef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB] != NULL) return -EOPNOTSUPP; +#else + if (tb[TCA_GACT_PROB]) { + p_parm = nla_data(tb[TCA_GACT_PROB]); + if (p_parm->ptype >= MAX_RAND) + return -EINVAL; + } #endif pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); @@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, spin_lock_bh(&gact->tcf_lock); gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB - if (tb[TCA_GACT_PROB] != NULL) { - struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]); + if (p_parm) { gact->tcfg_paction = p_parm->paction; gact->tcfg_pval = p_parm->pval; gact->tcfg_ptype = p_parm->ptype; @@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB - if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) + if (gact->tcfg_ptype) action = gact_rand[gact->tcfg_ptype](gact); else action = gact->tcf_action; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 60e281ad0f07..58fb3c7aab9e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -185,7 +185,12 @@ err3: err2: kfree(tname); err1: - kfree(pc); + if (ret == ACT_P_CREATED) { + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); + } return err; } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fe81cc18e9e0..9c0fd0c78814 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, out: if (err) { m->tcf_qstats.overlimits++; - /* should we be asking for packet to be dropped? - * may make sense for redirect case only - */ - retval = TC_ACT_SHOT; - } else { + if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + retval = TC_ACT_SHOT; + else + retval = m->tcf_action; + } else retval = m->tcf_action; - } spin_unlock(&m->tcf_lock); return retval; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 26aa2f6ce257..45c53ab067a6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -74,7 +74,10 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - kfree(pc); + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); return -ENOMEM; } ret = ACT_P_CREATED; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 3922f2a2821b..3714f60f0b3c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -131,7 +131,10 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, d = to_defact(pc); ret = alloc_defdata(d, defdata); if (ret < 0) { - kfree(pc); + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); return ret; } d->tcf_action = parm->action; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6dd1131f2ec1..7ae02892437c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -319,7 +319,7 @@ replay: } } - err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh); + err = tp->ops->change(skb, tp, cl, t->tcm_handle, tca, &fh); if (err == 0) { if (tp_created) { spin_lock_bh(root_lock); @@ -343,13 +343,13 @@ errout: } static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, - unsigned long fh, u32 pid, u32 seq, u16 flags, int event) + unsigned long fh, u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -381,18 +381,18 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, unsigned long fh, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -407,7 +407,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, { struct tcf_dump_args *a = (void *)arg; - return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid, + return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } @@ -465,7 +465,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid, + if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) break; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 590960a22a77..344a11b342e5 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -162,7 +162,8 @@ errout: return err; } -static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle, +static int basic_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { int err; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7743ea8d1d38..2ecde225ae60 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -77,11 +77,18 @@ struct cgroup_subsys net_cls_subsys = { .name = "net_cls", .create = cgrp_create, .destroy = cgrp_destroy, -#ifdef CONFIG_NET_CLS_CGROUP .subsys_id = net_cls_subsys_id, -#endif .base_cftypes = ss_files, .module = THIS_MODULE, + + /* + * While net_cls cgroup has the rudimentary hierarchy support of + * inheriting the parent's classid on cgroup creation, it doesn't + * properly propagates config changes in ancestors to their + * descendents. A child should follow the parent's configuration + * but be allowed to override it. Fix it and remove the following. + */ + .broken_hierarchy = true, }; struct cls_cgroup_head { @@ -151,7 +158,8 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; -static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, +static int cls_cgroup_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { @@ -283,12 +291,6 @@ static int __init init_cgroup_cls(void) if (ret) goto out; -#ifndef CONFIG_NET_CLS_CGROUP - /* We can't use rcu_assign_pointer because this is an int. */ - smp_wmb(); - net_cls_subsys_id = net_cls_subsys.subsys_id; -#endif - ret = register_tcf_proto_ops(&cls_cgroup_ops); if (ret) cgroup_unload_subsys(&net_cls_subsys); @@ -301,11 +303,6 @@ static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); -#ifndef CONFIG_NET_CLS_CGROUP - net_cls_subsys_id = -1; - synchronize_rcu(); -#endif - cgroup_unload_subsys(&net_cls_subsys); } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index ccd08c8dc6a7..ce82d0cb1b47 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -193,15 +193,19 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb) static u32 flow_get_skuid(const struct sk_buff *skb) { - if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_cred->fsuid; + if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { + kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid; + return from_kuid(&init_user_ns, skuid); + } return 0; } static u32 flow_get_skgid(const struct sk_buff *skb) { - if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_cred->fsgid; + if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { + kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid; + return from_kgid(&init_user_ns, skgid); + } return 0; } @@ -347,7 +351,8 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static int flow_change(struct tcf_proto *tp, unsigned long base, +static int flow_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { @@ -386,6 +391,10 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, if (fls(keymask) - 1 > FLOW_KEY_MAX) return -EOPNOTSUPP; + + if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && + sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns) + return -EOPNOTSUPP; } err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 8384a4797240..4075a0aef2aa 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -233,7 +233,8 @@ errout: return err; } -static int fw_change(struct tcf_proto *tp, unsigned long base, +static int fw_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 44f405cb9aaf..c10d57bf98f2 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -427,7 +427,8 @@ errout: return err; } -static int route4_change(struct tcf_proto *tp, unsigned long base, +static int route4_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 18ab93ec8d7e..494bbb90924a 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -416,7 +416,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; -static int rsvp_change(struct tcf_proto *tp, unsigned long base, +static int rsvp_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index fe29420d0b0e..a1293b4ab7a1 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -332,7 +332,8 @@ errout: } static int -tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle, +tcindex_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { struct nlattr *opt = tca[TCA_OPTIONS]; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d45373fb00b9..c7c27bc91b5a 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -544,7 +544,8 @@ errout: return err; } -static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, +static int u32_change(struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4ab6e3325573..7c3de6ffa516 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -461,7 +461,7 @@ META_COLLECTOR(int_sk_sndtimeo) META_COLLECTOR(int_sk_sendmsg_off) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_sndmsg_off; + dst->value = skb->sk->sk_frag.offset; } META_COLLECTOR(int_sk_write_pend) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a08b4ab3e421..a18d975db59c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1185,7 +1185,7 @@ graft: } static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1193,7 +1193,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct gnet_dump d; struct qdisc_size_table *stab; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1248,25 +1248,25 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (old && !tc_qdisc_dump_ignore(old)) { - if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } if (new && !tc_qdisc_dump_ignore(new)) { - if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } if (skb->len) - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); err_out: @@ -1289,7 +1289,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; } else { if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1300,7 +1300,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, continue; } if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1375,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) const struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; - u32 pid = tcm->tcm_parent; + u32 portid = tcm->tcm_parent; u32 clid = tcm->tcm_handle; u32 qid = TC_H_MAJ(clid); int err; @@ -1403,8 +1403,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - if (pid != TC_H_ROOT) { - u32 qid1 = TC_H_MAJ(pid); + if (portid != TC_H_ROOT) { + u32 qid1 = TC_H_MAJ(portid); if (qid && qid1) { /* If both majors are known, they must be identical. */ @@ -1418,10 +1418,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now qid is genuine qdisc handle consistent * both with parent and child. * - * TC_H_MAJ(pid) still may be unspecified, complete it now. + * TC_H_MAJ(portid) still may be unspecified, complete it now. */ - if (pid) - pid = TC_H_MAKE(qid, pid); + if (portid) + portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) qid = dev->qdisc->handle; @@ -1439,7 +1439,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now try to get class */ if (clid == 0) { - if (pid == TC_H_ROOT) + if (portid == TC_H_ROOT) clid = qid; } else clid = TC_H_MAKE(qid, clid); @@ -1478,7 +1478,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) new_cl = cl; err = -EOPNOTSUPP; if (cops->change) - err = cops->change(q, clid, pid, tca, &new_cl); + err = cops->change(q, clid, portid, tca, &new_cl); if (err == 0) tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); @@ -1492,7 +1492,7 @@ out: static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, unsigned long cl, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1500,7 +1500,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1540,18 +1540,18 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, unsigned long cl, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) { + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -1565,7 +1565,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; - return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid, + return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6aabd77d1cfd..564b9fc8efd3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -250,10 +250,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) cl = defmap[TC_PRIO_BESTEFFORT]; - if (cl == NULL || cl->level >= head->level) + if (cl == NULL) goto fallback; } - + if (cl->level >= head->level) + goto fallback; #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 9ce0b4fe23ff..71e50c80315f 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -352,7 +352,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - int err; + int err = 0; cl = drr_classify(skb, sch, &err); if (cl == NULL) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 9fc1c62ec80e..4e606fcb2534 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -191,7 +191,6 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (list_empty(&flow->flowchain)) { list_add_tail(&flow->flowchain, &q->new_flows); - codel_vars_init(&flow->cvars); q->new_flow_count++; flow->deficit = q->quantum; flow->dropped = 0; @@ -418,6 +417,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) struct fq_codel_flow *flow = q->flows + i; INIT_LIST_HEAD(&flow->flowchain); + codel_vars_init(&flow->cvars); } } if (sch->limit >= 1) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 511323e89cec..aefc1504dc88 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -324,24 +324,6 @@ void netif_carrier_off(struct net_device *dev) } EXPORT_SYMBOL(netif_carrier_off); -/** - * netif_notify_peers - notify network peers about existence of @dev - * @dev: network device - * - * Generate traffic such that interested network peers are aware of - * @dev, such as by generating a gratuitous ARP. This may be used when - * a device wants to inform the rest of the network about some sort of - * reconfiguration such as a failover event or virtual machine - * migration. - */ -void netif_notify_peers(struct net_device *dev) -{ - rtnl_lock(); - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); - rtnl_unlock(); -} -EXPORT_SYMBOL(netif_notify_peers); - /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. @@ -545,6 +527,8 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { }; EXPORT_SYMBOL(pfifo_fast_ops); +static struct lock_class_key qdisc_tx_busylock; + struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops) { @@ -552,6 +536,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc *sch; unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; int err = -ENOBUFS; + struct net_device *dev = dev_queue->dev; p = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue)); @@ -571,12 +556,16 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, } INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); + spin_lock_init(&sch->busylock); + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - dev_hold(qdisc_dev(sch)); + dev_hold(dev); atomic_set(&sch->refcnt, 1); return sch; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index e901583e4ea5..d42234c0f13b 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -102,9 +102,8 @@ static inline int gred_wred_mode_check(struct Qdisc *sch) if (q == NULL) continue; - for (n = 0; n < table->DPs; n++) - if (table->tab[n] && table->tab[n] != q && - table->tab[n]->prio == q->prio) + for (n = i + 1; n < table->DPs; n++) + if (table->tab[n] && table->tab[n]->prio == q->prio) return 1; } @@ -137,6 +136,7 @@ static inline void gred_store_wred_set(struct gred_sched *table, struct gred_sched_data *q) { table->wred_set.qavg = q->vars.qavg; + table->wred_set.qidlestart = q->vars.qidlestart; } static inline int gred_use_ecn(struct gred_sched *t) @@ -176,7 +176,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; } - /* sum up all the qaves of prios <= to ours to get the new qave */ + /* sum up all the qaves of prios < ours to get the new qave */ if (!gred_wred_mode(t) && gred_rio_mode(t)) { int i; @@ -260,16 +260,18 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch) } else { q->backlog -= qdisc_pkt_len(skb); - if (!q->backlog && !gred_wred_mode(t)) - red_start_of_idle_period(&q->vars); + if (gred_wred_mode(t)) { + if (!sch->qstats.backlog) + red_start_of_idle_period(&t->wred_set); + } else { + if (!q->backlog) + red_start_of_idle_period(&q->vars); + } } return skb; } - if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) - red_start_of_idle_period(&t->wred_set); - return NULL; } @@ -291,19 +293,20 @@ static unsigned int gred_drop(struct Qdisc *sch) q->backlog -= len; q->stats.other++; - if (!q->backlog && !gred_wred_mode(t)) - red_start_of_idle_period(&q->vars); + if (gred_wred_mode(t)) { + if (!sch->qstats.backlog) + red_start_of_idle_period(&t->wred_set); + } else { + if (!q->backlog) + red_start_of_idle_period(&q->vars); + } } qdisc_drop(skb, sch); return len; } - if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) - red_start_of_idle_period(&t->wred_set); - return 0; - } static void gred_reset(struct Qdisc *sch) @@ -535,6 +538,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; struct tc_gred_qopt opt; + unsigned long qavg; memset(&opt, 0, sizeof(opt)); @@ -566,7 +570,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (gred_wred_mode(table)) gred_load_wred_set(table, q); - opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg); + qavg = red_calc_qavg(&q->parms, &q->vars, + q->vars.qavg >> q->parms.Wlog); + opt.qave = qavg >> q->parms.Wlog; append_opt: if (nla_append(skb, sizeof(opt), &opt) < 0) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 9af01f3df18c..f0dd83cff906 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -203,6 +203,34 @@ out: return index; } +/* Length of the next packet (0 if the queue is empty). */ +static unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + + skb = sch->ops->peek(sch); + return skb ? qdisc_pkt_len(skb) : 0; +} + +static void qfq_deactivate_class(struct qfq_sched *, struct qfq_class *); +static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, + unsigned int len); + +static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl, + u32 lmax, u32 inv_w, int delta_w) +{ + int i; + + /* update qfq-specific data */ + cl->lmax = lmax; + cl->inv_w = inv_w; + i = qfq_calc_index(cl->inv_w, cl->lmax); + + cl->grp = &q->groups[i]; + + q->wsum += delta_w; +} + static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, unsigned long *arg) { @@ -250,6 +278,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, lmax = 1UL << QFQ_MTU_SHIFT; if (cl != NULL) { + bool need_reactivation = false; + if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, &cl->rate_est, qdisc_root_sleeping_lock(sch), @@ -258,12 +288,29 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } - if (inv_w != cl->inv_w) { - sch_tree_lock(sch); - q->wsum += delta_w; - cl->inv_w = inv_w; - sch_tree_unlock(sch); + if (lmax == cl->lmax && inv_w == cl->inv_w) + return 0; /* nothing to update */ + + i = qfq_calc_index(inv_w, lmax); + sch_tree_lock(sch); + if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) { + /* + * shift cl->F back, to not charge the + * class for the not-yet-served head + * packet + */ + cl->F = cl->S; + /* remove class from its slot in the old group */ + qfq_deactivate_class(q, cl); + need_reactivation = true; } + + qfq_update_class_params(q, cl, lmax, inv_w, delta_w); + + if (need_reactivation) /* activate in new group */ + qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc)); + sch_tree_unlock(sch); + return 0; } @@ -273,11 +320,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->refcnt = 1; cl->common.classid = classid; - cl->lmax = lmax; - cl->inv_w = inv_w; - i = qfq_calc_index(cl->inv_w, cl->lmax); - cl->grp = &q->groups[i]; + qfq_update_class_params(q, cl, lmax, inv_w, delta_w); cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); @@ -294,7 +338,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } } - q->wsum += weight; sch_tree_lock(sch); qdisc_class_hash_insert(&q->clhash, &cl->common); @@ -711,15 +754,6 @@ static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) } } -/* What is length of next packet in queue (0 if queue is empty) */ -static unsigned int qdisc_peek_len(struct Qdisc *sch) -{ - struct sk_buff *skb; - - skb = sch->ops->peek(sch); - return skb ? qdisc_pkt_len(skb) : 0; -} - /* * Updates the class, returns true if also the group needs to be updated. */ @@ -831,7 +865,10 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) if (mask) { struct qfq_group *next = qfq_ffs(q, mask); if (qfq_gt(roundedF, next->F)) { - cl->S = next->F; + if (qfq_gt(limit, next->F)) + cl->S = next->F; + else /* preserve timestamp correctness */ + cl->S = limit; return; } } @@ -843,11 +880,8 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); - struct qfq_group *grp; struct qfq_class *cl; - int err; - u64 roundedS; - int s; + int err = 0; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -876,11 +910,25 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; /* If reach this point, queue q was idle */ - grp = cl->grp; + qfq_activate_class(q, cl, qdisc_pkt_len(skb)); + + return err; +} + +/* + * Handle class switch from idle to backlogged. + */ +static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, + unsigned int pkt_len) +{ + struct qfq_group *grp = cl->grp; + u64 roundedS; + int s; + qfq_update_start(q, cl); /* compute new finish time and rounded start. */ - cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; + cl->F = cl->S + (u64)pkt_len * cl->inv_w; roundedS = qfq_round_down(cl->S, grp->slot_shift); /* @@ -917,8 +965,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) skip_update: qfq_slot_insert(grp, cl, roundedS); - - return err; } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index ebaef3ed6065..b1ef3bc301a5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -82,6 +82,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a sctp_scope_t scope, gfp_t gfp) { + struct net *net = sock_net(sk); struct sctp_sock *sp; int i; sctp_paramhdr_t *p; @@ -124,7 +125,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * socket values. */ asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt; - asoc->pf_retrans = sctp_pf_retrans; + asoc->pf_retrans = net->sctp.pf_retrans; asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial); asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); @@ -175,7 +176,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ; + min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -281,7 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * and will revert old behavior. */ asoc->peer.asconf_capable = 0; - if (sctp_addip_noauth) + if (net->sctp.addip_noauth) asoc->peer.asconf_capable = 1; asoc->asconf_addr_del_pending = NULL; asoc->src_out_of_asoc_ok = 0; @@ -641,6 +642,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, const gfp_t gfp, const int peer_state) { + struct net *net = sock_net(asoc->base.sk); struct sctp_transport *peer; struct sctp_sock *sp; unsigned short port; @@ -674,7 +676,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, return peer; } - peer = sctp_transport_new(addr, gfp); + peer = sctp_transport_new(net, addr, gfp); if (!peer) return NULL; @@ -1089,13 +1091,15 @@ out: /* Is this the association we are looking for? */ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, + struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr) { struct sctp_transport *transport; if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) && - (htons(asoc->peer.port) == paddr->v4.sin_port)) { + (htons(asoc->peer.port) == paddr->v4.sin_port) && + net_eq(sock_net(asoc->base.sk), net)) { transport = sctp_assoc_lookup_paddr(asoc, paddr); if (!transport) goto out; @@ -1116,6 +1120,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) struct sctp_association *asoc = container_of(work, struct sctp_association, base.inqueue.immediate); + struct net *net = sock_net(asoc->base.sk); struct sctp_endpoint *ep; struct sctp_chunk *chunk; struct sctp_inq *inqueue; @@ -1148,13 +1153,13 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) if (sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else - SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INCTRLCHUNKS); if (chunk->transport) chunk->transport->last_time_heard = jiffies; /* Run through the state machine. */ - error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, + error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); /* Check to see if the association is freed in response to @@ -1414,6 +1419,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) /* Should we send a SACK to update our peer? */ static inline int sctp_peer_needs_update(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); switch (asoc->state) { case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: @@ -1421,7 +1427,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, - (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + (asoc->base.sk->sk_rcvbuf >> net->sctp.rwnd_upd_shift), asoc->pathmtu))) return 1; break; @@ -1542,7 +1548,8 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, if (asoc->peer.ipv6_address) flags |= SCTP_ADDR6_PEERSUPP; - return sctp_bind_addr_copy(&asoc->base.bind_addr, + return sctp_bind_addr_copy(sock_net(asoc->base.sk), + &asoc->base.bind_addr, &asoc->ep->base.bind_addr, scope, gfp, flags); } diff --git a/net/sctp/auth.c b/net/sctp/auth.c index bf812048cf6f..159b9bc5d633 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -392,13 +392,14 @@ nomem: */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. */ - if (!sctp_auth_enable || !asoc->peer.auth_capable) + if (!net->sctp.auth_enable || !asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an @@ -445,11 +446,12 @@ struct sctp_shared_key *sctp_auth_get_shkey( */ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) { + struct net *net = sock_net(ep->base.sk); struct crypto_hash *tfm = NULL; __u16 id; /* if the transforms are already allocted, we are done */ - if (!sctp_auth_enable) { + if (!net->sctp.auth_enable) { ep->auth_hmacs = NULL; return 0; } @@ -674,7 +676,12 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) /* Check if peer requested that this chunk is authenticated */ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - if (!sctp_auth_enable || !asoc || !asoc->peer.auth_capable) + struct net *net; + if (!asoc) + return 0; + + net = sock_net(asoc->base.sk); + if (!net->sctp.auth_enable || !asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); @@ -683,7 +690,12 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) /* Check if we requested that peer authenticate this chunk. */ int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - if (!sctp_auth_enable || !asoc) + struct net *net; + if (!asoc) + return 0; + + net = sock_net(asoc->base.sk); + if (!net->sctp.auth_enable) return 0; return __sctp_auth_cid(chunk, diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 4ece451c8d27..d886b3bf84f5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -52,8 +52,8 @@ #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ -static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *, - sctp_scope_t scope, gfp_t gfp, +static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *, + union sctp_addr *, sctp_scope_t scope, gfp_t gfp, int flags); static void sctp_bind_addr_clean(struct sctp_bind_addr *); @@ -62,7 +62,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *); /* Copy 'src' to 'dest' taking 'scope' into account. Omit addresses * in 'src' which have a broader scope than 'scope'. */ -int sctp_bind_addr_copy(struct sctp_bind_addr *dest, +int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, sctp_scope_t scope, gfp_t gfp, int flags) @@ -75,7 +75,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, /* Extract the addresses which are relevant for this scope. */ list_for_each_entry(addr, &src->address_list, list) { - error = sctp_copy_one_addr(dest, &addr->a, scope, + error = sctp_copy_one_addr(net, dest, &addr->a, scope, gfp, flags); if (error < 0) goto out; @@ -87,7 +87,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, */ if (list_empty(&dest->address_list) && (SCTP_SCOPE_GLOBAL == scope)) { list_for_each_entry(addr, &src->address_list, list) { - error = sctp_copy_one_addr(dest, &addr->a, + error = sctp_copy_one_addr(net, dest, &addr->a, SCTP_SCOPE_LINK, gfp, flags); if (error < 0) @@ -448,7 +448,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, } /* Copy out addresses from the global local address list. */ -static int sctp_copy_one_addr(struct sctp_bind_addr *dest, +static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, union sctp_addr *addr, sctp_scope_t scope, gfp_t gfp, int flags) @@ -456,8 +456,8 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, int error = 0; if (sctp_is_any(NULL, addr)) { - error = sctp_copy_local_addr_list(dest, scope, gfp, flags); - } else if (sctp_in_scope(addr, scope)) { + error = sctp_copy_local_addr_list(net, dest, scope, gfp, flags); + } else if (sctp_in_scope(net, addr, scope)) { /* Now that the address is in scope, check to see if * the address type is supported by local sock as * well as the remote peer. @@ -494,7 +494,7 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr) } /* Is 'addr' valid for 'scope'? */ -int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope) +int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope) { sctp_scope_t addr_scope = sctp_scope(addr); @@ -512,7 +512,7 @@ int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope) * Address scoping can be selectively controlled via sysctl * option */ - switch (sctp_scope_policy) { + switch (net->sctp.scope_policy) { case SCTP_SCOPE_POLICY_DISABLE: return 1; case SCTP_SCOPE_POLICY_ENABLE: diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 6c8556459a75..7c2df9c33df3 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -257,7 +257,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, offset = 0; if ((whole > 1) || (whole && over)) - SCTP_INC_STATS_USER(SCTP_MIB_FRAGUSRMSGS); + SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); /* Create chunks for all the full sized DATA chunks. */ for (i=0, len=first_len; i < whole; i++) { diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 68a385d7c3bd..1859e2bc83d1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -65,6 +65,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, gfp_t gfp) { + struct net *net = sock_net(sk); struct sctp_hmac_algo_param *auth_hmacs = NULL; struct sctp_chunks_param *auth_chunks = NULL; struct sctp_shared_key *null_key; @@ -74,7 +75,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, if (!ep->digest) return NULL; - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. @@ -106,7 +107,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* If the Add-IP functionality is enabled, we must * authenticate, ASCONF and ASCONF-ACK chunks */ - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { auth_chunks->chunks[0] = SCTP_CID_ASCONF; auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; auth_chunks->param_hdr.length = @@ -140,14 +141,14 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, INIT_LIST_HEAD(&ep->asocs); /* Use SCTP specific send buffer space queues. */ - ep->sndbuf_policy = sctp_sndbuf_policy; + ep->sndbuf_policy = net->sctp.sndbuf_policy; sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); /* Get the receive buffer policy for this endpoint */ - ep->rcvbuf_policy = sctp_rcvbuf_policy; + ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Initialize the secret key used with cookie. */ get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE); @@ -302,11 +303,13 @@ void sctp_endpoint_put(struct sctp_endpoint *ep) /* Is this the endpoint we are looking for? */ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, + struct net *net, const union sctp_addr *laddr) { struct sctp_endpoint *retval = NULL; - if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) { + if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) && + net_eq(sock_net(ep->base.sk), net)) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, sctp_sk(ep->base.sk))) retval = ep; @@ -343,7 +346,8 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( rport = ntohs(paddr->v4.sin_port); - hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport); + hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port, + rport); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { @@ -386,13 +390,14 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, { struct sctp_sockaddr_entry *addr; struct sctp_bind_addr *bp; + struct net *net = sock_net(ep->base.sk); bp = &ep->base.bind_addr; /* This function is called with the socket lock held, * so the address_list can not change. */ list_for_each_entry(addr, &bp->address_list, list) { - if (sctp_has_association(&addr->a, paddr)) + if (sctp_has_association(net, &addr->a, paddr)) return 1; } @@ -409,6 +414,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) base.inqueue.immediate); struct sctp_association *asoc; struct sock *sk; + struct net *net; struct sctp_transport *transport; struct sctp_chunk *chunk; struct sctp_inq *inqueue; @@ -423,6 +429,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) asoc = NULL; inqueue = &ep->base.inqueue; sk = ep->base.sk; + net = sock_net(sk); while (NULL != (chunk = sctp_inq_pop(inqueue))) { subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); @@ -474,12 +481,12 @@ normal: if (asoc && sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else - SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS); if (chunk->transport) chunk->transport->last_time_heard = jiffies; - error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, state, + error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); if (error && chunk) diff --git a/net/sctp/input.c b/net/sctp/input.c index e64d5210ed13..8bd3c279427e 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -66,12 +66,15 @@ /* Forward declarations for internal helpers. */ static int sctp_rcv_ootb(struct sk_buff *); -static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, - const union sctp_addr *laddr, +static struct sctp_association *__sctp_rcv_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *paddr, + const union sctp_addr *laddr, struct sctp_transport **transportp); -static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr); +static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, + const union sctp_addr *laddr); static struct sctp_association *__sctp_lookup_association( + struct net *net, const union sctp_addr *local, const union sctp_addr *peer, struct sctp_transport **pt); @@ -80,7 +83,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. */ -static inline int sctp_rcv_checksum(struct sk_buff *skb) +static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) { struct sctphdr *sh = sctp_hdr(skb); __le32 cmp = sh->checksum; @@ -96,7 +99,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb) if (val != cmp) { /* CRC failure, dump it. */ - SCTP_INC_STATS_BH(SCTP_MIB_CHECKSUMERRORS); + SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS); return -1; } return 0; @@ -129,11 +132,12 @@ int sctp_rcv(struct sk_buff *skb) union sctp_addr dest; int family; struct sctp_af *af; + struct net *net = dev_net(skb->dev); if (skb->pkt_type!=PACKET_HOST) goto discard_it; - SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS); + SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); if (skb_linearize(skb)) goto discard_it; @@ -145,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb->len < sizeof(struct sctphdr)) goto discard_it; if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) && - sctp_rcv_checksum(skb) < 0) + sctp_rcv_checksum(net, skb) < 0) goto discard_it; skb_pull(skb, sizeof(struct sctphdr)); @@ -178,10 +182,10 @@ int sctp_rcv(struct sk_buff *skb) !af->addr_valid(&dest, NULL, skb)) goto discard_it; - asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport); + asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport); if (!asoc) - ep = __sctp_rcv_lookup_endpoint(&dest); + ep = __sctp_rcv_lookup_endpoint(net, &dest); /* Retrieve the common input handling substructure. */ rcvr = asoc ? &asoc->base : &ep->base; @@ -200,7 +204,7 @@ int sctp_rcv(struct sk_buff *skb) sctp_endpoint_put(ep); ep = NULL; } - sk = sctp_get_ctl_sock(); + sk = net->sctp.ctl_sock; ep = sctp_sk(sk)->ep; sctp_endpoint_hold(ep); rcvr = &ep->base; @@ -216,7 +220,7 @@ int sctp_rcv(struct sk_buff *skb) */ if (!asoc) { if (sctp_rcv_ootb(skb)) { - SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES); goto discard_release; } } @@ -272,9 +276,9 @@ int sctp_rcv(struct sk_buff *skb) skb = NULL; /* sctp_chunk_free already freed the skb */ goto discard_release; } - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG); } else { - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); } @@ -289,7 +293,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; @@ -462,11 +466,13 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } } else { + struct net *net = sock_net(sk); + if (timer_pending(&t->proto_unreach_timer) && del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); - sctp_do_sm(SCTP_EVENT_T_OTHER, + sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), asoc->state, asoc->ep, asoc, t, GFP_ATOMIC); @@ -474,7 +480,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } /* Common lookup code for icmp/icmpv6 error handler. */ -struct sock *sctp_err_lookup(int family, struct sk_buff *skb, +struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctphdr *sctphdr, struct sctp_association **app, struct sctp_transport **tpp) @@ -503,7 +509,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, /* Look for an association that matches the incoming ICMP error * packet. */ - asoc = __sctp_lookup_association(&saddr, &daddr, &transport); + asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport); if (!asoc) return NULL; @@ -539,7 +545,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); *app = asoc; *tpp = transport; @@ -586,9 +592,10 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct inet_sock *inet; sk_buff_data_t saveip, savesctp; int err; + struct net *net = dev_net(skb->dev); if (skb->len < ihlen + 8) { - ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -597,12 +604,12 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, ihlen); - sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport); + sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original values. */ skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } /* Warning: The sock lock is held. Remember to call @@ -723,12 +730,13 @@ discard: /* Insert endpoint into the hash table. */ static void __sctp_hash_endpoint(struct sctp_endpoint *ep) { + struct net *net = sock_net(ep->base.sk); struct sctp_ep_common *epb; struct sctp_hashbucket *head; epb = &ep->base; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); + epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; sctp_write_lock(&head->lock); @@ -747,12 +755,13 @@ void sctp_hash_endpoint(struct sctp_endpoint *ep) /* Remove endpoint from the hash table. */ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) { + struct net *net = sock_net(ep->base.sk); struct sctp_hashbucket *head; struct sctp_ep_common *epb; epb = &ep->base; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); + epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; @@ -770,7 +779,8 @@ void sctp_unhash_endpoint(struct sctp_endpoint *ep) } /* Look up an endpoint. */ -static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr) +static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, + const union sctp_addr *laddr) { struct sctp_hashbucket *head; struct sctp_ep_common *epb; @@ -778,16 +788,16 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l struct hlist_node *node; int hash; - hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port)); + hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); - if (sctp_endpoint_is_match(ep, laddr)) + if (sctp_endpoint_is_match(ep, net, laddr)) goto hit; } - ep = sctp_sk((sctp_get_ctl_sock()))->ep; + ep = sctp_sk(net->sctp.ctl_sock)->ep; hit: sctp_endpoint_hold(ep); @@ -798,13 +808,15 @@ hit: /* Insert association into the hash table. */ static void __sctp_hash_established(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); struct sctp_ep_common *epb; struct sctp_hashbucket *head; epb = &asoc->base; /* Calculate which chain this entry will belong to. */ - epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, asoc->peer.port); + epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, + asoc->peer.port); head = &sctp_assoc_hashtable[epb->hashent]; @@ -827,12 +839,13 @@ void sctp_hash_established(struct sctp_association *asoc) /* Remove association from the hash table. */ static void __sctp_unhash_established(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); struct sctp_hashbucket *head; struct sctp_ep_common *epb; epb = &asoc->base; - epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, + epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, asoc->peer.port); head = &sctp_assoc_hashtable[epb->hashent]; @@ -855,6 +868,7 @@ void sctp_unhash_established(struct sctp_association *asoc) /* Look up an association. */ static struct sctp_association *__sctp_lookup_association( + struct net *net, const union sctp_addr *local, const union sctp_addr *peer, struct sctp_transport **pt) @@ -869,12 +883,13 @@ static struct sctp_association *__sctp_lookup_association( /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port)); + hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port), + ntohs(peer->v4.sin_port)); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { asoc = sctp_assoc(epb); - transport = sctp_assoc_is_match(asoc, local, peer); + transport = sctp_assoc_is_match(asoc, net, local, peer); if (transport) goto hit; } @@ -892,27 +907,29 @@ hit: /* Look up an association. BH-safe. */ SCTP_STATIC -struct sctp_association *sctp_lookup_association(const union sctp_addr *laddr, +struct sctp_association *sctp_lookup_association(struct net *net, + const union sctp_addr *laddr, const union sctp_addr *paddr, struct sctp_transport **transportp) { struct sctp_association *asoc; sctp_local_bh_disable(); - asoc = __sctp_lookup_association(laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); sctp_local_bh_enable(); return asoc; } /* Is there an association matching the given local and peer addresses? */ -int sctp_has_association(const union sctp_addr *laddr, +int sctp_has_association(struct net *net, + const union sctp_addr *laddr, const union sctp_addr *paddr) { struct sctp_association *asoc; struct sctp_transport *transport; - if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) { + if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) { sctp_association_put(asoc); return 1; } @@ -938,7 +955,8 @@ int sctp_has_association(const union sctp_addr *laddr, * in certain circumstances. * */ -static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp) { struct sctp_association *asoc; @@ -978,7 +996,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, af->from_addr_param(paddr, params.addr, sh->source, 0); - asoc = __sctp_lookup_association(laddr, paddr, &transport); + asoc = __sctp_lookup_association(net, laddr, paddr, &transport); if (asoc) return asoc; } @@ -1001,6 +1019,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, * subsequent ASCONF Chunks. If found, proceed to rule D4. */ static struct sctp_association *__sctp_rcv_asconf_lookup( + struct net *net, sctp_chunkhdr_t *ch, const union sctp_addr *laddr, __be16 peer_port, @@ -1020,7 +1039,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( af->from_addr_param(&paddr, param, peer_port, 0); - return __sctp_lookup_association(laddr, &paddr, transportp); + return __sctp_lookup_association(net, laddr, &paddr, transportp); } @@ -1033,7 +1052,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( * This means that any chunks that can help us identify the association need * to be looked at to find this association. */ -static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp) { @@ -1074,8 +1094,9 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, break; case SCTP_CID_ASCONF: - if (have_auth || sctp_addip_noauth) - asoc = __sctp_rcv_asconf_lookup(ch, laddr, + if (have_auth || net->sctp.addip_noauth) + asoc = __sctp_rcv_asconf_lookup( + net, ch, laddr, sctp_hdr(skb)->source, transportp); default: @@ -1098,7 +1119,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, * include looking inside of INIT/INIT-ACK chunks or after the AUTH * chunks. */ -static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp) { @@ -1118,11 +1140,11 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, switch (ch->type) { case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: - return __sctp_rcv_init_lookup(skb, laddr, transportp); + return __sctp_rcv_init_lookup(net, skb, laddr, transportp); break; default: - return __sctp_rcv_walk_lookup(skb, laddr, transportp); + return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); break; } @@ -1131,21 +1153,22 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, } /* Lookup an association for an inbound skb. */ -static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *paddr, const union sctp_addr *laddr, struct sctp_transport **transportp) { struct sctp_association *asoc; - asoc = __sctp_lookup_association(laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); /* Further lookup for INIT/INIT-ACK packets. * SCTP Implementors Guide, 2.18 Handling of address * parameters within the INIT or INIT-ACK. */ if (!asoc) - asoc = __sctp_rcv_lookup_harder(skb, laddr, transportp); + asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp); return asoc; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ed7139ea7978..ea14cb445295 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -99,6 +99,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + struct net *net = dev_net(ifa->idev->dev); int found = 0; switch (ev) { @@ -110,27 +111,27 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; - spin_lock_bh(&sctp_local_addr_lock); - list_add_tail_rcu(&addr->list, &sctp_local_addr_list); - sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); - spin_unlock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); + list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); + spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: - spin_lock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, - &sctp_local_addr_list, list) { + &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { - sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } - spin_unlock_bh(&sctp_local_addr_lock); + spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; @@ -154,6 +155,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ipv6_pinfo *np; sk_buff_data_t saveip, savesctp; int err; + struct net *net = dev_net(skb->dev); idev = in6_dev_get(skb->dev); @@ -162,12 +164,12 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, offset); - sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); + sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original pointers. */ skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS); goto out; } @@ -241,7 +243,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) __func__, skb, skb->len, &fl6.saddr, &fl6.daddr); - SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); + SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; @@ -580,7 +582,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) if (!(type & IPV6_ADDR_UNICAST)) return 0; - return ipv6_chk_addr(&init_net, in6, NULL, 0); + return ipv6_chk_addr(sock_net(&sp->inet.sk), in6, NULL, 0); } /* This function checks if the address is a valid address to be used for @@ -857,14 +859,14 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) struct net_device *dev; if (type & IPV6_ADDR_LINKLOCAL) { + struct net *net; if (!addr->v6.sin6_scope_id) return 0; + net = sock_net(&opt->inet.sk); rcu_read_lock(); - dev = dev_get_by_index_rcu(&init_net, - addr->v6.sin6_scope_id); + dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); if (!dev || - !ipv6_chk_addr(&init_net, &addr->v6.sin6_addr, - dev, 0)) { + !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) { rcu_read_unlock(); return 0; } @@ -897,7 +899,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) if (!addr->v6.sin6_scope_id) return 0; rcu_read_lock(); - dev = dev_get_by_index_rcu(&init_net, + dev = dev_get_by_index_rcu(sock_net(&opt->inet.sk), addr->v6.sin6_scope_id); rcu_read_unlock(); if (!dev) diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 8ef8e7d9eb61..fe012c44f8df 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -129,20 +129,20 @@ static const struct file_operations sctp_objcnt_ops = { }; /* Initialize the objcount in the proc filesystem. */ -void sctp_dbg_objcnt_init(void) +void sctp_dbg_objcnt_init(struct net *net) { struct proc_dir_entry *ent; ent = proc_create("sctp_dbg_objcnt", 0, - proc_net_sctp, &sctp_objcnt_ops); + net->sctp.proc_net_sctp, &sctp_objcnt_ops); if (!ent) pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } /* Cleanup the objcount entry in the proc filesystem. */ -void sctp_dbg_objcnt_exit(void) +void sctp_dbg_objcnt_exit(struct net *net) { - remove_proc_entry("sctp_dbg_objcnt", proc_net_sctp); + remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp); } diff --git a/net/sctp/output.c b/net/sctp/output.c index 838e18b4d7ea..4e90188bf489 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -364,6 +364,25 @@ finish: return retval; } +static void sctp_packet_release_owner(struct sk_buff *skb) +{ + sk_free(skb->sk); +} + +static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sctp_packet_release_owner; + + /* + * The data chunks have already been accounted for in sctp_sendmsg(), + * therefore only reserve a single byte to keep socket around until + * the packet has been transmitted. + */ + atomic_inc(&sk->sk_wmem_alloc); +} + /* All packets are sent to the network through this function from * sctp_outq_tail(). * @@ -405,7 +424,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) /* Set the owning socket so that we know where to get the * destination IP address. */ - skb_set_owner_w(nskb, sk); + sctp_packet_set_owner_w(nskb, sk); if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); @@ -597,7 +616,7 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e7aa177c9522..1b4a7f8ec3fd 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -63,6 +63,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn); static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, + union sctp_addr *saddr, struct sctp_sackhdr *sack, __u32 *highest_new_tsn); @@ -299,6 +300,7 @@ void sctp_outq_free(struct sctp_outq *q) /* Put a new chunk in an sctp_outq. */ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) { + struct net *net = sock_net(q->asoc->base.sk); int error = 0; SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n", @@ -337,15 +339,15 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) sctp_outq_tail_data(q, chunk); if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(SCTP_MIB_OUTUNORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else - SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); q->empty = 0; break; } } else { list_add_tail(&chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } if (error < 0) @@ -478,11 +480,12 @@ void sctp_retransmit_mark(struct sctp_outq *q, void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_retransmit_reason_t reason) { + struct net *net = sock_net(q->asoc->base.sk); int error = 0; switch(reason) { case SCTP_RTXR_T3_RTX: - SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. @@ -493,15 +496,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, transport->asoc->unack_data; break; case SCTP_RTXR_FAST_RTX: - SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); q->fast_rtx = 1; break; case SCTP_RTXR_PMTUD: - SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_PMTUD_RETRANSMITS); break; case SCTP_RTXR_T1_RTX: - SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_T1_RETRANSMITS); transport->asoc->init_retries++; break; default: @@ -589,9 +592,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * next chunk. */ if (chunk->tsn_gap_acked) { - list_del(&chunk->transmitted_list); - list_add_tail(&chunk->transmitted_list, - &transport->transmitted); + list_move_tail(&chunk->transmitted_list, + &transport->transmitted); continue; } @@ -655,9 +657,8 @@ redo: /* The append was successful, so add this chunk to * the transmitted list. */ - list_del(&chunk->transmitted_list); - list_add_tail(&chunk->transmitted_list, - &transport->transmitted); + list_move_tail(&chunk->transmitted_list, + &transport->transmitted); /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. @@ -1139,9 +1140,10 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc, * Process the SACK against the outqueue. Mostly, this just frees * things off the transmitted queue. */ -int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) +int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) { struct sctp_association *asoc = q->asoc; + struct sctp_sackhdr *sack = chunk->subh.sack_hdr; struct sctp_transport *transport; struct sctp_chunk *tchunk = NULL; struct list_head *lchunk, *transport_list, *temp; @@ -1210,7 +1212,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ - sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn); + sctp_check_transmitted(q, &q->retransmit, NULL, NULL, sack, &highest_new_tsn); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. @@ -1219,7 +1221,8 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) */ list_for_each_entry(transport, transport_list, transports) { sctp_check_transmitted(q, &transport->transmitted, - transport, sack, &highest_new_tsn); + transport, &chunk->source, sack, + &highest_new_tsn); /* * SFR-CACC algorithm: * C) Let count_of_newacks be the number of @@ -1326,6 +1329,7 @@ int sctp_outq_is_empty(const struct sctp_outq *q) static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, + union sctp_addr *saddr, struct sctp_sackhdr *sack, __u32 *highest_new_tsn_in_sack) { @@ -1633,8 +1637,9 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Mark the destination transport address as * active if it is not so marked. */ - if ((transport->state == SCTP_INACTIVE) || - (transport->state == SCTP_UNCONFIRMED)) { + if ((transport->state == SCTP_INACTIVE || + transport->state == SCTP_UNCONFIRMED) && + sctp_cmp_addr_exact(&transport->ipaddr, saddr)) { sctp_assoc_control_transport( transport->asoc, transport, @@ -1914,6 +1919,6 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) if (ftsn_chunk) { list_add_tail(&ftsn_chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS); } } diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index 534c7eae9d15..794bb14decde 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -57,7 +57,7 @@ #define DECLARE_PRIMITIVE(name) \ /* This is called in the code as sctp_primitive_ ## name. */ \ -int sctp_primitive_ ## name(struct sctp_association *asoc, \ +int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \ void *arg) { \ int error = 0; \ sctp_event_t event_type; sctp_subtype_t subtype; \ @@ -69,7 +69,7 @@ int sctp_primitive_ ## name(struct sctp_association *asoc, \ state = asoc ? asoc->state : SCTP_STATE_CLOSED; \ ep = asoc ? asoc->ep : NULL; \ \ - error = sctp_do_sm(event_type, subtype, state, ep, asoc, \ + error = sctp_do_sm(net, event_type, subtype, state, ep, asoc, \ arg, GFP_KERNEL); \ return error; \ } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 1e2eee88c3ea..c3bea269faf4 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -80,11 +80,12 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; int i; for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field((void __percpu **)sctp_statistics, + snmp_fold_field((void __percpu **)net->sctp.sctp_statistics, sctp_snmp_list[i].entry)); return 0; @@ -93,7 +94,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) /* Initialize the seq file operations for 'snmp' object. */ static int sctp_snmp_seq_open(struct inode *inode, struct file *file) { - return single_open(file, sctp_snmp_seq_show, NULL); + return single_open_net(inode, file, sctp_snmp_seq_show); } static const struct file_operations sctp_snmp_seq_fops = { @@ -105,11 +106,12 @@ static const struct file_operations sctp_snmp_seq_fops = { }; /* Set up the proc fs entry for 'snmp' object. */ -int __init sctp_snmp_proc_init(void) +int __net_init sctp_snmp_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("snmp", S_IRUGO, proc_net_sctp, &sctp_snmp_seq_fops); + p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_snmp_seq_fops); if (!p) return -ENOMEM; @@ -117,9 +119,9 @@ int __init sctp_snmp_proc_init(void) } /* Cleanup the proc fs entry for 'snmp' object. */ -void sctp_snmp_proc_exit(void) +void sctp_snmp_proc_exit(struct net *net) { - remove_proc_entry("snmp", proc_net_sctp); + remove_proc_entry("snmp", net->sctp.proc_net_sctp); } /* Dump local addresses of an association/endpoint. */ @@ -213,10 +215,13 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; + if (!net_eq(sock_net(sk), seq_file_net(seq))) + continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, epb->bind_addr.port, - sock_i_uid(sk), sock_i_ino(sk)); + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk)); sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "\n"); @@ -238,7 +243,8 @@ static const struct seq_operations sctp_eps_ops = { /* Initialize the seq file operations for 'eps' object. */ static int sctp_eps_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_eps_ops); + return seq_open_net(inode, file, &sctp_eps_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_eps_seq_fops = { @@ -249,11 +255,12 @@ static const struct file_operations sctp_eps_seq_fops = { }; /* Set up the proc fs entry for 'eps' object. */ -int __init sctp_eps_proc_init(void) +int __net_init sctp_eps_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("eps", S_IRUGO, proc_net_sctp, &sctp_eps_seq_fops); + p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_eps_seq_fops); if (!p) return -ENOMEM; @@ -261,9 +268,9 @@ int __init sctp_eps_proc_init(void) } /* Cleanup the proc fs entry for 'eps' object. */ -void sctp_eps_proc_exit(void) +void sctp_eps_proc_exit(struct net *net) { - remove_proc_entry("eps", proc_net_sctp); + remove_proc_entry("eps", net->sctp.proc_net_sctp); } @@ -316,6 +323,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; + if (!net_eq(sock_net(sk), seq_file_net(seq))) + continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-2d %-4d " "%4d %8d %8d %7d %5lu %-5d %5d ", @@ -324,7 +333,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) assoc->assoc_id, assoc->sndbuf_used, atomic_read(&assoc->rmem_alloc), - sock_i_uid(sk), sock_i_ino(sk), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); seq_printf(seq, " "); @@ -354,7 +364,8 @@ static const struct seq_operations sctp_assoc_ops = { /* Initialize the seq file operations for 'assocs' object. */ static int sctp_assocs_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_assoc_ops); + return seq_open_net(inode, file, &sctp_assoc_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_assocs_seq_fops = { @@ -365,11 +376,11 @@ static const struct file_operations sctp_assocs_seq_fops = { }; /* Set up the proc fs entry for 'assocs' object. */ -int __init sctp_assocs_proc_init(void) +int __net_init sctp_assocs_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("assocs", S_IRUGO, proc_net_sctp, + p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp, &sctp_assocs_seq_fops); if (!p) return -ENOMEM; @@ -378,9 +389,9 @@ int __init sctp_assocs_proc_init(void) } /* Cleanup the proc fs entry for 'assocs' object. */ -void sctp_assocs_proc_exit(void) +void sctp_assocs_proc_exit(struct net *net) { - remove_proc_entry("assocs", proc_net_sctp); + remove_proc_entry("assocs", net->sctp.proc_net_sctp); } static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) @@ -426,6 +437,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) sctp_local_bh_disable(); read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { + if (!net_eq(sock_net(epb->sk), seq_file_net(seq))) + continue; assoc = sctp_assoc(epb); list_for_each_entry(tsp, &assoc->peer.transport_addr_list, transports) { @@ -489,14 +502,15 @@ static const struct seq_operations sctp_remaddr_ops = { }; /* Cleanup the proc fs entry for 'remaddr' object. */ -void sctp_remaddr_proc_exit(void) +void sctp_remaddr_proc_exit(struct net *net) { - remove_proc_entry("remaddr", proc_net_sctp); + remove_proc_entry("remaddr", net->sctp.proc_net_sctp); } static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_remaddr_ops); + return seq_open_net(inode, file, &sctp_remaddr_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_remaddr_seq_fops = { @@ -506,11 +520,12 @@ static const struct file_operations sctp_remaddr_seq_fops = { .release = seq_release, }; -int __init sctp_remaddr_proc_init(void) +int __net_init sctp_remaddr_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("remaddr", S_IRUGO, proc_net_sctp, &sctp_remaddr_seq_fops); + p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_remaddr_seq_fops); if (!p) return -ENOMEM; return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1f89c4e69645..2d518425d598 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -69,21 +69,10 @@ /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; -DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; - -#ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_sctp; -#endif struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); -/* This is the global socket data structure used for responding to - * the Out-of-the-blue (OOTB) packets. A control sock will be created - * for this socket at the initialization time. - */ -static struct sock *sctp_ctl_sock; - static struct sctp_pf *sctp_pf_inet6_specific; static struct sctp_pf *sctp_pf_inet_specific; static struct sctp_af *sctp_af_v4_specific; @@ -96,74 +85,54 @@ long sysctl_sctp_mem[3]; int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; -/* Return the address of the control sock. */ -struct sock *sctp_get_ctl_sock(void) -{ - return sctp_ctl_sock; -} - /* Set up the proc fs entry for the SCTP protocol. */ -static __init int sctp_proc_init(void) +static __net_init int sctp_proc_init(struct net *net) { - if (percpu_counter_init(&sctp_sockets_allocated, 0)) - goto out_nomem; #ifdef CONFIG_PROC_FS - if (!proc_net_sctp) { - proc_net_sctp = proc_mkdir("sctp", init_net.proc_net); - if (!proc_net_sctp) - goto out_free_percpu; - } - - if (sctp_snmp_proc_init()) + net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); + if (!net->sctp.proc_net_sctp) + goto out_proc_net_sctp; + if (sctp_snmp_proc_init(net)) goto out_snmp_proc_init; - if (sctp_eps_proc_init()) + if (sctp_eps_proc_init(net)) goto out_eps_proc_init; - if (sctp_assocs_proc_init()) + if (sctp_assocs_proc_init(net)) goto out_assocs_proc_init; - if (sctp_remaddr_proc_init()) + if (sctp_remaddr_proc_init(net)) goto out_remaddr_proc_init; return 0; out_remaddr_proc_init: - sctp_assocs_proc_exit(); + sctp_assocs_proc_exit(net); out_assocs_proc_init: - sctp_eps_proc_exit(); + sctp_eps_proc_exit(net); out_eps_proc_init: - sctp_snmp_proc_exit(); + sctp_snmp_proc_exit(net); out_snmp_proc_init: - if (proc_net_sctp) { - proc_net_sctp = NULL; - remove_proc_entry("sctp", init_net.proc_net); - } -out_free_percpu: - percpu_counter_destroy(&sctp_sockets_allocated); -#else - return 0; -#endif /* CONFIG_PROC_FS */ - -out_nomem: + remove_proc_entry("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; +out_proc_net_sctp: return -ENOMEM; +#endif /* CONFIG_PROC_FS */ + return 0; } /* Clean up the proc fs entry for the SCTP protocol. * Note: Do not make this __exit as it is used in the init error * path. */ -static void sctp_proc_exit(void) +static void sctp_proc_exit(struct net *net) { #ifdef CONFIG_PROC_FS - sctp_snmp_proc_exit(); - sctp_eps_proc_exit(); - sctp_assocs_proc_exit(); - sctp_remaddr_proc_exit(); - - if (proc_net_sctp) { - proc_net_sctp = NULL; - remove_proc_entry("sctp", init_net.proc_net); - } + sctp_snmp_proc_exit(net); + sctp_eps_proc_exit(net); + sctp_assocs_proc_exit(net); + sctp_remaddr_proc_exit(net); + + remove_proc_entry("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; #endif - percpu_counter_destroy(&sctp_sockets_allocated); } /* Private helper to extract ipv4 address and stash them in @@ -201,29 +170,29 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, /* Extract our IP addresses from the system and stash them in the * protocol structure. */ -static void sctp_get_local_addr_list(void) +static void sctp_get_local_addr_list(struct net *net) { struct net_device *dev; struct list_head *pos; struct sctp_af *af; rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); - af->copy_addrlist(&sctp_local_addr_list, dev); + af->copy_addrlist(&net->sctp.local_addr_list, dev); } } rcu_read_unlock(); } /* Free the existing local addresses. */ -static void sctp_free_local_addr_list(void) +static void sctp_free_local_addr_list(struct net *net) { struct sctp_sockaddr_entry *addr; struct list_head *pos, *temp; - list_for_each_safe(pos, temp, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &net->sctp.local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); list_del(pos); kfree(addr); @@ -231,17 +200,17 @@ static void sctp_free_local_addr_list(void) } /* Copy the local addresses which are valid for 'scope' into 'bp'. */ -int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, - gfp_t gfp, int copy_flags) +int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, + sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; - if (sctp_in_scope(&addr->a, scope)) { + if (sctp_in_scope(net, &addr->a, scope)) { /* Now that the address is in scope, check to see if * the address type is really supported by the local * sock as well as the remote peer. @@ -397,7 +366,8 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, /* Should this be available for binding? */ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) { - int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr); + struct net *net = sock_net(&sp->inet.sk); + int ret = inet_addr_type(net, addr->v4.sin_addr.s_addr); if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && @@ -484,7 +454,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", __func__, &fl4->daddr, &fl4->saddr); - rt = ip_route_output_key(&init_net, fl4); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) dst = &rt->dst; @@ -530,7 +500,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, (AF_INET == laddr->a.sa.sa_family)) { fl4->saddr = laddr->a.v4.sin_addr.s_addr; fl4->fl4_sport = laddr->a.v4.sin_port; - rt = ip_route_output_key(&init_net, fl4); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) { dst = &rt->dst; goto out_unlock; @@ -627,14 +597,15 @@ static void sctp_v4_ecn_capable(struct sock *sk) void sctp_addr_wq_timeout_handler(unsigned long arg) { + struct net *net = (struct net *)arg; struct sctp_sockaddr_entry *addrw, *temp; struct sctp_sock *sp; - spin_lock_bh(&sctp_addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); - list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", - " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state, + " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state, addrw); #if IS_ENABLED(CONFIG_IPV6) @@ -648,7 +619,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg) goto free_next; in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr; - if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 && + if (ipv6_chk_addr(net, in6, NULL, 0) == 0 && addrw->state == SCTP_ADDR_NEW) { unsigned long timeo_val; @@ -656,12 +627,12 @@ void sctp_addr_wq_timeout_handler(unsigned long arg) SCTP_ADDRESS_TICK_DELAY); timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); - mod_timer(&sctp_addr_wq_timer, timeo_val); + mod_timer(&net->sctp.addr_wq_timer, timeo_val); break; } } #endif - list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) { + list_for_each_entry(sp, &net->sctp.auto_asconf_splist, auto_asconf_list) { struct sock *sk; sk = sctp_opt2sk(sp); @@ -679,31 +650,32 @@ free_next: list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } -static void sctp_free_addr_wq(void) +static void sctp_free_addr_wq(struct net *net) { struct sctp_sockaddr_entry *addrw; struct sctp_sockaddr_entry *temp; - spin_lock_bh(&sctp_addr_wq_lock); - del_timer(&sctp_addr_wq_timer); - list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + spin_lock_bh(&net->sctp.addr_wq_lock); + del_timer(&net->sctp.addr_wq_timer); + list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } /* lookup the entry for the same address in the addr_waitq * sctp_addr_wq MUST be locked */ -static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr) +static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct net *net, + struct sctp_sockaddr_entry *addr) { struct sctp_sockaddr_entry *addrw; - list_for_each_entry(addrw, &sctp_addr_waitq, list) { + list_for_each_entry(addrw, &net->sctp.addr_waitq, list) { if (addrw->a.sa.sa_family != addr->a.sa.sa_family) continue; if (addrw->a.sa.sa_family == AF_INET) { @@ -719,7 +691,7 @@ static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entr return NULL; } -void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) +void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cmd) { struct sctp_sockaddr_entry *addrw; unsigned long timeo_val; @@ -730,38 +702,38 @@ void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) * new address after a couple of addition and deletion of that address */ - spin_lock_bh(&sctp_addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); /* Offsets existing events in addr_wq */ - addrw = sctp_addr_wq_lookup(addr); + addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { if (addrw->state != cmd) { SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", " in wq %p\n", addrw->state, &addrw->a, - &sctp_addr_waitq); + &net->sctp.addr_waitq); list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); return; } /* OK, we have to add the new address to the wait queue */ addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); if (addrw == NULL) { - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); return; } addrw->state = cmd; - list_add_tail(&addrw->list, &sctp_addr_waitq); + list_add_tail(&addrw->list, &net->sctp.addr_waitq); SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", - " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq); + " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq); - if (!timer_pending(&sctp_addr_wq_timer)) { + if (!timer_pending(&net->sctp.addr_wq_timer)) { timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); - mod_timer(&sctp_addr_wq_timer, timeo_val); + mod_timer(&net->sctp.addr_wq_timer, timeo_val); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } /* Event handler for inet address addition/deletion events. @@ -776,11 +748,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + struct net *net = dev_net(ifa->ifa_dev->dev); int found = 0; - if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net)) - return NOTIFY_DONE; - switch (ev) { case NETDEV_UP: addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); @@ -789,27 +759,27 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; - spin_lock_bh(&sctp_local_addr_lock); - list_add_tail_rcu(&addr->list, &sctp_local_addr_list); - sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); - spin_unlock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); + list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); + spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: - spin_lock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, - &sctp_local_addr_list, list) { + &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET && addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } - spin_unlock_bh(&sctp_local_addr_lock); + spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; @@ -822,7 +792,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, * Initialize the control inode/socket with a control endpoint data * structure. This endpoint is reserved exclusively for the OOTB processing. */ -static int sctp_ctl_sock_init(void) +static int sctp_ctl_sock_init(struct net *net) { int err; sa_family_t family = PF_INET; @@ -830,14 +800,14 @@ static int sctp_ctl_sock_init(void) if (sctp_get_pf_specific(PF_INET6)) family = PF_INET6; - err = inet_ctl_sock_create(&sctp_ctl_sock, family, - SOCK_SEQPACKET, IPPROTO_SCTP, &init_net); + err = inet_ctl_sock_create(&net->sctp.ctl_sock, family, + SOCK_SEQPACKET, IPPROTO_SCTP, net); /* If IPv6 socket could not be created, try the IPv4 socket */ if (err < 0 && family == PF_INET6) - err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET, + err = inet_ctl_sock_create(&net->sctp.ctl_sock, AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP, - &init_net); + net); if (err < 0) { pr_err("Failed to create the SCTP control socket\n"); @@ -990,7 +960,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; - SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); + SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); return ip_queue_xmit(skb, &transport->fl); } @@ -1063,6 +1033,7 @@ static const struct net_protocol sctp_protocol = { .handler = sctp_rcv, .err_handler = sctp_v4_err, .no_policy = 1, + .netns_ok = 1, }; /* IPv4 address related functions. */ @@ -1130,16 +1101,16 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) return 1; } -static inline int init_sctp_mibs(void) +static inline int init_sctp_mibs(struct net *net) { - return snmp_mib_init((void __percpu **)sctp_statistics, + return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics, sizeof(struct sctp_mib), __alignof__(struct sctp_mib)); } -static inline void cleanup_sctp_mibs(void) +static inline void cleanup_sctp_mibs(struct net *net) { - snmp_mib_free((void __percpu **)sctp_statistics); + snmp_mib_free((void __percpu **)net->sctp.sctp_statistics); } static void sctp_v4_pf_init(void) @@ -1194,6 +1165,143 @@ static void sctp_v4_del_protocol(void) unregister_inetaddr_notifier(&sctp_inetaddr_notifier); } +static int sctp_net_init(struct net *net) +{ + int status; + + /* + * 14. Suggested SCTP Protocol Parameter Values + */ + /* The following protocol parameters are RECOMMENDED: */ + /* RTO.Initial - 3 seconds */ + net->sctp.rto_initial = SCTP_RTO_INITIAL; + /* RTO.Min - 1 second */ + net->sctp.rto_min = SCTP_RTO_MIN; + /* RTO.Max - 60 seconds */ + net->sctp.rto_max = SCTP_RTO_MAX; + /* RTO.Alpha - 1/8 */ + net->sctp.rto_alpha = SCTP_RTO_ALPHA; + /* RTO.Beta - 1/4 */ + net->sctp.rto_beta = SCTP_RTO_BETA; + + /* Valid.Cookie.Life - 60 seconds */ + net->sctp.valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; + + /* Whether Cookie Preservative is enabled(1) or not(0) */ + net->sctp.cookie_preserve_enable = 1; + + /* Max.Burst - 4 */ + net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; + + /* Association.Max.Retrans - 10 attempts + * Path.Max.Retrans - 5 attempts (per destination address) + * Max.Init.Retransmits - 8 attempts + */ + net->sctp.max_retrans_association = 10; + net->sctp.max_retrans_path = 5; + net->sctp.max_retrans_init = 8; + + /* Sendbuffer growth - do per-socket accounting */ + net->sctp.sndbuf_policy = 0; + + /* Rcvbuffer growth - do per-socket accounting */ + net->sctp.rcvbuf_policy = 0; + + /* HB.interval - 30 seconds */ + net->sctp.hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT; + + /* delayed SACK timeout */ + net->sctp.sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK; + + /* Disable ADDIP by default. */ + net->sctp.addip_enable = 0; + net->sctp.addip_noauth = 0; + net->sctp.default_auto_asconf = 0; + + /* Enable PR-SCTP by default. */ + net->sctp.prsctp_enable = 1; + + /* Disable AUTH by default. */ + net->sctp.auth_enable = 0; + + /* Set SCOPE policy to enabled */ + net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE; + + /* Set the default rwnd update threshold */ + net->sctp.rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + + /* Initialize maximum autoclose timeout. */ + net->sctp.max_autoclose = INT_MAX / HZ; + + status = sctp_sysctl_net_register(net); + if (status) + goto err_sysctl_register; + + /* Allocate and initialise sctp mibs. */ + status = init_sctp_mibs(net); + if (status) + goto err_init_mibs; + + /* Initialize proc fs directory. */ + status = sctp_proc_init(net); + if (status) + goto err_init_proc; + + sctp_dbg_objcnt_init(net); + + /* Initialize the control inode/socket for handling OOTB packets. */ + if ((status = sctp_ctl_sock_init(net))) { + pr_err("Failed to initialize the SCTP control sock\n"); + goto err_ctl_sock_init; + } + + /* Initialize the local address list. */ + INIT_LIST_HEAD(&net->sctp.local_addr_list); + spin_lock_init(&net->sctp.local_addr_lock); + sctp_get_local_addr_list(net); + + /* Initialize the address event list */ + INIT_LIST_HEAD(&net->sctp.addr_waitq); + INIT_LIST_HEAD(&net->sctp.auto_asconf_splist); + spin_lock_init(&net->sctp.addr_wq_lock); + net->sctp.addr_wq_timer.expires = 0; + setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, + (unsigned long)net); + + return 0; + +err_ctl_sock_init: + sctp_dbg_objcnt_exit(net); + sctp_proc_exit(net); +err_init_proc: + cleanup_sctp_mibs(net); +err_init_mibs: + sctp_sysctl_net_unregister(net); +err_sysctl_register: + return status; +} + +static void sctp_net_exit(struct net *net) +{ + /* Free the local address list */ + sctp_free_addr_wq(net); + sctp_free_local_addr_list(net); + + /* Free the control endpoint. */ + inet_ctl_sock_destroy(net->sctp.ctl_sock); + + sctp_dbg_objcnt_exit(net); + + sctp_proc_exit(net); + cleanup_sctp_mibs(net); + sctp_sysctl_net_unregister(net); +} + +static struct pernet_operations sctp_net_ops = { + .init = sctp_net_init, + .exit = sctp_net_exit, +}; + /* Initialize the universe into something sensible. */ SCTP_STATIC __init int sctp_init(void) { @@ -1224,62 +1332,9 @@ SCTP_STATIC __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - /* Allocate and initialise sctp mibs. */ - status = init_sctp_mibs(); + status = percpu_counter_init(&sctp_sockets_allocated, 0); if (status) - goto err_init_mibs; - - /* Initialize proc fs directory. */ - status = sctp_proc_init(); - if (status) - goto err_init_proc; - - /* Initialize object count debugging. */ - sctp_dbg_objcnt_init(); - - /* - * 14. Suggested SCTP Protocol Parameter Values - */ - /* The following protocol parameters are RECOMMENDED: */ - /* RTO.Initial - 3 seconds */ - sctp_rto_initial = SCTP_RTO_INITIAL; - /* RTO.Min - 1 second */ - sctp_rto_min = SCTP_RTO_MIN; - /* RTO.Max - 60 seconds */ - sctp_rto_max = SCTP_RTO_MAX; - /* RTO.Alpha - 1/8 */ - sctp_rto_alpha = SCTP_RTO_ALPHA; - /* RTO.Beta - 1/4 */ - sctp_rto_beta = SCTP_RTO_BETA; - - /* Valid.Cookie.Life - 60 seconds */ - sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; - - /* Whether Cookie Preservative is enabled(1) or not(0) */ - sctp_cookie_preserve_enable = 1; - - /* Max.Burst - 4 */ - sctp_max_burst = SCTP_DEFAULT_MAX_BURST; - - /* Association.Max.Retrans - 10 attempts - * Path.Max.Retrans - 5 attempts (per destination address) - * Max.Init.Retransmits - 8 attempts - */ - sctp_max_retrans_association = 10; - sctp_max_retrans_path = 5; - sctp_max_retrans_init = 8; - - /* Sendbuffer growth - do per-socket accounting */ - sctp_sndbuf_policy = 0; - - /* Rcvbuffer growth - do per-socket accounting */ - sctp_rcvbuf_policy = 0; - - /* HB.interval - 30 seconds */ - sctp_hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT; - - /* delayed SACK timeout */ - sctp_sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK; + goto err_percpu_counter_init; /* Implementation specific variables. */ @@ -1287,9 +1342,6 @@ SCTP_STATIC __init int sctp_init(void) sctp_max_instreams = SCTP_DEFAULT_INSTREAMS; sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS; - /* Initialize maximum autoclose timeout. */ - sctp_max_autoclose = INT_MAX / HZ; - /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); @@ -1376,41 +1428,12 @@ SCTP_STATIC __init int sctp_init(void) pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); - /* Disable ADDIP by default. */ - sctp_addip_enable = 0; - sctp_addip_noauth = 0; - sctp_default_auto_asconf = 0; - - /* Enable PR-SCTP by default. */ - sctp_prsctp_enable = 1; - - /* Disable AUTH by default. */ - sctp_auth_enable = 0; - - /* Set SCOPE policy to enabled */ - sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; - - /* Set the default rwnd update threshold */ - sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; - sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); sctp_v4_pf_init(); sctp_v6_pf_init(); - /* Initialize the local address list. */ - INIT_LIST_HEAD(&sctp_local_addr_list); - spin_lock_init(&sctp_local_addr_lock); - sctp_get_local_addr_list(); - - /* Initialize the address event list */ - INIT_LIST_HEAD(&sctp_addr_waitq); - INIT_LIST_HEAD(&sctp_auto_asconf_splist); - spin_lock_init(&sctp_addr_wq_lock); - sctp_addr_wq_timer.expires = 0; - setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0); - status = sctp_v4_protosw_init(); if (status) @@ -1420,11 +1443,9 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_v6_protosw_init; - /* Initialize the control inode/socket for handling OOTB packets. */ - if ((status = sctp_ctl_sock_init())) { - pr_err("Failed to initialize the SCTP control sock\n"); - goto err_ctl_sock_init; - } + status = register_pernet_subsys(&sctp_net_ops); + if (status) + goto err_register_pernet_subsys; status = sctp_v4_add_protocol(); if (status) @@ -1441,13 +1462,12 @@ out: err_v6_add_protocol: sctp_v4_del_protocol(); err_add_protocol: - inet_ctl_sock_destroy(sctp_ctl_sock); -err_ctl_sock_init: + unregister_pernet_subsys(&sctp_net_ops); +err_register_pernet_subsys: sctp_v6_protosw_exit(); err_v6_protosw_init: sctp_v4_protosw_exit(); err_protosw_init: - sctp_free_local_addr_list(); sctp_v4_pf_exit(); sctp_v6_pf_exit(); sctp_sysctl_unregister(); @@ -1461,11 +1481,8 @@ err_ehash_alloc: get_order(sctp_assoc_hashsize * sizeof(struct sctp_hashbucket))); err_ahash_alloc: - sctp_dbg_objcnt_exit(); - sctp_proc_exit(); -err_init_proc: - cleanup_sctp_mibs(); -err_init_mibs: + percpu_counter_destroy(&sctp_sockets_allocated); +err_percpu_counter_init: kmem_cache_destroy(sctp_chunk_cachep); err_chunk_cachep: kmem_cache_destroy(sctp_bucket_cachep); @@ -1482,18 +1499,13 @@ SCTP_STATIC __exit void sctp_exit(void) /* Unregister with inet6/inet layers. */ sctp_v6_del_protocol(); sctp_v4_del_protocol(); - sctp_free_addr_wq(); - /* Free the control endpoint. */ - inet_ctl_sock_destroy(sctp_ctl_sock); + unregister_pernet_subsys(&sctp_net_ops); /* Free protosw registrations */ sctp_v6_protosw_exit(); sctp_v4_protosw_exit(); - /* Free the local address list. */ - sctp_free_local_addr_list(); - /* Unregister with socket layer. */ sctp_v6_pf_exit(); sctp_v4_pf_exit(); @@ -1508,9 +1520,7 @@ SCTP_STATIC __exit void sctp_exit(void) get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); - sctp_dbg_objcnt_exit(); - sctp_proc_exit(); - cleanup_sctp_mibs(); + percpu_counter_destroy(&sctp_sockets_allocated); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 479a70ef6ff8..fbe1636309a7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -198,6 +198,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, gfp_t gfp, int vparam_len) { + struct net *net = sock_net(asoc->base.sk); sctp_inithdr_t init; union sctp_params addrs; size_t chunksize; @@ -237,7 +238,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) chunksize += sizeof(prsctp_param); /* ADDIP: Section 4.2.7: @@ -245,7 +246,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and * INIT-ACK parameters. */ - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; @@ -257,7 +258,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += vparam_len; /* Account for AUTH related parameters */ - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { /* Add random parameter length*/ chunksize += sizeof(asoc->c.auth_random); @@ -331,7 +332,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_param(retval, num_ext, extensions); } - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); if (sp->adaptation_ind) { @@ -342,7 +343,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, } /* Add SCTP-AUTH chunks to the parameter list */ - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), asoc->c.auth_random); if (auth_hmacs) @@ -1940,7 +1941,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, return 0; } -static int sctp_verify_ext_param(union sctp_params param) +static int sctp_verify_ext_param(struct net *net, union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); int have_auth = 0; @@ -1964,10 +1965,10 @@ static int sctp_verify_ext_param(union sctp_params param) * only if ADD-IP is turned on and we are not backward-compatible * mode. */ - if (sctp_addip_noauth) + if (net->sctp.addip_noauth) return 1; - if (sctp_addip_enable && !have_auth && have_asconf) + if (net->sctp.addip_enable && !have_auth && have_asconf) return 0; return 1; @@ -1976,13 +1977,14 @@ static int sctp_verify_ext_param(union sctp_params param) static void sctp_process_ext_param(struct sctp_association *asoc, union sctp_params param) { + struct net *net = sock_net(asoc->base.sk); __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); int i; for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { case SCTP_CID_FWD_TSN: - if (sctp_prsctp_enable && + if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable) asoc->peer.prsctp_capable = 1; break; @@ -1990,12 +1992,12 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - if (sctp_auth_enable) + if (net->sctp.auth_enable) asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - if (sctp_addip_enable) + if (net->sctp.addip_enable) asoc->peer.asconf_capable = 1; break; default: @@ -2081,7 +2083,8 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * SCTP_IERROR_ERROR - stop processing, trigger an ERROR * SCTP_IERROR_NO_ERROR - continue with the chunk */ -static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, +static sctp_ierror_t sctp_verify_param(struct net *net, + const struct sctp_association *asoc, union sctp_params param, sctp_cid_t cid, struct sctp_chunk *chunk, @@ -2110,12 +2113,12 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_SUPPORTED_EXT: - if (!sctp_verify_ext_param(param)) + if (!sctp_verify_ext_param(net, param)) return SCTP_IERROR_ABORT; break; case SCTP_PARAM_SET_PRIMARY: - if (sctp_addip_enable) + if (net->sctp.addip_enable) break; goto fallthrough; @@ -2126,12 +2129,12 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) break; goto fallthrough; case SCTP_PARAM_RANDOM: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; /* SCTP-AUTH: Secion 6.1 @@ -2148,7 +2151,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_CHUNKS: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; /* SCTP-AUTH: Section 3.2 @@ -2164,7 +2167,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_HMAC_ALGO: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; @@ -2198,7 +2201,7 @@ fallthrough: } /* Verify the INIT packet before we process it. */ -int sctp_verify_init(const struct sctp_association *asoc, +int sctp_verify_init(struct net *net, const struct sctp_association *asoc, sctp_cid_t cid, sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, @@ -2245,7 +2248,7 @@ int sctp_verify_init(const struct sctp_association *asoc, /* Verify all the variable length parameters */ sctp_walk_params(param, peer_init, init_hdr.params) { - result = sctp_verify_param(asoc, param, cid, chunk, errp); + result = sctp_verify_param(net, asoc, param, cid, chunk, errp); switch (result) { case SCTP_IERROR_ABORT: case SCTP_IERROR_NOMEM: @@ -2270,6 +2273,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, const union sctp_addr *peer_addr, sctp_init_chunk_t *peer_init, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); union sctp_params param; struct sctp_transport *transport; struct list_head *pos, *temp; @@ -2326,7 +2330,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * also give us an option to silently ignore the packet, which * is what we'll do here. */ - if (!sctp_addip_noauth && + if (!net->sctp.addip_noauth && (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) { asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | SCTP_PARAM_DEL_IP | @@ -2466,6 +2470,7 @@ static int sctp_process_param(struct sctp_association *asoc, const union sctp_addr *peer_addr, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); union sctp_addr addr; int i; __u16 sat; @@ -2494,13 +2499,13 @@ do_addr_param: af = sctp_get_af_specific(param_type2af(param.p->type)); af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0); scope = sctp_scope(peer_addr); - if (sctp_in_scope(&addr, scope)) + if (sctp_in_scope(net, &addr, scope)) if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)) return 0; break; case SCTP_PARAM_COOKIE_PRESERVATIVE: - if (!sctp_cookie_preserve_enable) + if (!net->sctp.cookie_preserve_enable) break; stale = ntohl(param.life->lifespan_increment); @@ -2580,7 +2585,7 @@ do_addr_param: break; case SCTP_PARAM_SET_PRIMARY: - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) goto fall_through; addr_param = param.v + sizeof(sctp_addip_param_t); @@ -2607,7 +2612,7 @@ do_addr_param: break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (sctp_prsctp_enable) { + if (net->sctp.prsctp_enable) { asoc->peer.prsctp_capable = 1; break; } @@ -2615,7 +2620,7 @@ do_addr_param: goto fall_through; case SCTP_PARAM_RANDOM: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; /* Save peer's random parameter */ @@ -2628,7 +2633,7 @@ do_addr_param: break; case SCTP_PARAM_HMAC_ALGO: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; /* Save peer's HMAC list */ @@ -2644,7 +2649,7 @@ do_addr_param: break; case SCTP_PARAM_CHUNKS: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; asoc->peer.peer_chunks = kmemdup(param.p, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index fe99628e1257..57f7de839b03 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -251,6 +251,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; + struct net *net = sock_net(asoc->base.sk); /* Check whether a task is in the sock. */ @@ -271,7 +272,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) goto out_unlock; /* Run through the state machine. */ - error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT, + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX), asoc->state, asoc->ep, asoc, @@ -291,6 +292,7 @@ out_unlock: static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { + struct net *net = sock_net(asoc->base.sk); int error = 0; sctp_bh_lock_sock(asoc->base.sk); @@ -312,7 +314,7 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, goto out_unlock; /* Run through the state machine. */ - error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT, + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(timeout_type), asoc->state, asoc->ep, asoc, (void *)timeout_type, GFP_ATOMIC); @@ -371,6 +373,7 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; + struct net *net = sock_net(asoc->base.sk); sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { @@ -388,7 +391,7 @@ void sctp_generate_heartbeat_event(unsigned long data) if (transport->dead) goto out_unlock; - error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT, + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); @@ -408,6 +411,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; + struct net *net = sock_net(asoc->base.sk); sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { @@ -426,7 +430,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) if (asoc->base.dead) goto out_unlock; - sctp_do_sm(SCTP_EVENT_T_OTHER, + sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); @@ -748,13 +752,15 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* Helper function to process the process SACK command. */ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, - struct sctp_sackhdr *sackh) + struct sctp_chunk *chunk) { int err = 0; - if (sctp_outq_sack(&asoc->outqueue, sackh)) { + if (sctp_outq_sack(&asoc->outqueue, chunk)) { + struct net *net = sock_net(asoc->base.sk); + /* There are no more TSNs awaiting SACK. */ - err = sctp_do_sm(SCTP_EVENT_T_OTHER, + err = sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN), asoc->state, asoc->ep, asoc, NULL, GFP_ATOMIC); @@ -1042,6 +1048,8 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc, */ static void sctp_cmd_send_asconf(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); + /* Send the next asconf chunk from the addip chunk * queue. */ @@ -1053,7 +1061,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) /* Hold the chunk until an ASCONF_ACK is received. */ sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(asoc, asconf)) + if (sctp_primitive_ASCONF(net, asoc, asconf)) sctp_chunk_free(asconf); else asoc->addip_last_asconf = asconf; @@ -1089,7 +1097,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) * If you want to understand all of lksctp, this is a * good place to start. */ -int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, +int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_state_t state, struct sctp_endpoint *ep, struct sctp_association *asoc, @@ -1110,12 +1118,12 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, /* Look up the state function, run it, and then process the * side effects. These three steps are the heart of lksctp. */ - state_fn = sctp_sm_lookup_event(event_type, state, subtype); + state_fn = sctp_sm_lookup_event(net, event_type, state, subtype); sctp_init_cmd_seq(&commands); DEBUG_PRE; - status = (*state_fn->fn)(ep, asoc, subtype, event_arg, &commands); + status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands); DEBUG_POST; error = sctp_side_effects(event_type, subtype, state, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9fca10357350..b6adef8a1e93 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -66,7 +66,8 @@ #include <net/sctp/sm.h> #include <net/sctp/structs.h> -static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, +static struct sctp_packet *sctp_abort_pkt_new(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, const void *payload, @@ -74,36 +75,43 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands); -static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc, +static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, + const struct sctp_association *asoc, const struct sctp_chunk *chunk); -static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, +static void sctp_send_stale_cookie_err(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, struct sctp_chunk *err_chunk); -static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); -static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); -static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); -static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, +static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, + sctp_cmd_seq_t *commands, __be16 error, int sk_err, const struct sctp_association *asoc, struct sctp_transport *transport); static sctp_disposition_t sctp_sf_abort_violation( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, void *arg, @@ -112,6 +120,7 @@ static sctp_disposition_t sctp_sf_abort_violation( const size_t paylen); static sctp_disposition_t sctp_sf_violation_chunklen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -119,6 +128,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_paramlen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -126,6 +136,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_ctsn( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -133,18 +144,21 @@ static sctp_disposition_t sctp_sf_violation_ctsn( sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_chunk( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); -static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, +static sctp_ierror_t sctp_sf_authenticate(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, struct sctp_chunk *chunk); -static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, +static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -204,7 +218,8 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_4_C(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -214,7 +229,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, struct sctp_ulpevent *ev; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* RFC 2960 6.10 Bundling * @@ -222,11 +237,11 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, * SHUTDOWN COMPLETE with any other chunks. */ if (!chunk->singleton) - return sctp_sf_violation_chunk(ep, asoc, type, arg, commands); + return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* RFC 2960 10.2 SCTP-to-ULP @@ -259,8 +274,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); @@ -289,7 +304,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -313,21 +329,21 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * with an INIT chunk that is bundled with other chunks. */ if (!chunk->singleton) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Make sure that the INIT chunk has a valid length. * Normally, this would cause an ABORT with a Protocol Violation @@ -335,7 +351,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * just discard the packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being @@ -344,18 +360,18 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * can treat this OOTB */ if (sctp_sstate(ep->base.sk, CLOSING)) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. */ if (err_chunk) { - packet = sctp_abort_pkt_new(ep, asoc, arg, + packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(sctp_chunkhdr_t), ntohs(err_chunk->chunk_hdr->length) - @@ -366,13 +382,13 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); return SCTP_DISPOSITION_CONSUME; } else { return SCTP_DISPOSITION_NOMEM; } } else { - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } } @@ -484,7 +500,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -496,25 +513,25 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, struct sctp_packet *packet; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. */ if (!chunk->singleton) - return sctp_sf_violation_chunk(ep, asoc, type, arg, commands); + return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the INIT-ACK chunk has a valid length */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { @@ -526,7 +543,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * the association. */ if (err_chunk) { - packet = sctp_abort_pkt_new(ep, asoc, arg, + packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(sctp_chunkhdr_t), ntohs(err_chunk->chunk_hdr->length) - @@ -537,7 +554,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); error = SCTP_ERROR_INV_PARAM; } } @@ -554,10 +571,10 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * was malformed. */ if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc, chunk->transport); } @@ -633,7 +650,8 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -650,9 +668,9 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* Make sure that the COOKIE_ECHO chunk has a valid length. @@ -661,7 +679,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, * in sctp_unpack_cookie(). */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the endpoint is not listening or if the number of associations * on the TCP-style socket exceed the max backlog, respond with an @@ -670,7 +688,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, sk = ep->base.sk; if (!sctp_sstate(sk, LISTENING) || (sctp_style(sk, TCP) && sk_acceptq_is_full(sk))) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* "Decode" the chunk. We have no optional parameters so we * are in good shape. @@ -703,13 +721,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, goto nomem; case -SCTP_IERROR_STALE_COOKIE: - sctp_send_stale_cookie_err(ep, asoc, chunk, commands, + sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands, err_chk_p); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case -SCTP_IERROR_BAD_SIG: default: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } @@ -756,14 +774,14 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t)); auth.transport = chunk->transport; - ret = sctp_sf_authenticate(ep, new_asoc, type, &auth); + ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); /* We can now safely free the auth_chunk clone */ kfree_skb(chunk->auth_chunk); if (ret != SCTP_IERROR_NO_ERROR) { sctp_association_free(new_asoc); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } @@ -804,8 +822,8 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (new_asoc->autoclose) @@ -856,7 +874,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -865,13 +884,13 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, struct sctp_ulpevent *ev; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Verify that the chunk length for the COOKIE-ACK is OK. * If we don't do this, any bundled chunks may be junked. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Reset init error count upon receipt of COOKIE-ACK, @@ -892,8 +911,8 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_ACTIVEESTABS); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (asoc->autoclose) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, @@ -958,7 +977,8 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep, } /* Generate a HEARTBEAT packet on the given transport. */ -sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -972,8 +992,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } @@ -1028,7 +1048,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_beat_8_3(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1039,11 +1060,11 @@ sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep, size_t paylen = 0; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* 8.3 The receiver of the HEARTBEAT should immediately @@ -1095,7 +1116,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1108,12 +1130,12 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, unsigned long max_interval; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT-ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) + sizeof(sctp_sender_hb_info_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data; @@ -1171,7 +1193,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, /* Helper function to send out an abort for the restart * condition. */ -static int sctp_sf_send_restart_abort(union sctp_addr *ssa, +static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { @@ -1197,18 +1219,18 @@ static int sctp_sf_send_restart_abort(union sctp_addr *ssa, errhdr->length = htons(len); /* Assign to the control socket. */ - ep = sctp_sk((sctp_get_ctl_sock()))->ep; + ep = sctp_sk(net->sctp.ctl_sock)->ep; /* Association is NULL since this may be a restart attack and we * want to send back the attacker's vtag. */ - pkt = sctp_abort_pkt_new(ep, NULL, init, errhdr, len); + pkt = sctp_abort_pkt_new(net, ep, NULL, init, errhdr, len); if (!pkt) goto out; sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(pkt)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); /* Discard the rest of the inbound packet. */ sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); @@ -1240,6 +1262,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(new_asoc->base.sk); struct sctp_transport *new_addr; int ret = 1; @@ -1258,7 +1281,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, transports) { if (!list_has_sctp_addr(&asoc->peer.transport_addr_list, &new_addr->ipaddr)) { - sctp_sf_send_restart_abort(&new_addr->ipaddr, init, + sctp_sf_send_restart_abort(net, &new_addr->ipaddr, init, commands); ret = 0; break; @@ -1358,6 +1381,7 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc, * chunk handling. */ static sctp_disposition_t sctp_sf_do_unexpected_init( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -1382,20 +1406,20 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * with an INIT chunk that is bundled with other chunks. */ if (!chunk->singleton) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Make sure that the INIT chunk has a valid length. * In this case, we generate a protocol violation since we have * an association established. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; @@ -1405,14 +1429,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. */ if (err_chunk) { - packet = sctp_abort_pkt_new(ep, asoc, arg, + packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(sctp_chunkhdr_t), ntohs(err_chunk->chunk_hdr->length) - @@ -1421,14 +1445,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); retval = SCTP_DISPOSITION_CONSUME; } else { retval = SCTP_DISPOSITION_NOMEM; } goto cleanup; } else { - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } } @@ -1570,7 +1594,8 @@ cleanup: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1579,7 +1604,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep, /* Call helper to do the real work for both simulataneous and * duplicate INIT chunk handling. */ - return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands); + return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); } /* @@ -1623,7 +1648,8 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1632,7 +1658,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, /* Call helper to do the real work for both simulataneous and * duplicate INIT chunk handling. */ - return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands); + return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); } @@ -1645,7 +1671,8 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, * An unexpected INIT ACK usually indicates the processing of an old or * duplicated INIT chunk. */ -sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -1653,10 +1680,10 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, /* Per the above section, we'll discard the chunk if we have an * endpoint. If this is an OOTB INIT-ACK, treat it as such. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) - return sctp_sf_ootb(ep, asoc, type, arg, commands); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) + return sctp_sf_ootb(net, ep, asoc, type, arg, commands); else - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); } /* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A') @@ -1664,7 +1691,8 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, * Section 5.2.4 * A) In this case, the peer may have restarted. */ -static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1700,7 +1728,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { - disposition = sctp_sf_do_9_2_reshutack(ep, asoc, + disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc, SCTP_ST_CHUNK(chunk->chunk_hdr->type), chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) @@ -1763,7 +1791,8 @@ nomem: * after responding to the local endpoint's INIT */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1784,7 +1813,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); repl = sctp_make_cookie_ack(new_asoc, chunk); @@ -1833,7 +1862,8 @@ nomem: * but a new tag of its own. */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1854,7 +1884,8 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep, * enter the ESTABLISHED state, if it has not already done so. */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1876,7 +1907,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); @@ -1948,7 +1979,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1967,7 +1999,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, * done later. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* "Decode" the chunk. We have no optional parameters so we @@ -2001,12 +2033,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, goto nomem; case -SCTP_IERROR_STALE_COOKIE: - sctp_send_stale_cookie_err(ep, asoc, chunk, commands, + sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands, err_chk_p); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case -SCTP_IERROR_BAD_SIG: default: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } @@ -2017,27 +2049,27 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, switch (action) { case 'A': /* Association restart. */ - retval = sctp_sf_do_dupcook_a(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_a(net, ep, asoc, chunk, commands, new_asoc); break; case 'B': /* Collision case B. */ - retval = sctp_sf_do_dupcook_b(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_b(net, ep, asoc, chunk, commands, new_asoc); break; case 'C': /* Collision case C. */ - retval = sctp_sf_do_dupcook_c(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_c(net, ep, asoc, chunk, commands, new_asoc); break; case 'D': /* Collision case D. */ - retval = sctp_sf_do_dupcook_d(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_d(net, ep, asoc, chunk, commands, new_asoc); break; default: /* Discard packet for all others. */ - retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands); + retval = sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); break; } @@ -2063,6 +2095,7 @@ nomem: * See sctp_sf_do_9_1_abort(). */ sctp_disposition_t sctp_sf_shutdown_pending_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -2072,7 +2105,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2085,7 +2118,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving @@ -2094,9 +2127,9 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } /* @@ -2104,7 +2137,8 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * * See sctp_sf_do_9_1_abort(). */ -sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2113,7 +2147,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2126,7 +2160,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving @@ -2135,7 +2169,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -2145,7 +2179,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } /* @@ -2154,6 +2188,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, * See sctp_sf_do_9_1_abort(). */ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -2163,7 +2198,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( /* The same T2 timer, so we should be able to use * common function with the SHUTDOWN-SENT state. */ - return sctp_sf_shutdown_sent_abort(ep, asoc, type, arg, commands); + return sctp_sf_shutdown_sent_abort(net, ep, asoc, type, arg, commands); } /* @@ -2180,7 +2215,8 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2190,13 +2226,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, sctp_errhdr_t *err; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ERROR chunk has a valid length. * The parameter walking depends on this as well. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Process the error here */ @@ -2206,7 +2242,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, */ sctp_walk_errors(err, chunk->chunk_hdr) { if (SCTP_ERROR_STALE_COOKIE == err->cause) - return sctp_sf_do_5_2_6_stale(ep, asoc, type, + return sctp_sf_do_5_2_6_stale(net, ep, asoc, type, arg, commands); } @@ -2215,7 +2251,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, * we are discarding the packet, there should be no adverse * affects. */ - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* @@ -2243,7 +2279,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2365,7 +2402,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2374,7 +2412,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2387,7 +2425,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving @@ -2396,12 +2434,13 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } -static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, +static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2418,7 +2457,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, sctp_errhdr_t *err; sctp_walk_errors(err, chunk->chunk_hdr); if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); error = ((sctp_errhdr_t *)chunk->skb->data)->cause; } @@ -2426,8 +2465,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -2437,7 +2476,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, * * See sctp_sf_do_9_1_abort() above. */ -sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2448,7 +2488,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, __be16 error = SCTP_ERROR_NO_ERROR; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2461,27 +2501,28 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((sctp_errhdr_t *)chunk->skb->data)->cause; - return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc, + return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc, chunk->transport); } /* * Process an incoming ICMP as an ABORT. (COOKIE-WAIT state) */ -sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - return sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR, + return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR, ENOPROTOOPT, asoc, (struct sctp_transport *)arg); } @@ -2489,7 +2530,8 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep /* * Process an ABORT. (COOKIE-ECHOED state) */ -sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2498,7 +2540,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. */ - return sctp_sf_cookie_wait_abort(ep, asoc, type, arg, commands); + return sctp_sf_cookie_wait_abort(net, ep, asoc, type, arg, commands); } /* @@ -2506,7 +2548,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, * * This is common code called by several sctp_sf_*_abort() functions above. */ -static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, +static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, + sctp_cmd_seq_t *commands, __be16 error, int sk_err, const struct sctp_association *asoc, struct sctp_transport *transport) @@ -2514,7 +2557,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, SCTP_DEBUG_PRINTK("ABORT received (INIT).\n"); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err)); @@ -2557,7 +2600,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2570,12 +2614,12 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Convert the elaborate header. */ @@ -2595,7 +2639,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, * sender with an ABORT. */ if (!TSN_lt(ctsn, asoc->next_tsn)) - return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT * When a peer sends a SHUTDOWN, SCTP delivers this notification to @@ -2619,7 +2663,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { - disposition = sctp_sf_do_9_2_shutdown_ack(ep, asoc, type, + disposition = sctp_sf_do_9_2_shutdown_ack(net, ep, asoc, type, arg, commands); } @@ -2645,7 +2689,8 @@ out: * The Cumulative TSN Ack of the received SHUTDOWN chunk * MUST be processed. */ -sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2656,12 +2701,12 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); sdh = (sctp_shutdownhdr_t *)chunk->skb->data; @@ -2678,7 +2723,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, * sender with an ABORT. */ if (!TSN_lt(ctsn, asoc->next_tsn)) - return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* verify, by checking the Cumulative TSN Ack field of the * chunk, that all its outstanding DATA chunks have been @@ -2697,7 +2742,8 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2708,7 +2754,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep, /* Make sure that the chunk has a valid length */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Since we are not going to really process this INIT, there @@ -2760,7 +2806,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2771,10 +2818,10 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, u32 lowest_tsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); cwr = (sctp_cwrhdr_t *) chunk->skb->data; @@ -2815,7 +2862,8 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_ecne(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2825,10 +2873,10 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); ecne = (sctp_ecnehdr_t *) chunk->skb->data; @@ -2871,7 +2919,8 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2884,11 +2933,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); error = sctp_eat_data(asoc, chunk, commands ); @@ -2897,16 +2946,16 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_abort_violation(ep, asoc, chunk, commands, + return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); default: BUG(); @@ -2992,7 +3041,8 @@ consume: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3004,11 +3054,11 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); error = sctp_eat_data(asoc, chunk, commands ); @@ -3022,7 +3072,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, case SCTP_IERROR_NO_DATA: goto consume; case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_abort_violation(ep, asoc, chunk, commands, + return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); default: BUG(); @@ -3082,7 +3132,8 @@ consume: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3093,18 +3144,18 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Pull the SACK chunk from the data buffer */ sackh = sctp_sm_pull_sack(chunk); /* Was this a bogus SACK? */ if (!sackh) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); chunk->subh.sack_hdr = sackh; ctsn = ntohl(sackh->cum_tsn_ack); @@ -3125,10 +3176,10 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, * sender with an ABORT. */ if (!TSN_lt(ctsn, asoc->next_tsn)) - return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* Return this SACK for further processing. */ - sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh)); + sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_CHUNK(chunk)); /* Note: We do the rest of the work on the PROCESS_SACK * sideeffect. @@ -3154,7 +3205,8 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3164,7 +3216,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; struct sctp_chunk *abort; - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an ABORT. The T bit will be set if the asoc @@ -3188,9 +3240,9 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); - sctp_sf_pdiscard(ep, asoc, type, arg, commands); + sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; } @@ -3205,7 +3257,8 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_operr_notify(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3215,15 +3268,15 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, sctp_errhdr_t *err; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ERROR chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); sctp_walk_errors(err, chunk->chunk_hdr); if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err, commands); sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, @@ -3242,7 +3295,8 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_final(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3253,11 +3307,11 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, struct sctp_ulpevent *ev; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* 10.2 H) SHUTDOWN COMPLETE notification * @@ -3290,8 +3344,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); /* ...and remove all record of the association. */ @@ -3324,7 +3378,8 @@ nomem: * receiver of the OOTB packet shall discard the OOTB packet and take * no further action. */ -sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_ootb(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3338,13 +3393,13 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, int ootb_shut_ack = 0; int ootb_cookie_ack = 0; - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); ch = (sctp_chunkhdr_t *) chunk->chunk_hdr; do { /* Report violation if the chunk is less then minimal */ if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Now that we know we at least have a chunk header, @@ -3359,7 +3414,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, * sending an ABORT of its own. */ if (SCTP_CID_ABORT == ch->type) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR * or a COOKIE ACK the SCTP Packet should be silently @@ -3381,18 +3436,18 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, /* Report violation if chunk len overflows */ ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); ch = (sctp_chunkhdr_t *) ch_end; } while (ch_end < skb_tail_pointer(skb)); if (ootb_shut_ack) - return sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands); + return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands); else if (ootb_cookie_ack) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); else - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* @@ -3416,7 +3471,8 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3426,7 +3482,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; struct sctp_chunk *shut; - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an SHUTDOWN_COMPLETE. @@ -3450,19 +3506,19 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); /* If the chunk length is invalid, we don't want to process * the reset of the packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* We need to discard the rest of the packet to prevent * potential bomming attacks from additional bundled chunks. * This is documented in SCTP Threats ID. */ - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } return SCTP_DISPOSITION_NOMEM; @@ -3479,7 +3535,8 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, * chunks. --piggy ] * */ -sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3489,7 +3546,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Although we do have an association in this case, it corresponds @@ -3497,13 +3554,14 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, * packet and the state function that handles OOTB SHUTDOWN_ACK is * called with a NULL association. */ - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); - return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands); + return sctp_sf_shut_8_4_5(net, ep, NULL, type, arg, commands); } /* ADDIP Section 4.2 Upon reception of an ASCONF Chunk. */ -sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_asconf(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -3519,7 +3577,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* ADD-IP: Section 4.1.1 @@ -3528,12 +3586,12 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!sctp_addip_noauth && !chunk->auth) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + if (!net->sctp.addip_noauth && !chunk->auth) + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ASCONF ADDIP chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); hdr = (sctp_addiphdr_t *)chunk->skb->data; @@ -3542,7 +3600,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, addr_param = (union sctp_addr_param *)hdr->params; length = ntohs(addr_param->p.length); if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)addr_param, commands); /* Verify the ASCONF chunk before processing it. */ @@ -3550,7 +3608,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)((void *)addr_param + length), (void *)chunk->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); /* ADDIP 5.2 E1) Compare the value of the serial number to the value @@ -3630,7 +3688,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, * When building TLV parameters for the ASCONF Chunk that will add or * delete IP addresses the D0 to D13 rules should be applied: */ -sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -3645,7 +3704,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(asconf_ack, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* ADD-IP, Section 4.1.2: @@ -3654,12 +3713,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!sctp_addip_noauth && !asconf_ack->auth) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + if (!net->sctp.addip_noauth && !asconf_ack->auth) + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ADDIP chunk has a valid length. */ if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data; @@ -3670,7 +3729,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)addip_hdr->params, (void *)asconf_ack->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); if (last_asconf) { @@ -3705,8 +3764,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -3739,8 +3798,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -3761,7 +3820,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3776,12 +3836,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Make sure that the FORWARD_TSN chunk has valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data; @@ -3828,6 +3888,7 @@ discard_noforce: } sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -3843,12 +3904,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Make sure that the FORWARD_TSN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data; @@ -3915,7 +3976,8 @@ gen_shutdown: * * The return value is the disposition of the chunk. */ -static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, +static sctp_ierror_t sctp_sf_authenticate(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, struct sctp_chunk *chunk) @@ -3988,7 +4050,8 @@ nomem: return SCTP_IERROR_NOMEM; } -sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_auth(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4001,21 +4064,21 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, /* Make sure that the peer has AUTH capable */ if (!asoc->peer.auth_capable) - return sctp_sf_unk_chunk(ep, asoc, type, arg, commands); + return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands); if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Make sure that the AUTH chunk has valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_auth_chunk))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); auth_hdr = (struct sctp_authhdr *)chunk->skb->data; - error = sctp_sf_authenticate(ep, asoc, type, chunk); + error = sctp_sf_authenticate(net, ep, asoc, type, chunk); switch (error) { case SCTP_IERROR_AUTH_BAD_HMAC: /* Generate the ERROR chunk and discard the rest @@ -4032,10 +4095,10 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, /* Fall Through */ case SCTP_IERROR_AUTH_BAD_KEYID: case SCTP_IERROR_BAD_SIG: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); case SCTP_IERROR_NOMEM: @@ -4084,7 +4147,8 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_unk_chunk(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4097,20 +4161,20 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk); if (!sctp_vtag_verify(unk_chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the chunk has a valid length. * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. */ if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); switch (type.chunk & SCTP_CID_ACTION_MASK) { case SCTP_CID_ACTION_DISCARD: /* Discard the packet. */ - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); break; case SCTP_CID_ACTION_DISCARD_ERR: /* Generate an ERROR chunk as response. */ @@ -4125,7 +4189,7 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, } /* Discard the packet. */ - sctp_sf_pdiscard(ep, asoc, type, arg, commands); + sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; break; case SCTP_CID_ACTION_SKIP: @@ -4167,7 +4231,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_discard_chunk(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4180,7 +4245,7 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, * chunkhdr structure to make a comparison. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk); @@ -4205,13 +4270,14 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_pdiscard(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); + SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4232,7 +4298,8 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, * We simply tag the chunk as a violation. The state machine will log * the violation and continue. */ -sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_violation(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4242,7 +4309,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, /* Make sure that the chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_VIOLATION; @@ -4252,6 +4319,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, * Common function to handle a protocol violation. */ static sctp_disposition_t sctp_sf_abort_violation( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, void *arg, @@ -4302,7 +4370,7 @@ static sctp_disposition_t sctp_sf_abort_violation( } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -4316,10 +4384,10 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); } } else { - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (!packet) goto nomem_pkt; @@ -4334,13 +4402,13 @@ static sctp_disposition_t sctp_sf_abort_violation( sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: - sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands); return SCTP_DISPOSITION_ABORT; nomem_pkt: @@ -4369,6 +4437,7 @@ nomem: * Generate an ABORT chunk and terminate the association. */ static sctp_disposition_t sctp_sf_violation_chunklen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4377,7 +4446,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( { static const char err_str[]="The following chunk had invalid length:"; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } @@ -4388,6 +4457,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( * the length is considered as invalid. */ static sctp_disposition_t sctp_sf_violation_paramlen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4407,17 +4477,17 @@ static sctp_disposition_t sctp_sf_violation_paramlen( goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: - sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands); return SCTP_DISPOSITION_ABORT; nomem: return SCTP_DISPOSITION_NOMEM; @@ -4430,6 +4500,7 @@ nomem: * error code. */ static sctp_disposition_t sctp_sf_violation_ctsn( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4438,7 +4509,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( { static const char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:"; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } @@ -4449,6 +4520,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( * on the path and we may not want to continue this communication. */ static sctp_disposition_t sctp_sf_violation_chunk( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4458,9 +4530,9 @@ static sctp_disposition_t sctp_sf_violation_chunk( static const char err_str[]="The following chunk violates protocol:"; if (!asoc) - return sctp_sf_violation(ep, asoc, type, arg, commands); + return sctp_sf_violation(net, ep, asoc, type, arg, commands); - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } /*************************************************************************** @@ -4523,7 +4595,8 @@ static sctp_disposition_t sctp_sf_violation_chunk( * * The return value is a disposition. */ -sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4634,7 +4707,8 @@ nomem: * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_prm_send(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4673,6 +4747,7 @@ sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep, * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4694,7 +4769,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { - disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, + disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type, arg, commands); } return disposition; @@ -4728,6 +4803,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_1_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4759,14 +4835,15 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_USER_ABORT)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return retval; } /* We tried an illegal operation on an association which is closed. */ -sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_error_closed(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4779,7 +4856,8 @@ sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep, /* We tried an illegal operation on an association which is shutting * down. */ -sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_error_shutdown(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4805,6 +4883,7 @@ sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep, * (timers) */ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4817,7 +4896,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); @@ -4839,6 +4918,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( * (timers) */ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4847,7 +4927,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. */ - return sctp_sf_cookie_wait_prm_shutdown(ep, asoc, type, arg, commands); + return sctp_sf_cookie_wait_prm_shutdown(net, ep, asoc, type, arg, commands); } /* @@ -4865,6 +4945,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( * (timers) */ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4884,7 +4965,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -4914,6 +4995,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4923,7 +5005,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. */ - return sctp_sf_cookie_wait_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_cookie_wait_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -4939,6 +5021,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4949,7 +5032,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -4965,6 +5048,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4979,7 +5063,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -4995,6 +5079,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5004,7 +5089,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( /* The same T2 timer, so we should be able to use * common function with the SHUTDOWN-SENT state. */ - return sctp_sf_shutdown_sent_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_shutdown_sent_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -5030,6 +5115,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( * association on which a heartbeat should be issued. */ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5061,7 +5147,8 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( * When an endpoint has an ASCONF signaled change to be sent to the * remote endpoint it should do A1 to A9 */ -sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5082,6 +5169,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep, * The return value is the disposition of the primitive. */ sctp_disposition_t sctp_sf_ignore_primitive( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5103,6 +5191,7 @@ sctp_disposition_t sctp_sf_ignore_primitive( * retransmit, the stack will immediately send up this notification. */ sctp_disposition_t sctp_sf_do_no_pending_tsn( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5134,6 +5223,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn( * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5203,6 +5293,7 @@ nomem: * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5221,11 +5312,11 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( */ if (chunk) { if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); } @@ -5273,7 +5364,8 @@ nomem: * * The return value is the disposition of the event. */ -sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_ignore_other(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5298,7 +5390,8 @@ sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5306,7 +5399,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, { struct sctp_transport *transport = arg; - SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS); if (asoc->overall_error_count >= asoc->max_retrans) { if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { @@ -5327,8 +5420,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } } @@ -5384,13 +5477,14 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, * allow. However, an SCTP transmitter MUST NOT be more aggressive than * the following algorithms allow. */ -sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; } @@ -5414,7 +5508,8 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, * (timers, events) * */ -sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5425,7 +5520,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; @@ -5475,7 +5570,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, * (timers, events) * */ -sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5485,7 +5581,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { repl = sctp_make_cookie_echo(asoc, NULL); @@ -5523,7 +5619,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep * the T2-Shutdown timer, giving its peer ample opportunity to transmit * all of its queued DATA chunks that have not yet been sent. */ -sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5532,7 +5629,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS); ((struct sctp_association *)asoc)->shutdown_retries++; @@ -5542,8 +5639,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, /* Note: CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } @@ -5592,6 +5689,7 @@ nomem: * If the T4 RTO timer expires the endpoint should do B1 to B5 */ sctp_disposition_t sctp_sf_t4_timer_expire( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5601,7 +5699,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire( struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; - SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T4_RTO_EXPIREDS); /* ADDIP 4.1 B1) Increment the error counters and perform path failure * detection on the appropriate destination address as defined in @@ -5626,8 +5724,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -5662,7 +5760,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( * At the expiration of this timer the sender SHOULD abort the association * by sending an ABORT chunk. */ -sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5671,7 +5770,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); if (!reply) @@ -5683,8 +5782,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; nomem: @@ -5697,6 +5796,7 @@ nomem: * the user. So this routine looks same as sctp_sf_do_9_2_prm_shutdown(). */ sctp_disposition_t sctp_sf_autoclose_timer_expire( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5705,7 +5805,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( { int disposition; - SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS); /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper @@ -5720,7 +5820,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { - disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, + disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type, arg, commands); } return disposition; @@ -5738,7 +5838,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_not_impl(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5755,7 +5856,8 @@ sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_bug(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5775,7 +5877,8 @@ sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_timer_ignore(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_timer_ignore(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5817,7 +5920,8 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk) /* Create an ABORT packet to be sent as a response, with the specified * error causes. */ -static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, +static struct sctp_packet *sctp_abort_pkt_new(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, const void *payload, @@ -5826,7 +5930,7 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, struct sctp_packet *packet; struct sctp_chunk *abort; - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an ABORT. @@ -5858,7 +5962,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, } /* Allocate a packet for responding in the OOTB conditions. */ -static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc, +static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, + const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_packet *packet; @@ -5911,7 +6016,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc } /* Make a transport for the bucket, Eliza... */ - transport = sctp_transport_new(sctp_source(chunk), GFP_ATOMIC); + transport = sctp_transport_new(net, sctp_source(chunk), GFP_ATOMIC); if (!transport) goto nomem; @@ -5919,7 +6024,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc * the source address. */ sctp_transport_route(transport, (union sctp_addr *)&chunk->dest, - sctp_sk(sctp_get_ctl_sock())); + sctp_sk(net->sctp.ctl_sock)); packet = sctp_packet_init(&transport->packet, transport, sport, dport); packet = sctp_packet_config(packet, vtag, 0); @@ -5937,7 +6042,8 @@ void sctp_ootb_pkt_free(struct sctp_packet *packet) } /* Send a stale cookie error when a invalid COOKIE ECHO chunk is found */ -static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, +static void sctp_send_stale_cookie_err(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -5946,7 +6052,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, struct sctp_packet *packet; if (err_chunk) { - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { struct sctp_signed_cookie *cookie; @@ -5959,7 +6065,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, sctp_packet_append_chunk(packet, err_chunk); sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } else sctp_chunk_free (err_chunk); } @@ -5979,6 +6085,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, __u32 tsn; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); u16 ssn; u16 sid; u8 ordered = 0; @@ -6109,8 +6216,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_DATA)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_IERROR_NO_DATA; } @@ -6120,9 +6227,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * if we renege and the chunk arrives again. */ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(SCTP_MIB_INUNORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INUNORDERCHUNKS); else { - SCTP_INC_STATS(SCTP_MIB_INORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS); ordered = 1; } diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 7c211a7f90f4..84d98d8a5a74 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -59,7 +59,8 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES]; static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES]; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, +static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, + sctp_cid_t cid, sctp_state_t state); @@ -82,13 +83,14 @@ static const sctp_sm_table_entry_t bug = { rtn; \ }) -const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, +const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, + sctp_event_t event_type, sctp_state_t state, sctp_subtype_t event_subtype) { switch (event_type) { case SCTP_EVENT_T_CHUNK: - return sctp_chunk_event_lookup(event_subtype.chunk, state); + return sctp_chunk_event_lookup(net, event_subtype.chunk, state); case SCTP_EVENT_T_TIMEOUT: return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, timeout_event_table); @@ -906,7 +908,8 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE, }; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, +static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, + sctp_cid_t cid, sctp_state_t state) { if (state > SCTP_STATE_MAX) @@ -915,12 +918,12 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, if (cid <= SCTP_CID_BASE_MAX) return &chunk_event_table[cid][state]; - if (sctp_prsctp_enable) { + if (net->sctp.prsctp_enable) { if (cid == SCTP_CID_FWD_TSN) return &prsctp_chunk_event_table[0][state]; } - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { if (cid == SCTP_CID_ASCONF) return &addip_chunk_event_table[0][state]; @@ -928,7 +931,7 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, return &addip_chunk_event_table[1][state]; } - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { if (cid == SCTP_CID_AUTH) return &auth_chunk_event_table[0][state]; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5e259817a7f3..d37d24ff197f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -427,6 +427,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) static int sctp_send_asconf(struct sctp_association *asoc, struct sctp_chunk *chunk) { + struct net *net = sock_net(asoc->base.sk); int retval = 0; /* If there is an outstanding ASCONF chunk, queue it for later @@ -439,7 +440,7 @@ static int sctp_send_asconf(struct sctp_association *asoc, /* Hold the chunk until an ASCONF_ACK is received. */ sctp_chunk_hold(chunk); - retval = sctp_primitive_ASCONF(asoc, chunk); + retval = sctp_primitive_ASCONF(net, asoc, chunk); if (retval) sctp_chunk_free(chunk); else @@ -515,6 +516,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -529,7 +531,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, int i; int retval = 0; - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return retval; sp = sctp_sk(sk); @@ -717,6 +719,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -732,7 +735,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, int stored = 0; chunk = NULL; - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return retval; sp = sctp_sk(sk); @@ -1050,6 +1053,7 @@ static int __sctp_connect(struct sock* sk, int addrs_size, sctp_assoc_t *assoc_id) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc = NULL; @@ -1200,7 +1204,7 @@ static int __sctp_connect(struct sock* sk, goto out_free; } - err = sctp_primitive_ASSOCIATE(asoc, NULL); + err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err < 0) { goto out_free; } @@ -1458,6 +1462,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, */ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_association *asoc; struct list_head *pos, *temp; @@ -1499,9 +1504,9 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) chunk = sctp_make_abort_user(asoc, NULL, 0); if (chunk) - sctp_primitive_ABORT(asoc, chunk); + sctp_primitive_ABORT(net, asoc, chunk); } else - sctp_primitive_SHUTDOWN(asoc, NULL); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } /* On a TCP-style socket, block for at most linger_time if set. */ @@ -1569,6 +1574,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t msg_len) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *new_asoc=NULL, *asoc=NULL; @@ -1714,7 +1720,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, if (sinfo_flags & SCTP_EOF) { SCTP_DEBUG_PRINTK("Shutting down association: %p\n", asoc); - sctp_primitive_SHUTDOWN(asoc, NULL); + sctp_primitive_SHUTDOWN(net, asoc, NULL); err = 0; goto out_unlock; } @@ -1727,7 +1733,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); - sctp_primitive_ABORT(asoc, chunk); + sctp_primitive_ABORT(net, asoc, chunk); err = 0; goto out_unlock; } @@ -1900,7 +1906,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Auto-connect, if we aren't connected already. */ if (sctp_state(asoc, CLOSED)) { - err = sctp_primitive_ASSOCIATE(asoc, NULL); + err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err < 0) goto out_free; SCTP_DEBUG_PRINTK("We associated primitively.\n"); @@ -1928,7 +1934,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, * works that way today. Keep it that way or this * breaks. */ - err = sctp_primitive_SEND(asoc, datamsg); + err = sctp_primitive_SEND(net, asoc, datamsg); /* Did the lower layer accept the chunk? */ if (err) sctp_datamsg_free(datamsg); @@ -2320,7 +2326,9 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, int error; if (params->spp_flags & SPP_HB_DEMAND && trans) { - error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); + struct net *net = sock_net(trans->asoc->base.sk); + + error = sctp_primitive_REQUESTHEARTBEAT(net, trans->asoc, trans); if (error) return error; } @@ -3033,6 +3041,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; @@ -3042,7 +3051,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva sp = sctp_sk(sk); - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return -EPERM; if (optlen != sizeof(struct sctp_setpeerprim)) @@ -3279,9 +3288,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authchunk val; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authchunk)) @@ -3311,11 +3321,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_hmacalgo *hmacs; u32 idents; int err; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen < sizeof(struct sctp_hmacalgo)) @@ -3348,11 +3359,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkey *authkey; struct sctp_association *asoc; int ret; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen <= sizeof(struct sctp_authkey)) @@ -3389,10 +3401,11 @@ static int sctp_setsockopt_active_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3417,10 +3430,11 @@ static int sctp_setsockopt_del_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3471,7 +3485,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, sp->do_auto_asconf = 0; } else if (val && !sp->do_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sctp_auto_asconf_splist); + &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } return 0; @@ -3843,6 +3857,7 @@ out: */ SCTP_STATIC int sctp_init_sock(struct sock *sk) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_sock *sp; @@ -3872,7 +3887,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->default_timetolive = 0; sp->default_rcv_context = 0; - sp->max_burst = sctp_max_burst; + sp->max_burst = net->sctp.max_burst; /* Initialize default setup parameters. These parameters * can be modified with the SCTP_INITMSG socket option or @@ -3880,24 +3895,24 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) */ sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; sp->initmsg.sinit_max_instreams = sctp_max_instreams; - sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; - sp->initmsg.sinit_max_init_timeo = sctp_rto_max; + sp->initmsg.sinit_max_attempts = net->sctp.max_retrans_init; + sp->initmsg.sinit_max_init_timeo = net->sctp.rto_max; /* Initialize default RTO related parameters. These parameters can * be modified for with the SCTP_RTOINFO socket option. */ - sp->rtoinfo.srto_initial = sctp_rto_initial; - sp->rtoinfo.srto_max = sctp_rto_max; - sp->rtoinfo.srto_min = sctp_rto_min; + sp->rtoinfo.srto_initial = net->sctp.rto_initial; + sp->rtoinfo.srto_max = net->sctp.rto_max; + sp->rtoinfo.srto_min = net->sctp.rto_min; /* Initialize default association related parameters. These parameters * can be modified with the SCTP_ASSOCINFO socket option. */ - sp->assocparams.sasoc_asocmaxrxt = sctp_max_retrans_association; + sp->assocparams.sasoc_asocmaxrxt = net->sctp.max_retrans_association; sp->assocparams.sasoc_number_peer_destinations = 0; sp->assocparams.sasoc_peer_rwnd = 0; sp->assocparams.sasoc_local_rwnd = 0; - sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life; + sp->assocparams.sasoc_cookie_life = net->sctp.valid_cookie_life; /* Initialize default event subscriptions. By default, all the * options are off. @@ -3907,10 +3922,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->hbinterval = sctp_hb_interval; - sp->pathmaxrxt = sctp_max_retrans_path; + sp->hbinterval = net->sctp.hb_interval; + sp->pathmaxrxt = net->sctp.max_retrans_path; sp->pathmtu = 0; // allow default discovery - sp->sackdelay = sctp_sack_timeout; + sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | @@ -3961,10 +3976,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - if (sctp_default_auto_asconf) { + sock_prot_inuse_add(net, sk->sk_prot, 1); + if (net->sctp.default_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sctp_auto_asconf_splist); + &net->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } else sp->do_auto_asconf = 0; @@ -4011,6 +4026,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) */ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -4022,7 +4038,7 @@ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) if (!list_empty(&ep->asocs)) { asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); - sctp_primitive_SHUTDOWN(asoc, NULL); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } } } @@ -4653,9 +4669,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, union sctp_addr temp; int cnt = 0; int addrlen; + struct net *net = sock_net(sk); rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; @@ -5299,12 +5316,13 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_hmacalgo __user *p = (void __user *)optval; struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; hmacs = sctp_sk(sk)->ep->auth_hmacs_list; @@ -5328,10 +5346,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, static int sctp_getsockopt_active_key(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authkeyid)) @@ -5360,6 +5379,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5367,7 +5387,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5403,6 +5423,7 @@ num: static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5410,7 +5431,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5769,7 +5790,7 @@ static void sctp_unhash(struct sock *sk) * a fastreuse flag (FIXME: NPI ipg). */ static struct sctp_bind_bucket *sctp_bucket_create( - struct sctp_bind_hashbucket *head, unsigned short snum); + struct sctp_bind_hashbucket *head, struct net *, unsigned short snum); static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { @@ -5799,11 +5820,12 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) rover = low; if (inet_is_reserved_local_port(rover)) continue; - index = sctp_phashfn(rover); + index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); sctp_for_each_hentry(pp, node, &head->chain) - if (pp->port == rover) + if ((pp->port == rover) && + net_eq(sock_net(sk), pp->net)) goto next; break; next: @@ -5827,10 +5849,10 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) * to the port number (snum) - we detect that with the * port iterator, pp being NULL. */ - head = &sctp_port_hashtable[sctp_phashfn(snum)]; + head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; sctp_spin_lock(&head->lock); sctp_for_each_hentry(pp, node, &head->chain) { - if (pp->port == snum) + if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) goto pp_found; } } @@ -5881,7 +5903,7 @@ pp_found: pp_not_found: /* If there was a hash table miss, create a new port. */ ret = 1; - if (!pp && !(pp = sctp_bucket_create(head, snum))) + if (!pp && !(pp = sctp_bucket_create(head, sock_net(sk), snum))) goto fail_unlock; /* In either case (hit or miss), make sure fastreuse is 1 only @@ -6113,7 +6135,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) ********************************************************************/ static struct sctp_bind_bucket *sctp_bucket_create( - struct sctp_bind_hashbucket *head, unsigned short snum) + struct sctp_bind_hashbucket *head, struct net *net, unsigned short snum) { struct sctp_bind_bucket *pp; @@ -6123,6 +6145,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( pp->port = snum; pp->fastreuse = 0; INIT_HLIST_HEAD(&pp->owner); + pp->net = net; hlist_add_head(&pp->node, &head->chain); } return pp; @@ -6142,7 +6165,8 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) static inline void __sctp_put_port(struct sock *sk) { struct sctp_bind_hashbucket *head = - &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)]; + &sctp_port_hashtable[sctp_phashfn(sock_net(sk), + inet_sk(sk)->inet_num)]; struct sctp_bind_bucket *pp; sctp_spin_lock(&head->lock); @@ -6809,7 +6833,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsp->hmac = NULL; /* Hook this new socket in to the bind_hash list. */ - head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)]; + head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk), + inet_sk(oldsk)->inet_num)]; sctp_local_bh_disable(); sctp_spin_lock(&head->lock); pp = sctp_sk(oldsk)->bind_hash; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 2b2bfe933ff1..70e3ba5cb50b 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -64,8 +64,34 @@ extern int sysctl_sctp_wmem[3]; static ctl_table sctp_table[] = { { + .procname = "sctp_mem", + .data = &sysctl_sctp_mem, + .maxlen = sizeof(sysctl_sctp_mem), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax + }, + { + .procname = "sctp_rmem", + .data = &sysctl_sctp_rmem, + .maxlen = sizeof(sysctl_sctp_rmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sctp_wmem", + .data = &sysctl_sctp_wmem, + .maxlen = sizeof(sysctl_sctp_wmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + + { /* sentinel */ } +}; + +static ctl_table sctp_net_table[] = { + { .procname = "rto_initial", - .data = &sctp_rto_initial, + .data = &init_net.sctp.rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -74,7 +100,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rto_min", - .data = &sctp_rto_min, + .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -83,7 +109,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rto_max", - .data = &sctp_rto_max, + .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -91,17 +117,22 @@ static ctl_table sctp_table[] = { .extra2 = &timer_max }, { - .procname = "valid_cookie_life", - .data = &sctp_valid_cookie_life, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &timer_max + .procname = "rto_alpha_exp_divisor", + .data = &init_net.sctp.rto_alpha, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "rto_beta_exp_divisor", + .data = &init_net.sctp.rto_beta, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, }, { .procname = "max_burst", - .data = &sctp_max_burst, + .data = &init_net.sctp.max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -109,31 +140,42 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "association_max_retrans", - .data = &sctp_max_retrans_association, + .procname = "cookie_preserve_enable", + .data = &init_net.sctp.cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "valid_cookie_life", + .data = &init_net.sctp.valid_cookie_life, + .maxlen = sizeof(unsigned int), + .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &int_max + .extra1 = &one, + .extra2 = &timer_max }, { - .procname = "sndbuf_policy", - .data = &sctp_sndbuf_policy, + .procname = "sack_timeout", + .data = &init_net.sctp.sack_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &sack_timer_min, + .extra2 = &sack_timer_max, }, { - .procname = "rcvbuf_policy", - .data = &sctp_rcvbuf_policy, - .maxlen = sizeof(int), + .procname = "hb_interval", + .data = &init_net.sctp.hb_interval, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &timer_max }, { - .procname = "path_max_retrans", - .data = &sctp_max_retrans_path, + .procname = "association_max_retrans", + .data = &init_net.sctp.max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -141,17 +183,17 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "pf_retrans", - .data = &sctp_pf_retrans, + .procname = "path_max_retrans", + .data = &init_net.sctp.max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = &one, .extra2 = &int_max }, { .procname = "max_init_retransmits", - .data = &sctp_max_retrans_init, + .data = &init_net.sctp.max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -159,103 +201,66 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "hb_interval", - .data = &sctp_hb_interval, - .maxlen = sizeof(unsigned int), + .procname = "pf_retrans", + .data = &init_net.sctp.pf_retrans, + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &timer_max + .extra1 = &zero, + .extra2 = &int_max }, { - .procname = "cookie_preserve_enable", - .data = &sctp_cookie_preserve_enable, + .procname = "sndbuf_policy", + .data = &init_net.sctp.sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "rto_alpha_exp_divisor", - .data = &sctp_rto_alpha, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "rto_beta_exp_divisor", - .data = &sctp_rto_beta, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "addip_enable", - .data = &sctp_addip_enable, + .procname = "rcvbuf_policy", + .data = &init_net.sctp.rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "default_auto_asconf", - .data = &sctp_default_auto_asconf, + .data = &init_net.sctp.default_auto_asconf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "prsctp_enable", - .data = &sctp_prsctp_enable, + .procname = "addip_enable", + .data = &init_net.sctp.addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "sack_timeout", - .data = &sctp_sack_timeout, + .procname = "addip_noauth_enable", + .data = &init_net.sctp.addip_noauth, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &sack_timer_min, - .extra2 = &sack_timer_max, - }, - { - .procname = "sctp_mem", - .data = &sysctl_sctp_mem, - .maxlen = sizeof(sysctl_sctp_mem), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax - }, - { - .procname = "sctp_rmem", - .data = &sysctl_sctp_rmem, - .maxlen = sizeof(sysctl_sctp_rmem), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "sctp_wmem", - .data = &sysctl_sctp_wmem, - .maxlen = sizeof(sysctl_sctp_wmem), - .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "auth_enable", - .data = &sctp_auth_enable, + .procname = "prsctp_enable", + .data = &init_net.sctp.prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "addip_noauth_enable", - .data = &sctp_addip_noauth, + .procname = "auth_enable", + .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addr_scope_policy", - .data = &sctp_scope_policy, + .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -264,7 +269,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rwnd_update_shift", - .data = &sctp_rwnd_upd_shift, + .data = &init_net.sctp.rwnd_upd_shift, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -273,7 +278,7 @@ static ctl_table sctp_table[] = { }, { .procname = "max_autoclose", - .data = &sctp_max_autoclose, + .data = &init_net.sctp.max_autoclose, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, @@ -284,6 +289,27 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; +int sctp_sysctl_net_register(struct net *net) +{ + struct ctl_table *table; + int i; + + table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + for (i = 0; table[i].data; i++) + table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; + + net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); + return 0; +} + +void sctp_sysctl_net_unregister(struct net *net) +{ + unregister_net_sysctl_table(net->sctp.sysctl_header); +} + static struct ctl_table_header * sctp_sysctl_header; /* Sysctl registration. */ diff --git a/net/sctp/transport.c b/net/sctp/transport.c index c97472b248a2..953c21e4af97 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -59,7 +59,8 @@ /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ -static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, +static struct sctp_transport *sctp_transport_init(struct net *net, + struct sctp_transport *peer, const union sctp_addr *addr, gfp_t gfp) { @@ -76,7 +77,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ - peer->rto = msecs_to_jiffies(sctp_rto_initial); + peer->rto = msecs_to_jiffies(net->sctp.rto_initial); peer->last_time_heard = jiffies; peer->last_time_ecne_reduced = jiffies; @@ -86,8 +87,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, SPP_SACKDELAY_ENABLE; /* Initialize the default path max_retrans. */ - peer->pathmaxrxt = sctp_max_retrans_path; - peer->pf_retrans = sctp_pf_retrans; + peer->pathmaxrxt = net->sctp.max_retrans_path; + peer->pf_retrans = net->sctp.pf_retrans; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); @@ -109,7 +110,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, } /* Allocate and initialize a new transport. */ -struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, +struct sctp_transport *sctp_transport_new(struct net *net, + const union sctp_addr *addr, gfp_t gfp) { struct sctp_transport *transport; @@ -118,7 +120,7 @@ struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, if (!transport) goto fail; - if (!sctp_transport_init(transport, addr, gfp)) + if (!sctp_transport_init(net, transport, addr, gfp)) goto fail_init; transport->malloced = 1; @@ -316,6 +318,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return); if (tp->rttvar || tp->srtt) { + struct net *net = sock_net(tp->asoc->base.sk); /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' @@ -327,10 +330,10 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be expressed as 3. */ - tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta) - + ((abs(tp->srtt - rtt)) >> sctp_rto_beta); - tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha) - + (rtt >> sctp_rto_alpha); + tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) + + ((abs(tp->srtt - rtt)) >> net->sctp.rto_beta); + tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) + + (rtt >> net->sctp.rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 33d894776192..10c018a5b9fe 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -702,7 +702,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (rx_count >= asoc->base.sk->sk_rcvbuf) { if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || - (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize))) + (!sk_rmem_schedule(asoc->base.sk, chunk->skb, + chunk->skb->truesize))) goto fail; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f5a6a4f4faf7..360d8697b95c 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -326,7 +326,9 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ -static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) +static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, + struct sk_buff_head *queue, struct sk_buff *f_frag, + struct sk_buff *l_frag) { struct sk_buff *pos; struct sk_buff *new = NULL; @@ -394,7 +396,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu } event = sctp_skb2event(f_frag); - SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS); + SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS); return event; } @@ -493,7 +495,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul cevent = sctp_skb2event(pd_first); pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { - retval = sctp_make_reassembled_event(&ulpq->reasm, + retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + &ulpq->reasm, pd_first, pd_last); if (retval) @@ -503,7 +506,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul done: return retval; found: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; @@ -563,7 +567,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; @@ -655,7 +660,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, last_frag); return retval; } diff --git a/net/socket.c b/net/socket.c index dfe5b66c97e0..80dc7e84b046 100644 --- a/net/socket.c +++ b/net/socket.c @@ -88,6 +88,7 @@ #include <linux/nsproxy.h> #include <linux/magic.h> #include <linux/slab.h> +#include <linux/xattr.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -346,7 +347,8 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ -static int sock_alloc_file(struct socket *sock, struct file **f, int flags) +static int sock_alloc_file(struct socket *sock, struct file **f, int flags, + const char *dname) { struct qstr name = { .name = "" }; struct path path; @@ -357,6 +359,13 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; + if (dname) { + name.name = dname; + name.len = strlen(name.name); + } else if (sock->sk) { + name.name = sock->sk->sk_prot_creator->name; + name.len = strlen(name.name); + } path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); @@ -389,7 +398,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_file(sock, &newfile, flags); + int fd = sock_alloc_file(sock, &newfile, flags, NULL); if (likely(fd >= 0)) fd_install(fd, newfile); @@ -455,6 +464,68 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) return NULL; } +#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname" +#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX) +#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1) +static ssize_t sockfs_getxattr(struct dentry *dentry, + const char *name, void *value, size_t size) +{ + const char *proto_name; + size_t proto_size; + int error; + + error = -ENODATA; + if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) { + proto_name = dentry->d_name.name; + proto_size = strlen(proto_name); + + if (value) { + error = -ERANGE; + if (proto_size + 1 > size) + goto out; + + strncpy(value, proto_name, proto_size + 1); + } + error = proto_size + 1; + } + +out: + return error; +} + +static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, + size_t size) +{ + ssize_t len; + ssize_t used = 0; + + len = security_inode_listsecurity(dentry->d_inode, buffer, size); + if (len < 0) + return len; + used += len; + if (buffer) { + if (size < used) + return -ERANGE; + buffer += len; + } + + len = (XATTR_NAME_SOCKPROTONAME_LEN + 1); + used += len; + if (buffer) { + if (size < used) + return -ERANGE; + memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len); + buffer += len; + } + + return used; +} + +static const struct inode_operations sockfs_inode_ops = { + .getxattr = sockfs_getxattr, + .listxattr = sockfs_listxattr, +}; + /** * sock_alloc - allocate a socket * @@ -479,6 +550,7 @@ static struct socket *sock_alloc(void) inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); + inode->i_op = &sockfs_inode_ops; this_cpu_add(sockets_in_use, 1); return sock; @@ -1394,13 +1466,13 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_file(sock1, &newfile1, flags); + fd1 = sock_alloc_file(sock1, &newfile1, flags, NULL); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - fd2 = sock_alloc_file(sock2, &newfile2, flags); + fd2 = sock_alloc_file(sock2, &newfile2, flags, NULL); if (unlikely(fd2 < 0)) { err = fd2; fput(newfile1); @@ -1536,7 +1608,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_file(newsock, &newfile, flags); + newfd = sock_alloc_file(newsock, &newfile, flags, + sock->sk->sk_prot_creator->name); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); @@ -2528,12 +2601,6 @@ static int __init sock_init(void) goto out; /* - * Initialize sock SLAB cache. - */ - - sk_init(); - - /* * Initialize skbuff SLAB cache */ skb_init(); @@ -2604,7 +2671,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock, err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); if (!err) - err = compat_put_timeval(up, &ktv); + err = compat_put_timeval(&ktv, up); return err; } @@ -2620,7 +2687,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock, err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); if (!err) - err = compat_put_timespec(up, &kts); + err = compat_put_timespec(&kts, up); return err; } @@ -2657,6 +2724,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT; + memset(&ifc, 0, sizeof(ifc)); if (ifc32.ifcbuf == 0) { ifc32.ifc_len = 0; ifc.ifc_len = 0; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 9fe8857d8d59..03d03e37a7d5 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -21,6 +21,11 @@ config SUNRPC_XPRT_RDMA If unsure, say N. +config SUNRPC_SWAP + bool + depends on SUNRPC + select NETVM + config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism" depends on SUNRPC && CRYPTO diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 727e506cacda..b5c067bccc45 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -13,6 +13,7 @@ #include <linux/errno.h> #include <linux/hash.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/gss_api.h> #include <linux/spinlock.h> #ifdef RPC_DEBUG @@ -122,6 +123,59 @@ rpcauth_unregister(const struct rpc_authops *ops) } EXPORT_SYMBOL_GPL(rpcauth_unregister); +/** + * rpcauth_list_flavors - discover registered flavors and pseudoflavors + * @array: array to fill in + * @size: size of "array" + * + * Returns the number of array items filled in, or a negative errno. + * + * The returned array is not sorted by any policy. Callers should not + * rely on the order of the items in the returned array. + */ +int +rpcauth_list_flavors(rpc_authflavor_t *array, int size) +{ + rpc_authflavor_t flavor; + int result = 0; + + spin_lock(&rpc_authflavor_lock); + for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) { + const struct rpc_authops *ops = auth_flavors[flavor]; + rpc_authflavor_t pseudos[4]; + int i, len; + + if (result >= size) { + result = -ENOMEM; + break; + } + + if (ops == NULL) + continue; + if (ops->list_pseudoflavors == NULL) { + array[result++] = ops->au_flavor; + continue; + } + len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos)); + if (len < 0) { + result = len; + break; + } + for (i = 0; i < len; i++) { + if (result >= size) { + result = -ENOMEM; + break; + } + array[result++] = pseudos[i]; + } + } + spin_unlock(&rpc_authflavor_lock); + + dprintk("RPC: %s returns %d\n", __func__, result); + return result; +} +EXPORT_SYMBOL_GPL(rpcauth_list_flavors); + struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d3ad81f8da5b..34c522021004 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1619,6 +1619,7 @@ static const struct rpc_authops authgss_ops = { .crcreate = gss_create_cred, .pipes_create = gss_pipes_dentries_create, .pipes_destroy = gss_pipes_dentries_destroy, + .list_pseudoflavors = gss_mech_list_pseudoflavors, }; static const struct rpc_credops gss_credops = { diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 782bfe1b6465..b174fcd9ff4c 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -239,14 +239,28 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); -int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) +/** + * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors + * @array: array to fill in + * @size: size of "array" + * + * Returns the number of array items filled in, or a negative errno. + * + * The returned array is not sorted by any policy. Callers should not + * rely on the order of the items in the returned array. + */ +int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size) { struct gss_api_mech *pos = NULL; int j, i = 0; spin_lock(®istered_mechs_lock); list_for_each_entry(pos, ®istered_mechs, gm_list) { - for (j=0; j < pos->gm_pf_num; j++) { + for (j = 0; j < pos->gm_pf_num; j++) { + if (i >= size) { + spin_unlock(®istered_mechs_lock); + return -ENOMEM; + } array_ptr[i++] = pos->gm_pfs[j].pseudoflavor; } } @@ -254,8 +268,6 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) return i; } -EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors); - u32 gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) { diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 47ad2666fdf6..2a68bb3db772 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1349,8 +1349,11 @@ static int c_show(struct seq_file *m, void *p) if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ seq_printf(m, "# "); - else + else { + if (cache_is_expired(cd, cp)) + seq_printf(m, "# "); cache_put(cp, cd); + } return cd->cache_show(m, cd, cp); } @@ -1632,7 +1635,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) void __init cache_initialize(void) { - INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); + INIT_DEFERRABLE_WORK(&cache_cleaner, do_cache_clean); } int cache_register_net(struct cache_detail *cd, struct net *net) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 00eb859b7de5..fa48c60aef23 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -717,6 +717,15 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; + if (sk_memalloc_socks()) { + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + if (xprt->swapper) + task->tk_flags |= RPC_TASK_SWAPPER; + rcu_read_unlock(); + } /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); @@ -1844,12 +1853,13 @@ call_timeout(struct rpc_task *task) return; } if (RPC_IS_SOFT(task)) { - if (clnt->cl_chatty) + if (clnt->cl_chatty) { rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, rcu_dereference(clnt->cl_xprt)->servername); rcu_read_unlock(); + } if (task->tk_flags & RPC_TASK_TIMEOUT) rpc_exit(task, -ETIMEDOUT); else diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 92509ffe15fc..a70acae496e4 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -251,7 +251,7 @@ static int rpcb_create_local_unix(struct net *net) if (IS_ERR(clnt)) { dprintk("RPC: failed to create AF_LOCAL rpcbind " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); goto out; } @@ -298,7 +298,7 @@ static int rpcb_create_local_net(struct net *net) if (IS_ERR(clnt)) { dprintk("RPC: failed to create local rpcbind " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); goto out; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 994cfea2bad6..128494ec9a64 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -300,8 +300,9 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); /* * Make an RPC task runnable. * - * Note: If the task is ASYNC, this must be called with - * the spinlock held to protect the wait queue operation. + * Note: If the task is ASYNC, and is being made runnable after sitting on an + * rpc_wait_queue, this must be called with the queue spinlock held to protect + * the wait queue operation. */ static void rpc_make_runnable(struct rpc_task *task) { @@ -790,7 +791,9 @@ void rpc_execute(struct rpc_task *task) static void rpc_async_schedule(struct work_struct *work) { + current->flags |= PF_FSTRANS; __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); + current->flags &= ~PF_FSTRANS; } /** @@ -812,7 +815,10 @@ static void rpc_async_schedule(struct work_struct *work) void *rpc_malloc(struct rpc_task *task, size_t size) { struct rpc_buffer *buf; - gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT; + gfp_t gfp = GFP_NOWAIT; + + if (RPC_IS_SWAPPER(task)) + gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) @@ -886,7 +892,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta static struct rpc_task * rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); + return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO); } /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 88f2bf671960..bac973a31367 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -316,7 +316,6 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { - struct svc_serv *serv = xprt->xpt_server; struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; @@ -362,8 +361,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) rqstp, rqstp->rq_xprt); rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); pool->sp_stats.threads_woken++; wake_up(&rqstp->rq_wait); } else { @@ -640,8 +637,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (xprt) { rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); /* As there is a shortage of threads and this request * had to be queued, don't allow the thread to wait so @@ -738,6 +733,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) else len = xprt->xpt_ops->xpo_recvfrom(rqstp); dprintk("svc: got len=%d\n", len); + rqstp->rq_reserved = serv->sv_max_mesg; + atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } svc_xprt_received(xprt); @@ -794,7 +791,8 @@ int svc_send(struct svc_rqst *rqstp) /* Grab mutex to serialize outgoing data. */ mutex_lock(&xprt->xpt_mutex); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) + if (test_bit(XPT_DEAD, &xprt->xpt_flags) + || test_bit(XPT_CLOSE, &xprt->xpt_flags)) len = -ENOTCONN; else len = xprt->xpt_ops->xpo_sendto(rqstp); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 18bc130255a7..998aa8c1807c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1129,9 +1129,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len >= 0) svsk->sk_tcplen += len; if (len != want) { + svc_tcp_save_pages(svsk, rqstp); if (len < 0 && len != -EAGAIN) goto err_other; - svc_tcp_save_pages(svsk, rqstp); dprintk("svc: incomplete TCP record (%d of %d)\n", svsk->sk_tcplen, svsk->sk_reclen); goto err_noclose; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 0cf165580d8d..0afba1b4b656 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -129,34 +129,6 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len) EXPORT_SYMBOL_GPL(xdr_terminate_string); void -xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, - unsigned int len) -{ - struct kvec *tail = xdr->tail; - u32 *p; - - xdr->pages = pages; - xdr->page_base = base; - xdr->page_len = len; - - p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len); - tail->iov_base = p; - tail->iov_len = 0; - - if (len & 3) { - unsigned int pad = 4 - (len & 3); - - *p = 0; - tail->iov_base = (char *)p + (len & 3); - tail->iov_len = pad; - len += pad; - } - xdr->buflen += len; - xdr->len += len; -} -EXPORT_SYMBOL_GPL(xdr_encode_pages); - -void xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, struct page **pages, unsigned int base, unsigned int len) { @@ -457,6 +429,16 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len) EXPORT_SYMBOL_GPL(xdr_shift_buf); /** + * xdr_stream_pos - Return the current offset from the start of the xdr_stream + * @xdr: pointer to struct xdr_stream + */ +unsigned int xdr_stream_pos(const struct xdr_stream *xdr) +{ + return (unsigned int)(XDR_QUADLEN(xdr->buf->len) - xdr->nwords) << 2; +} +EXPORT_SYMBOL_GPL(xdr_stream_pos); + +/** * xdr_init_encode - Initialize a struct xdr_stream for sending data. * @xdr: pointer to xdr_stream struct * @buf: pointer to XDR buffer in which to encode data @@ -556,13 +538,11 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b EXPORT_SYMBOL_GPL(xdr_write_pages); static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov, - __be32 *p, unsigned int len) + unsigned int len) { if (len > iov->iov_len) len = iov->iov_len; - if (p == NULL) - p = (__be32*)iov->iov_base; - xdr->p = p; + xdr->p = (__be32*)iov->iov_base; xdr->end = (__be32*)(iov->iov_base + len); xdr->iov = iov; xdr->page_ptr = NULL; @@ -609,7 +589,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr) newbase -= xdr->buf->page_base; if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); + xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); } static bool xdr_set_next_buffer(struct xdr_stream *xdr) @@ -618,7 +598,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) xdr_set_next_page(xdr); else if (xdr->iov == xdr->buf->head) { if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); + xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); } return xdr->p != xdr->end; } @@ -634,10 +614,15 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) xdr->buf = buf; xdr->scratch.iov_base = NULL; xdr->scratch.iov_len = 0; + xdr->nwords = XDR_QUADLEN(buf->len); if (buf->head[0].iov_len != 0) - xdr_set_iov(xdr, buf->head, p, buf->len); + xdr_set_iov(xdr, buf->head, buf->len); else if (buf->page_len != 0) xdr_set_page_base(xdr, 0, buf->len); + if (p != NULL && p > xdr->p && xdr->end >= p) { + xdr->nwords -= p - xdr->p; + xdr->p = p; + } } EXPORT_SYMBOL_GPL(xdr_init_decode); @@ -662,12 +647,14 @@ EXPORT_SYMBOL_GPL(xdr_init_decode_pages); static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { + unsigned int nwords = XDR_QUADLEN(nbytes); __be32 *p = xdr->p; - __be32 *q = p + XDR_QUADLEN(nbytes); + __be32 *q = p + nwords; - if (unlikely(q > xdr->end || q < p)) + if (unlikely(nwords > xdr->nwords || q > xdr->end || q < p)) return NULL; xdr->p = q; + xdr->nwords -= nwords; return p; } @@ -734,6 +721,31 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_inline_decode); +static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) +{ + struct xdr_buf *buf = xdr->buf; + struct kvec *iov; + unsigned int nwords = XDR_QUADLEN(len); + unsigned int cur = xdr_stream_pos(xdr); + + if (xdr->nwords == 0) + return 0; + if (nwords > xdr->nwords) { + nwords = xdr->nwords; + len = nwords << 2; + } + /* Realign pages to current pointer position */ + iov = buf->head; + if (iov->iov_len > cur) + xdr_shrink_bufhead(buf, iov->iov_len - cur); + + /* Truncate page data and move it into the tail */ + if (buf->page_len > len) + xdr_shrink_pagelen(buf, buf->page_len - len); + xdr->nwords = XDR_QUADLEN(buf->len - cur); + return len; +} + /** * xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position * @xdr: pointer to xdr_stream struct @@ -742,39 +754,37 @@ EXPORT_SYMBOL_GPL(xdr_inline_decode); * Moves data beyond the current pointer position from the XDR head[] buffer * into the page list. Any data that lies beyond current position + "len" * bytes is moved into the XDR tail[]. + * + * Returns the number of XDR encoded bytes now contained in the pages */ -void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) +unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) { struct xdr_buf *buf = xdr->buf; struct kvec *iov; - ssize_t shift; + unsigned int nwords; unsigned int end; - int padding; + unsigned int padding; - /* Realign pages to current pointer position */ - iov = buf->head; - shift = iov->iov_len + (char *)iov->iov_base - (char *)xdr->p; - if (shift > 0) - xdr_shrink_bufhead(buf, shift); - - /* Truncate page data and move it into the tail */ - if (buf->page_len > len) - xdr_shrink_pagelen(buf, buf->page_len - len); - padding = (XDR_QUADLEN(len) << 2) - len; + len = xdr_align_pages(xdr, len); + if (len == 0) + return 0; + nwords = XDR_QUADLEN(len); + padding = (nwords << 2) - len; xdr->iov = iov = buf->tail; /* Compute remaining message length. */ - end = iov->iov_len; - shift = buf->buflen - buf->len; - if (shift < end) - end -= shift; - else if (shift > 0) - end = 0; + end = ((xdr->nwords - nwords) << 2) + padding; + if (end > iov->iov_len) + end = iov->iov_len; + /* * Position current pointer at beginning of tail, and * set remaining message length. */ xdr->p = (__be32 *)((char *)iov->iov_base + padding); xdr->end = (__be32 *)((char *)iov->iov_base + end); + xdr->page_ptr = NULL; + xdr->nwords = XDR_QUADLEN(end - padding); + return len; } EXPORT_SYMBOL_GPL(xdr_read_pages); @@ -790,12 +800,13 @@ EXPORT_SYMBOL_GPL(xdr_read_pages); */ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) { - xdr_read_pages(xdr, len); + len = xdr_align_pages(xdr, len); /* * Position current pointer at beginning of tail, and * set remaining message length. */ - xdr_set_page_base(xdr, 0, len); + if (len != 0) + xdr_set_page_base(xdr, 0, len); } EXPORT_SYMBOL_GPL(xdr_enter_page); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a5a402a7d21f..5d7f61d7559c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -969,11 +969,11 @@ static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) return false; } -static void xprt_alloc_slot(struct rpc_task *task) +void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req; + spin_lock(&xprt->reserve_lock); if (!list_empty(&xprt->free)) { req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); list_del(&req->rq_list); @@ -994,12 +994,29 @@ static void xprt_alloc_slot(struct rpc_task *task) default: task->tk_status = -EAGAIN; } + spin_unlock(&xprt->reserve_lock); return; out_init_req: task->tk_status = 0; task->tk_rqstp = req; xprt_request_init(task, xprt); + spin_unlock(&xprt->reserve_lock); +} +EXPORT_SYMBOL_GPL(xprt_alloc_slot); + +void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) +{ + /* Note: grabbing the xprt_lock_write() ensures that we throttle + * new slot allocation if the transport is congested (i.e. when + * reconnecting a stream transport or when out of socket write + * buffer space). + */ + if (xprt_lock_write(xprt, task)) { + xprt_alloc_slot(xprt, task); + xprt_release_write(xprt, task); + } } +EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot); static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { @@ -1083,20 +1100,9 @@ void xprt_reserve(struct rpc_task *task) if (task->tk_rqstp != NULL) return; - /* Note: grabbing the xprt_lock_write() here is not strictly needed, - * but ensures that we throttle new slot allocation if the transport - * is congested (e.g. if reconnecting or if we're out of socket - * write buffer space). - */ task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (!xprt_lock_write(xprt, task)) - return; - - spin_lock(&xprt->reserve_lock); - xprt_alloc_slot(task); - spin_unlock(&xprt->reserve_lock); - xprt_release_write(xprt, task); + xprt->ops->alloc_slot(xprt, task); } static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index b446e100286f..5d9202dc7cb1 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -200,6 +200,7 @@ xprt_rdma_connect_worker(struct work_struct *work) int rc = 0; if (!xprt->shutdown) { + current->flags |= PF_FSTRANS; xprt_clear_connected(xprt); dprintk("RPC: %s: %sconnect\n", __func__, @@ -212,10 +213,10 @@ xprt_rdma_connect_worker(struct work_struct *work) out: xprt_wake_pending_tasks(xprt, rc); - out_clear: dprintk("RPC: %s: exit\n", __func__); xprt_clear_connecting(xprt); + current->flags &= ~PF_FSTRANS; } /* @@ -712,6 +713,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static struct rpc_xprt_ops xprt_rdma_procs = { .reserve_xprt = xprt_rdma_reserve_xprt, .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ + .alloc_slot = xprt_alloc_slot, .release_request = xprt_release_rqst_cong, /* ditto */ .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 62d0dac8f780..a35b8e52e551 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1892,6 +1892,8 @@ static void xs_local_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); status = __sock_create(xprt->xprt_net, AF_LOCAL, SOCK_STREAM, 0, &sock, 1); @@ -1925,7 +1927,47 @@ static void xs_local_setup_socket(struct work_struct *work) out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; +} + +#ifdef CONFIG_SUNRPC_SWAP +static void xs_set_memalloc(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (xprt->swapper) + sk_set_memalloc(transport->inet); +} + +/** + * xs_swapper - Tag this transport as being used for swap. + * @xprt: transport to tag + * @enable: enable/disable + * + */ +int xs_swapper(struct rpc_xprt *xprt, int enable) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + int err = 0; + + if (enable) { + xprt->swapper++; + xs_set_memalloc(xprt); + } else if (xprt->swapper) { + xprt->swapper--; + sk_clear_memalloc(transport->inet); + } + + return err; +} +EXPORT_SYMBOL_GPL(xs_swapper); +#else +static void xs_set_memalloc(struct rpc_xprt *xprt) +{ } +#endif static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { @@ -1951,6 +1993,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) transport->sock = sock; transport->inet = sk; + xs_set_memalloc(xprt); + write_unlock_bh(&sk->sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -1967,6 +2011,8 @@ static void xs_udp_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + /* Start by resetting any existing state */ xs_reset_transport(transport); sock = xs_create_sock(xprt, transport, @@ -1985,6 +2031,7 @@ static void xs_udp_setup_socket(struct work_struct *work) out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } /* @@ -2075,6 +2122,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!xprt_bound(xprt)) goto out; + xs_set_memalloc(xprt); + /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; @@ -2110,6 +2159,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); sock = xs_create_sock(xprt, transport, @@ -2159,6 +2210,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EINPROGRESS: case -EALREADY: xprt_clear_connecting(xprt); + current->flags &= ~PF_FSTRANS; return; case -EINVAL: /* Happens, for instance, if the user specified a link @@ -2171,6 +2223,7 @@ out_eagain: out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } /** @@ -2420,6 +2473,7 @@ static void bc_destroy(struct rpc_xprt *xprt) static struct rpc_xprt_ops xs_local_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, + .alloc_slot = xprt_alloc_slot, .rpcbind = xs_local_rpcbind, .set_port = xs_local_set_port, .connect = xs_connect, @@ -2436,6 +2490,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, + .alloc_slot = xprt_alloc_slot, .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, @@ -2453,6 +2508,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, + .alloc_slot = xprt_lock_and_alloc_slot, .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 09e71241265d..4ec5c80e8a7c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -49,21 +49,6 @@ struct tipc_bearer tipc_bearers[MAX_BEARERS]; static void bearer_disable(struct tipc_bearer *b_ptr); /** - * media_name_valid - validate media name - * - * Returns 1 if media name is valid, otherwise 0. - */ -static int media_name_valid(const char *name) -{ - u32 len; - - len = strlen(name); - if ((len + 1) > TIPC_MAX_MEDIA_NAME) - return 0; - return strspn(name, tipc_alphabet) == len; -} - -/** * tipc_media_find - locates specified media object by name */ struct tipc_media *tipc_media_find(const char *name) @@ -102,7 +87,7 @@ int tipc_register_media(struct tipc_media *m_ptr) write_lock_bh(&tipc_net_lock); - if (!media_name_valid(m_ptr->name)) + if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) goto exit; if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) || !m_ptr->bcast_addr.broadcast) @@ -206,9 +191,7 @@ static int bearer_name_validate(const char *name, /* validate component parts of bearer name */ if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) || - (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME) || - (strspn(media_name, tipc_alphabet) != (media_len - 1)) || - (strspn(if_name, tipc_alphabet) != (if_len - 1))) + (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME)) return 0; /* return bearer name components, if necessary */ diff --git a/net/tipc/config.c b/net/tipc/config.c index a056a3852f71..f67866c765dd 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2012, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -208,36 +208,6 @@ static struct sk_buff *cfg_set_remote_mng(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_max_publications(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value < 1 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max publications must be 1-65535)"); - tipc_max_publications = value; - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_set_max_subscriptions(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value < 1 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max subscriptions must be 1-65535"); - tipc_max_subscriptions = value; - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_max_ports(void) { u32 value; @@ -357,12 +327,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_MAX_PORTS: rep_tlv_buf = cfg_set_max_ports(); break; - case TIPC_CMD_SET_MAX_PUBL: - rep_tlv_buf = cfg_set_max_publications(); - break; - case TIPC_CMD_SET_MAX_SUBSCR: - rep_tlv_buf = cfg_set_max_subscriptions(); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; @@ -372,12 +336,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_PORTS: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports); break; - case TIPC_CMD_GET_MAX_PUBL: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_publications); - break; - case TIPC_CMD_GET_MAX_SUBSCR: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; @@ -393,6 +351,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_CLUSTERS: case TIPC_CMD_SET_MAX_NODES: case TIPC_CMD_GET_MAX_NODES: + case TIPC_CMD_SET_MAX_SUBSCR: + case TIPC_CMD_GET_MAX_SUBSCR: + case TIPC_CMD_SET_MAX_PUBL: + case TIPC_CMD_GET_MAX_PUBL: case TIPC_CMD_SET_LOG_SIZE: case TIPC_CMD_DUMP_LOG: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED diff --git a/net/tipc/core.c b/net/tipc/core.c index 6586eac6a50e..bfe8af88469a 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -48,18 +48,13 @@ /* global variables used by multiple sub-systems within TIPC */ -int tipc_random; - -const char tipc_alphabet[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_."; +int tipc_random __read_mostly; /* configurable TIPC parameters */ -u32 tipc_own_addr; -int tipc_max_ports; -int tipc_max_subscriptions; -int tipc_max_publications; -int tipc_net_id; -int tipc_remote_management; +u32 tipc_own_addr __read_mostly; +int tipc_max_ports __read_mostly; +int tipc_net_id __read_mostly; +int tipc_remote_management __read_mostly; /** @@ -101,9 +96,8 @@ int tipc_core_start_net(unsigned long addr) { int res; - res = tipc_net_start(addr); - if (!res) - res = tipc_eth_media_start(); + tipc_net_start(addr); + res = tipc_eth_media_start(); if (res) tipc_core_stop_net(); return res; @@ -160,8 +154,6 @@ static int __init tipc_init(void) tipc_own_addr = 0; tipc_remote_management = 1; - tipc_max_publications = 10000; - tipc_max_subscriptions = 2000; tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; diff --git a/net/tipc/core.h b/net/tipc/core.h index fd42e106c185..0207db04179a 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -60,7 +60,9 @@ #define TIPC_MOD_VER "2.0.0" -#define ULTRA_STRING_MAX_LEN 32768 +#define ULTRA_STRING_MAX_LEN 32768 +#define TIPC_MAX_SUBSCRIPTIONS 65535 +#define TIPC_MAX_PUBLICATIONS 65535 struct tipc_msg; /* msg.h */ @@ -74,19 +76,15 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...); /* * Global configuration variables */ -extern u32 tipc_own_addr; -extern int tipc_max_ports; -extern int tipc_max_subscriptions; -extern int tipc_max_publications; -extern int tipc_net_id; -extern int tipc_remote_management; +extern u32 tipc_own_addr __read_mostly; +extern int tipc_max_ports __read_mostly; +extern int tipc_net_id __read_mostly; +extern int tipc_remote_management __read_mostly; /* * Other global variables */ -extern int tipc_random; -extern const char tipc_alphabet[]; - +extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 90ac9bfa7abb..2132c1ef2951 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -46,19 +46,30 @@ * @bearer: ptr to associated "generic" bearer structure * @dev: ptr to associated Ethernet network device * @tipc_packet_type: used in binding TIPC to Ethernet driver + * @setup: work item used when enabling bearer * @cleanup: work item used when disabling bearer */ struct eth_bearer { struct tipc_bearer *bearer; struct net_device *dev; struct packet_type tipc_packet_type; + struct work_struct setup; struct work_struct cleanup; }; static struct tipc_media eth_media_info; static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; static int eth_started; -static struct notifier_block notifier; + +static int recv_notification(struct notifier_block *nb, unsigned long evt, + void *dv); +/* + * Network device notifier info + */ +static struct notifier_block notifier = { + .notifier_call = recv_notification, + .priority = 0 +}; /** * eth_media_addr_set - initialize Ethernet media address structure @@ -134,6 +145,17 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, } /** + * setup_bearer - setup association between Ethernet bearer and interface + */ +static void setup_bearer(struct work_struct *work) +{ + struct eth_bearer *eb_ptr = + container_of(work, struct eth_bearer, setup); + + dev_add_pack(&eb_ptr->tipc_packet_type); +} + +/** * enable_bearer - attach TIPC bearer to an Ethernet interface */ static int enable_bearer(struct tipc_bearer *tb_ptr) @@ -173,7 +195,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) eb_ptr->tipc_packet_type.func = recv_msg; eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); - dev_add_pack(&eb_ptr->tipc_packet_type); + INIT_WORK(&eb_ptr->setup, setup_bearer); + schedule_work(&eb_ptr->setup); /* Associate TIPC bearer with Ethernet bearer */ eb_ptr->bearer = tb_ptr; @@ -357,8 +380,6 @@ int tipc_eth_media_start(void) if (res) return res; - notifier.notifier_call = &recv_notification; - notifier.priority = 0; res = register_netdevice_notifier(¬ifier); if (!res) eth_started = 1; diff --git a/net/tipc/handler.c b/net/tipc/handler.c index 7a52d3922f3c..111ff8300ae5 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -45,7 +45,7 @@ struct queue_item { static struct kmem_cache *tipc_queue_item_cache; static struct list_head signal_queue_head; static DEFINE_SPINLOCK(qitem_lock); -static int handler_enabled; +static int handler_enabled __read_mostly; static void process_signal_queue(unsigned long dummy); diff --git a/net/tipc/link.c b/net/tipc/link.c index 1c1e6151875e..a79c755cb417 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -210,9 +210,7 @@ static int link_name_validate(const char *name, (z_local > 255) || (c_local > 4095) || (n_local > 4095) || (z_peer > 255) || (c_peer > 4095) || (n_peer > 4095) || (if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) || - (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME) || - (strspn(if_local, tipc_alphabet) != (if_local_len - 1)) || - (strspn(if_peer, tipc_alphabet) != (if_peer_len - 1))) + (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME)) return 0; /* return link name components, if necessary */ diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 360c478b0b53..46754779fd3d 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -41,7 +41,7 @@ #include "subscr.h" #include "port.h" -static int tipc_nametbl_size = 1024; /* must be a power of 2 */ +#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ /** * struct name_info - name sequence publication info @@ -114,7 +114,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock); static int hash(int x) { - return x & (tipc_nametbl_size - 1); + return x & (TIPC_NAMETBL_SIZE - 1); } /** @@ -667,9 +667,9 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, { struct publication *publ; - if (table.local_publ_count >= tipc_max_publications) { + if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", - tipc_max_publications); + TIPC_MAX_PUBLICATIONS); return NULL; } @@ -783,7 +783,7 @@ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, if (!list_is_last(&publ->zone_list, &info->zone_list)) ret += tipc_snprintf(buf + ret, len - ret, "\n%33s", " "); - }; + } ret += tipc_snprintf(buf + ret, len - ret, "\n"); return ret; @@ -871,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, ret += nametbl_header(buf, len, depth); lowbound = 0; upbound = ~0; - for (i = 0; i < tipc_nametbl_size; i++) { + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { seq_head = &table.types[i]; hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len - ret, @@ -935,7 +935,7 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) int tipc_nametbl_init(void) { - table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head), + table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head), GFP_ATOMIC); if (!table.types) return -ENOMEM; @@ -953,7 +953,7 @@ void tipc_nametbl_stop(void) /* Verify name table is empty, then release it */ write_lock_bh(&tipc_nametbl_lock); - for (i = 0; i < tipc_nametbl_size; i++) { + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&table.types[i])) continue; pr_err("nametbl_stop(): orphaned hash chain detected\n"); diff --git a/net/tipc/net.c b/net/tipc/net.c index 5b5cea259caf..7d305ecc09c2 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -171,7 +171,7 @@ void tipc_net_route_msg(struct sk_buff *buf) tipc_link_send(buf, dnode, msg_link_selector(msg)); } -int tipc_net_start(u32 addr) +void tipc_net_start(u32 addr) { char addr_string[16]; @@ -187,7 +187,6 @@ int tipc_net_start(u32 addr) pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); - return 0; } void tipc_net_stop(void) diff --git a/net/tipc/net.h b/net/tipc/net.h index 9eb4b9e220eb..079daadb3f72 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -41,7 +41,7 @@ extern rwlock_t tipc_net_lock; void tipc_net_route_msg(struct sk_buff *buf); -int tipc_net_start(u32 addr); +void tipc_net_start(u32 addr); void tipc_net_stop(void); #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 47a839df27dc..6675914dc592 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -62,7 +62,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).pid); + genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); } return 0; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 09dc5b97e079..fd5f042dbff4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -220,6 +220,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; + sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2; tipc_sk(sk)->p = tp_ptr; tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 5ed5965eb0be..0f7d0d007e22 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -304,9 +304,9 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) { + if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", - tipc_max_subscriptions); + TIPC_MAX_SUBSCRIPTIONS); subscr_terminate(subscriber); return NULL; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 79981d97bc9c..5b5c876c80e9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -441,7 +441,7 @@ static int unix_release_sock(struct sock *sk, int embrion) /* ---- Socket is dead now and most probably destroyed ---- */ /* - * Fixme: BSD difference: In BSD all sockets connected to use get + * Fixme: BSD difference: In BSD all sockets connected to us get * ECONNRESET and we die on the spot. In Linux we behave * like files and pipes do and wait for the last * dereference. @@ -481,7 +481,6 @@ static int unix_listen(struct socket *sock, int backlog) struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); struct pid *old_pid = NULL; - const struct cred *old_cred = NULL; err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) @@ -503,8 +502,6 @@ static int unix_listen(struct socket *sock, int backlog) out_unlock: unix_state_unlock(sk); put_pid(old_pid); - if (old_cred) - put_cred(old_cred); out: return err; } @@ -823,6 +820,34 @@ fail: return NULL; } +static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) +{ + struct dentry *dentry; + struct path path; + int err = 0; + /* + * Get the parent directory, calculate the hash for last + * component. + */ + dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + return err; + + /* + * All right, let's create it. + */ + err = security_path_mknod(&path, dentry, mode, 0); + if (!err) { + err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); + if (!err) { + res->mnt = mntget(path.mnt); + res->dentry = dget(dentry); + } + } + done_path_create(&path, dentry); + return err; +} static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -831,8 +856,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; - struct dentry *dentry = NULL; - struct path path; int err; unsigned int hash; struct unix_address *addr; @@ -869,43 +892,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - umode_t mode; - err = 0; - /* - * Get the parent directory, calculate the hash for last - * component. - */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_mknod_parent; - - /* - * All right, let's create it. - */ - mode = S_IFSOCK | + struct path path; + umode_t mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = mnt_want_write(path.mnt); - if (err) - goto out_mknod_dput; - err = security_path_mknod(&path, dentry, mode, 0); - if (err) - goto out_mknod_drop_write; - err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); -out_mknod_drop_write: - mnt_drop_write(path.mnt); - if (err) - goto out_mknod_dput; - mutex_unlock(&path.dentry->d_inode->i_mutex); - dput(path.dentry); - path.dentry = dentry; - + err = unix_mknod(sun_path, mode, &path); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; + unix_release_addr(addr); + goto out_up; + } addr->hash = UNIX_HASH_SIZE; - } - - spin_lock(&unix_table_lock); - - if (!sun_path[0]) { + hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1); + spin_lock(&unix_table_lock); + u->path = path; + list = &unix_socket_table[hash]; + } else { + spin_lock(&unix_table_lock); err = -EADDRINUSE; if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { @@ -914,9 +917,6 @@ out_mknod_drop_write: } list = &unix_socket_table[addr->hash]; - } else { - list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; - u->path = path; } err = 0; @@ -930,16 +930,6 @@ out_up: mutex_unlock(&u->readlock); out: return err; - -out_mknod_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); -out_mknod_parent: - if (err == -EEXIST) - err = -EADDRINUSE; - unix_release_addr(addr); - goto out_up; } static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) @@ -1457,7 +1447,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; @@ -1626,7 +1616,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; @@ -2067,10 +2057,14 @@ static int unix_shutdown(struct socket *sock, int mode) struct sock *sk = sock->sk; struct sock *other; - mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); - - if (!mode) - return 0; + if (mode < SHUT_RD || mode > SHUT_RDWR) + return -EINVAL; + /* This maps: + * SHUT_RD (0) -> RCV_SHUTDOWN (1) + * SHUT_WR (1) -> SEND_SHUTDOWN (2) + * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) + */ + ++mode; unix_state_lock(sk); sk->sk_shutdown |= mode; diff --git a/net/unix/diag.c b/net/unix/diag.c index 750b13408449..06748f108a57 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -110,12 +110,12 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags, int sk_ino) + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), flags); if (!nlh) return -EMSGSIZE; @@ -159,7 +159,7 @@ out_nlmsg_trim: } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -170,7 +170,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, pid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -200,7 +200,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(req->udiag_states & (1 << sk->sk_state))) goto next; if (sk_diag_dump(sk, skb, req, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) goto done; @@ -267,7 +267,7 @@ again: if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, + err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); @@ -277,7 +277,7 @@ again: goto again; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index d355f67d0cdd..2f876b9ee344 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -105,7 +105,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, ASSERT_WDEV_LOCK(wdev); - if (!netif_running(wdev->netdev)) + if (wdev->netdev && !netif_running(wdev->netdev)) return; switch (wdev->iftype) { @@ -143,6 +143,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, case NL80211_IFTYPE_WDS: /* these interface types don't really have a channel */ return; + case NL80211_IFTYPE_P2P_DEVICE: + if (wdev->wiphy->features & + NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL) + *chanmode = CHAN_MODE_EXCLUSIVE; + return; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: WARN_ON(1); diff --git a/net/wireless/core.c b/net/wireless/core.c index 31b40cc4a9c3..443d4d7deea2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -230,9 +230,24 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) rtnl_lock(); mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) - if (wdev->netdev) + list_for_each_entry(wdev, &rdev->wdev_list, list) { + if (wdev->netdev) { dev_close(wdev->netdev); + continue; + } + /* otherwise, check iftype */ + switch (wdev->iftype) { + case NL80211_IFTYPE_P2P_DEVICE: + if (!wdev->p2p_started) + break; + rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); + wdev->p2p_started = false; + rdev->opencount--; + break; + default: + break; + } + } mutex_unlock(&rdev->devlist_mtx); rtnl_unlock(); @@ -407,6 +422,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) if (WARN_ON(wiphy->software_iftypes & types)) return -EINVAL; + /* Only a single P2P_DEVICE can be allowed */ + if (WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) && + c->limits[j].max > 1)) + return -EINVAL; + cnt += c->limits[j].max; /* * Don't advertise an unsupported type @@ -734,6 +754,35 @@ static void wdev_cleanup_work(struct work_struct *work) dev_put(wdev->netdev); } +void cfg80211_unregister_wdev(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + ASSERT_RTNL(); + + if (WARN_ON(wdev->netdev)) + return; + + mutex_lock(&rdev->devlist_mtx); + list_del_rcu(&wdev->list); + rdev->devlist_generation++; + + switch (wdev->iftype) { + case NL80211_IFTYPE_P2P_DEVICE: + if (!wdev->p2p_started) + break; + rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); + wdev->p2p_started = false; + rdev->opencount--; + break; + default: + WARN_ON_ONCE(1); + break; + } + mutex_unlock(&rdev->devlist_mtx); +} +EXPORT_SYMBOL(cfg80211_unregister_wdev); + static struct device_type wiphy_type = { .name = "wlan", }; @@ -952,6 +1001,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, */ synchronize_rcu(); INIT_LIST_HEAD(&wdev->list); + /* + * Ensure that all events have been processed and + * freed. + */ + cfg80211_process_wdev_events(wdev); break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) diff --git a/net/wireless/core.h b/net/wireless/core.h index 5206c6844fd7..a343be4a52bd 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -55,7 +55,7 @@ struct cfg80211_registered_device { int opencount; /* also protected by devlist_mtx */ wait_queue_head_t dev_wait; - u32 ap_beacons_nlpid; + u32 ap_beacons_nlportid; /* protected by RTNL only */ int num_running_ifaces; @@ -426,6 +426,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, u32 *flags, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); +void cfg80211_process_wdev_events(struct wireless_dev *wdev); int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 1cdb1d5e6b0f..8016fee0752b 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -612,10 +612,21 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) } EXPORT_SYMBOL(cfg80211_del_sta); +void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, + enum nl80211_connect_failed_reason reason, + gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp); +} +EXPORT_SYMBOL(cfg80211_conn_failed); + struct cfg80211_mgmt_registration { struct list_head list; - u32 nlpid; + u32 nlportid; int match_len; @@ -624,7 +635,7 @@ struct cfg80211_mgmt_registration { u8 match[]; }; -int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, u16 frame_type, const u8 *match_data, int match_len) { @@ -672,7 +683,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, memcpy(nreg->match, match_data, match_len); nreg->match_len = match_len; - nreg->nlpid = snd_pid; + nreg->nlportid = snd_portid; nreg->frame_type = cpu_to_le16(frame_type); list_add(&nreg->list, &wdev->mgmt_registrations); @@ -685,7 +696,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, return err; } -void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); @@ -694,7 +705,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_lock_bh(&wdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { - if (reg->nlpid != nlpid) + if (reg->nlportid != nlportid) continue; if (rdev->ops->mgmt_frame_register) { @@ -710,8 +721,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_unlock_bh(&wdev->mgmt_registrations_lock); - if (nlpid == wdev->ap_unexpected_nlpid) - wdev->ap_unexpected_nlpid = 0; + if (nlportid == wdev->ap_unexpected_nlportid) + wdev->ap_unexpected_nlportid = 0; } void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) @@ -736,7 +747,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie) { - struct net_device *dev = wdev->netdev; const struct ieee80211_mgmt *mgmt; u16 stype; @@ -796,7 +806,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_AP_VLAN: - if (!ether_addr_equal(mgmt->bssid, dev->dev_addr)) + if (!ether_addr_equal(mgmt->bssid, wdev_address(wdev))) err = -EINVAL; break; case NL80211_IFTYPE_MESH_POINT: @@ -809,6 +819,11 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, * cfg80211 doesn't track the stations */ break; + case NL80211_IFTYPE_P2P_DEVICE: + /* + * fall through, P2P device only supports + * public action frames + */ default: err = -EOPNOTSUPP; break; @@ -819,7 +834,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return err; } - if (!ether_addr_equal(mgmt->sa, dev->dev_addr)) + if (!ether_addr_equal(mgmt->sa, wdev_address(wdev))) return -EINVAL; /* Transmit the Action frame as requested by user space */ @@ -868,7 +883,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, /* found match! */ /* Indicate the received Action frame to user space */ - if (nl80211_send_mgmt(rdev, wdev, reg->nlpid, + if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, freq, sig_mbm, buf, len, gfp)) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1e37dbf00cb3..0418a6d5c1a6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -496,11 +496,11 @@ static bool is_valid_ie_attr(const struct nlattr *attr) } /* message building helper */ -static inline void *nl80211hdr_put(struct sk_buff *skb, u32 pid, u32 seq, +static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ - return genlmsg_put(skb, pid, seq, &nl80211_fam, flags, cmd); + return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd); } static int nl80211_msg_put_channel(struct sk_buff *msg, @@ -851,7 +851,7 @@ nla_put_failure: return -ENOBUFS; } -static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { void *hdr; @@ -866,7 +866,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -1; @@ -1100,6 +1100,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) goto nla_put_failure; } + CMD(start_p2p_device, START_P2P_DEVICE); #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); @@ -1266,7 +1267,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++idx <= start) continue; - if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) { idx--; @@ -1289,7 +1290,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) { + if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -1735,26 +1736,26 @@ static inline u64 wdev_id(struct wireless_dev *wdev) ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); } -static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_INTERFACE); if (!hdr) return -1; if (dev && (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || - nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, dev->dev_addr))) + nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name))) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) || nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2))) @@ -1806,7 +1807,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; continue; } - if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev) < 0) { mutex_unlock(&rdev->devlist_mtx); @@ -1837,7 +1838,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, dev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2021,8 +2022,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(wdev); } - if (type == NL80211_IFTYPE_MESH_POINT && - info->attrs[NL80211_ATTR_MESH_ID]) { + switch (type) { + case NL80211_IFTYPE_MESH_POINT: + if (!info->attrs[NL80211_ATTR_MESH_ID]) + break; wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -2031,9 +2034,29 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->mesh_id_up_len); wdev_unlock(wdev); + break; + case NL80211_IFTYPE_P2P_DEVICE: + /* + * P2P Device doesn't have a netdev, so doesn't go + * through the netdev notifier and must be added here + */ + mutex_init(&wdev->mtx); + INIT_LIST_HEAD(&wdev->event_list); + spin_lock_init(&wdev->event_lock); + INIT_LIST_HEAD(&wdev->mgmt_registrations); + spin_lock_init(&wdev->mgmt_registrations_lock); + + mutex_lock(&rdev->devlist_mtx); + wdev->identifier = ++rdev->wdev_id; + list_add_rcu(&wdev->list, &rdev->wdev_list); + rdev->devlist_generation++; + mutex_unlock(&rdev->devlist_mtx); + break; + default: + break; } - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2168,7 +2191,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2746,7 +2769,7 @@ nla_put_failure: return false; } -static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -2755,7 +2778,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *sinfoattr, *bss_param; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -2908,7 +2931,7 @@ static int nl80211_dump_station(struct sk_buff *skb, goto out_err; if (nl80211_send_station(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev, netdev, mac_addr, &sinfo) < 0) @@ -2954,7 +2977,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3280,7 +3303,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) return rdev->ops->del_station(&rdev->wiphy, dev, mac_addr); } -static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) @@ -3288,7 +3311,7 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *pinfoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -3366,7 +3389,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, if (err) goto out_err; - if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, dst, next_hop, &pinfo) < 0) @@ -3415,7 +3438,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0, dev, dst, next_hop, &pinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3656,7 +3679,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto out; @@ -3975,7 +3998,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_REG); if (!hdr) goto put_failure; @@ -4593,7 +4616,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, ASSERT_WDEV_LOCK(wdev); - hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).pid, seq, flags, + hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, NL80211_CMD_NEW_SCAN_RESULTS); if (!hdr) return -1; @@ -4712,14 +4735,14 @@ static int nl80211_dump_scan(struct sk_buff *skb, return skb->len; } -static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, struct survey_info *survey) { void *hdr; struct nlattr *infoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_SURVEY_RESULTS); if (!hdr) return -ENOMEM; @@ -4813,7 +4836,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, } if (nl80211_send_survey(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, &survey) < 0) @@ -5428,7 +5451,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, } while (1) { - void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).pid, + void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, NL80211_CMD_TESTMODE); struct nlattr *tmdata; @@ -5468,7 +5491,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, static struct sk_buff * __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, - int approxlen, u32 pid, u32 seq, gfp_t gfp) + int approxlen, u32 portid, u32 seq, gfp_t gfp) { struct sk_buff *skb; void *hdr; @@ -5478,7 +5501,7 @@ __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, if (!skb) return NULL; - hdr = nl80211hdr_put(skb, pid, seq, 0, NL80211_CMD_TESTMODE); + hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE); if (!hdr) { kfree_skb(skb); return NULL; @@ -5508,7 +5531,7 @@ struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, return NULL; return __cfg80211_testmode_alloc_skb(rdev, approxlen, - rdev->testmode_info->snd_pid, + rdev->testmode_info->snd_portid, rdev->testmode_info->snd_seq, GFP_KERNEL); } @@ -5846,7 +5869,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_REMAIN_ON_CHANNEL); if (IS_ERR(hdr)) { @@ -6055,6 +6078,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_P2P_DEVICE: break; default: return -EOPNOTSUPP; @@ -6064,7 +6088,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - return cfg80211_mlme_register_mgmt(wdev, info->snd_pid, frame_type, + return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); } @@ -6101,6 +6125,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_P2P_DEVICE: break; default: return -EOPNOTSUPP; @@ -6144,7 +6169,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_FRAME); if (IS_ERR(hdr)) { @@ -6197,6 +6222,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_P2P_DEVICE: break; default: return -EOPNOTSUPP; @@ -6260,7 +6286,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_POWER_SAVE); if (!hdr) { err = -ENOBUFS; @@ -6462,7 +6488,7 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_WOWLAN); if (!hdr) goto nla_put_failure; @@ -6736,10 +6762,10 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb, wdev->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; - if (wdev->ap_unexpected_nlpid) + if (wdev->ap_unexpected_nlportid) return -EBUSY; - wdev->ap_unexpected_nlpid = info->snd_pid; + wdev->ap_unexpected_nlportid = info->snd_portid; return 0; } @@ -6769,7 +6795,7 @@ static int nl80211_probe_client(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_PROBE_CLIENT); if (IS_ERR(hdr)) { @@ -6804,10 +6830,72 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS)) return -EOPNOTSUPP; - if (rdev->ap_beacons_nlpid) + if (rdev->ap_beacons_nlportid) return -EBUSY; - rdev->ap_beacons_nlpid = info->snd_pid; + rdev->ap_beacons_nlportid = info->snd_portid; + + return 0; +} + +static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + int err; + + if (!rdev->ops->start_p2p_device) + return -EOPNOTSUPP; + + if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) + return -EOPNOTSUPP; + + if (wdev->p2p_started) + return 0; + + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_can_add_interface(rdev, wdev->iftype); + mutex_unlock(&rdev->devlist_mtx); + if (err) + return err; + + err = rdev->ops->start_p2p_device(&rdev->wiphy, wdev); + if (err) + return err; + + wdev->p2p_started = true; + mutex_lock(&rdev->devlist_mtx); + rdev->opencount++; + mutex_unlock(&rdev->devlist_mtx); + + return 0; +} + +static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) + return -EOPNOTSUPP; + + if (!rdev->ops->stop_p2p_device) + return -EOPNOTSUPP; + + if (!wdev->p2p_started) + return 0; + + rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); + wdev->p2p_started = false; + + mutex_lock(&rdev->devlist_mtx); + rdev->opencount--; + mutex_unlock(&rdev->devlist_mtx); + + if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, true); + } return 0; } @@ -6819,7 +6907,7 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) #define NL80211_FLAG_NEED_NETDEV_UP (NL80211_FLAG_NEED_NETDEV |\ NL80211_FLAG_CHECK_NETDEV_UP) #define NL80211_FLAG_NEED_WDEV 0x10 -/* If a netdev is associated, it must be UP */ +/* If a netdev is associated, it must be UP, P2P must be started */ #define NL80211_FLAG_NEED_WDEV_UP (NL80211_FLAG_NEED_WDEV |\ NL80211_FLAG_CHECK_NETDEV_UP) @@ -6880,6 +6968,13 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, } dev_hold(dev); + } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { + if (!wdev->p2p_started) { + mutex_unlock(&cfg80211_mutex); + if (rtnl) + rtnl_unlock(); + return -ENETDOWN; + } } cfg80211_lock_rdev(rdev); @@ -7441,7 +7536,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, - + { + .cmd = NL80211_CMD_START_P2P_DEVICE, + .doit = nl80211_start_p2p_device, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_STOP_P2P_DEVICE, + .doit = nl80211_stop_p2p_device, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -7520,12 +7630,12 @@ static int nl80211_add_scan_req(struct sk_buff *msg, static int nl80211_send_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - u32 pid, u32 seq, int flags, + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -7549,11 +7659,11 @@ static int nl80211_send_sched_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct net_device *netdev, - u32 pid, u32 seq, int flags, u32 cmd) + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -8254,6 +8364,40 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *mac_addr, + enum nl80211_connect_failed_reason reason, + gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONN_FAILED); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) || + nla_put_u32(msg, NL80211_ATTR_CONN_FAILED_REASON, reason)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, const u8 *addr, gfp_t gfp) { @@ -8262,9 +8406,9 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct sk_buff *msg; void *hdr; int err; - u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid); + u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); - if (!nlpid) + if (!nlportid) return false; msg = nlmsg_new(100, gfp); @@ -8288,7 +8432,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, return true; } - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return true; nla_put_failure: @@ -8312,7 +8456,7 @@ bool nl80211_unexpected_4addr_frame(struct net_device *dev, } int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u32 nlpid, + struct wireless_dev *wdev, u32 nlportid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp) { @@ -8341,7 +8485,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, genlmsg_end(msg, hdr); - return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); nla_put_failure: genlmsg_cancel(msg, hdr); @@ -8696,9 +8840,9 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; - u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid); + u32 nlportid = ACCESS_ONCE(rdev->ap_beacons_nlportid); - if (!nlpid) + if (!nlportid) return; msg = nlmsg_new(len + 100, gfp); @@ -8721,7 +8865,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, genlmsg_end(msg, hdr); - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return; nla_put_failure: @@ -8745,9 +8889,9 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) - cfg80211_mlme_unregister_socket(wdev, notify->pid); - if (rdev->ap_beacons_nlpid == notify->pid) - rdev->ap_beacons_nlpid = 0; + cfg80211_mlme_unregister_socket(wdev, notify->portid); + if (rdev->ap_beacons_nlportid == notify->portid) + rdev->ap_beacons_nlportid = 0; } rcu_read_unlock(); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 9f2616fffb40..f6153516068c 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -91,6 +91,11 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, gfp_t gfp); +void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *mac_addr, + enum nl80211_connect_failed_reason reason, + gfp_t gfp); + int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u32 nlpid, int freq, int sig_dbm, diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index c4ad7958af52..7d604c06c3dc 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -41,6 +41,8 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = { [IEEE80211_RADIOTAP_TX_FLAGS] = { .align = 2, .size = 2, }, [IEEE80211_RADIOTAP_RTS_RETRIES] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_MCS] = { .align = 1, .size = 3, }, + [IEEE80211_RADIOTAP_AMPDU_STATUS] = { .align = 4, .size = 8, }, /* * add more here as they are defined in radiotap.h */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2ded3c7fad06..3b8cbbc214db 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -350,6 +350,9 @@ static void reg_regdb_search(struct work_struct *work) struct reg_regdb_search_request *request; const struct ieee80211_regdomain *curdom, *regdom; int i, r; + bool set_reg = false; + + mutex_lock(&cfg80211_mutex); mutex_lock(®_regdb_search_mutex); while (!list_empty(®_regdb_search_list)) { @@ -365,9 +368,7 @@ static void reg_regdb_search(struct work_struct *work) r = reg_copy_regd(®dom, curdom); if (r) break; - mutex_lock(&cfg80211_mutex); - set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + set_reg = true; break; } } @@ -375,6 +376,11 @@ static void reg_regdb_search(struct work_struct *work) kfree(request); } mutex_unlock(®_regdb_search_mutex); + + if (set_reg) + set_regdom(regdom); + + mutex_unlock(&cfg80211_mutex); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); @@ -504,9 +510,11 @@ static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, * * This lets us know if a specific frequency rule is or is not relevant to * a specific frequency's band. Bands are device specific and artificial - * definitions (the "2.4 GHz band" and the "5 GHz band"), however it is - * safe for now to assume that a frequency rule should not be part of a - * frequency's band if the start freq or end freq are off by more than 2 GHz. + * definitions (the "2.4 GHz band", the "5 GHz band" and the "60GHz band"), + * however it is safe for now to assume that a frequency rule should not be + * part of a frequency's band if the start freq or end freq are off by more + * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 10 GHz for the + * 60 GHz band. * This resolution can be lowered and should be considered as we add * regulatory rule support for other "bands". **/ @@ -514,9 +522,16 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, u32 freq_khz) { #define ONE_GHZ_IN_KHZ 1000000 - if (abs(freq_khz - freq_range->start_freq_khz) <= (2 * ONE_GHZ_IN_KHZ)) + /* + * From 802.11ad: directional multi-gigabit (DMG): + * Pertaining to operation in a frequency band containing a channel + * with the Channel starting frequency above 45 GHz. + */ + u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ? + 10 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; + if (abs(freq_khz - freq_range->start_freq_khz) <= limit) return true; - if (abs(freq_khz - freq_range->end_freq_khz) <= (2 * ONE_GHZ_IN_KHZ)) + if (abs(freq_khz - freq_range->end_freq_khz) <= limit) return true; return false; #undef ONE_GHZ_IN_KHZ @@ -1949,8 +1964,7 @@ static void restore_regulatory_settings(bool reset_user) if (reg_request->initiator != NL80211_REGDOM_SET_BY_USER) continue; - list_del(®_request->list); - list_add_tail(®_request->list, &tmp_reg_req_list); + list_move_tail(®_request->list, &tmp_reg_req_list); } } spin_unlock(®_requests_lock); @@ -2009,8 +2023,7 @@ static void restore_regulatory_settings(bool reset_user) "into the queue\n", reg_request->alpha2[0], reg_request->alpha2[1]); - list_del(®_request->list); - list_add_tail(®_request->list, ®_requests_list); + list_move_tail(®_request->list, ®_requests_list); } spin_unlock(®_requests_lock); @@ -2195,7 +2208,6 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) static int __set_regdom(const struct ieee80211_regdomain *rd) { const struct ieee80211_regdomain *intersected_rd = NULL; - struct cfg80211_registered_device *rdev = NULL; struct wiphy *request_wiphy; /* Some basic sanity checks first */ @@ -2307,24 +2319,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return 0; } - if (!intersected_rd) - return -EINVAL; - - rdev = wiphy_to_dev(request_wiphy); - - rdev->country_ie_alpha2[0] = rd->alpha2[0]; - rdev->country_ie_alpha2[1] = rd->alpha2[1]; - rdev->env = last_request->country_ie_env; - - BUG_ON(intersected_rd == rd); - - kfree(rd); - rd = NULL; - - reset_regdomains(false); - cfg80211_regdomain = intersected_rd; - - return 0; + return -EINVAL; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 848523a2b22f..9730c9862bdc 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -815,7 +815,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, return NULL; if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && - (signal < 0 || signal > 100))) + (signal < 0 || signal > 100))) return NULL; if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable))) diff --git a/net/wireless/util.c b/net/wireless/util.c index 26f8cd30f712..ef35f4ef2aa6 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -684,22 +684,10 @@ EXPORT_SYMBOL(cfg80211_classify8021d); const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie) { - u8 *end, *pos; - - pos = bss->information_elements; - if (pos == NULL) + if (bss->information_elements == NULL) return NULL; - end = pos + bss->len_information_elements; - - while (pos + 1 < end) { - if (pos + 2 + pos[1] > end) - break; - if (pos[0] == ie) - return pos; - pos += 2 + pos[1]; - } - - return NULL; + return cfg80211_find_ie(ie, bss->information_elements, + bss->len_information_elements); } EXPORT_SYMBOL(ieee80211_bss_get_ie); @@ -735,7 +723,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) wdev->connect_keys = NULL; } -static void cfg80211_process_wdev_events(struct wireless_dev *wdev) +void cfg80211_process_wdev_events(struct wireless_dev *wdev) { struct cfg80211_event *ev; unsigned long flags; @@ -812,6 +800,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (otype == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; + /* cannot change into P2P device type */ + if (ntype == NL80211_IFTYPE_P2P_DEVICE) + return -EOPNOTSUPP; + if (!rdev->ops->change_virtual_intf || !(rdev->wiphy.interface_modes & (1 << ntype))) return -EOPNOTSUPP; @@ -889,6 +881,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, case NUM_NL80211_IFTYPES: /* not happening */ break; + case NL80211_IFTYPE_P2P_DEVICE: + WARN_ON(1); + break; } } @@ -1053,8 +1048,15 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { if (wdev_iter == wdev) continue; - if (!netif_running(wdev_iter->netdev)) - continue; + if (wdev_iter->netdev) { + if (!netif_running(wdev_iter->netdev)) + continue; + } else if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) { + if (!wdev_iter->p2p_started) + continue; + } else { + WARN_ON(1); + } if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype)) continue; diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index b0eb7aa49b60..c8717c1d082e 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -478,13 +478,13 @@ void wireless_send_event(struct net_device * dev, if (descr->header_type == IW_HEADER_TYPE_POINT) { /* Check if number of token fits within bounds */ if (wrqu->data.length > descr->max_tokens) { - netdev_err(dev, "(WE) : Wireless Event too big (%d)\n", - wrqu->data.length); + netdev_err(dev, "(WE) : Wireless Event (cmd=0x%04X) too big (%d)\n", + cmd, wrqu->data.length); return; } if (wrqu->data.length < descr->min_tokens) { - netdev_err(dev, "(WE) : Wireless Event too small (%d)\n", - wrqu->data.length); + netdev_err(dev, "(WE) : Wireless Event (cmd=0x%04X) too small (%d)\n", + cmd, wrqu->data.length); return; } /* Calculate extra_len - extra is NULL for restricted events */ diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 54a0dc2e2f8d..ab2bb42fe094 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -212,7 +212,7 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (async && x->repl->check(x, skb, seq)) { + if (async && x->repl->recheck(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165a5927..41eabc46f110 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -42,13 +42,12 @@ static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); static struct dst_entry *xfrm_policy_sk_bundles; static DEFINE_RWLOCK(xfrm_policy_lock); -static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); -static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; +static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); +static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] + __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); @@ -95,6 +94,24 @@ bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl return false; } +static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo; + + if (unlikely(family >= NPROTO)) + return NULL; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[family]); + if (unlikely(!afinfo)) + rcu_read_unlock(); + return afinfo; +} + +static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + rcu_read_unlock(); +} + static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, const xfrm_address_t *saddr, const xfrm_address_t *daddr, @@ -585,6 +602,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_pol_hold(policy); net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); + rt_genid_bump(net); if (delpol) __xfrm_policy_unlink(delpol, dir); policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); @@ -1357,6 +1375,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); xdst->flo.ops = &xfrm_bundle_fc_ops; + if (afinfo->init_dst) + afinfo->init_dst(net, xdst); } else xdst = ERR_PTR(-ENOBUFS); @@ -1761,7 +1781,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, if (!afinfo) { dst_release(dst_orig); - ret = ERR_PTR(-EINVAL); + return ERR_PTR(-EINVAL); } else { ret = afinfo->blackhole_route(net, dst_orig); } @@ -2418,7 +2438,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { @@ -2439,9 +2459,9 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(afinfo->garbage_collect == NULL)) afinfo->garbage_collect = xfrm_garbage_collect_deferred; - xfrm_policy_afinfo[afinfo->family] = afinfo; + rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); } - write_unlock_bh(&xfrm_policy_afinfo_lock); + spin_unlock(&xfrm_policy_afinfo_lock); rtnl_lock(); for_each_net(net) { @@ -2474,21 +2494,26 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock(&xfrm_policy_afinfo_lock); if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { - struct dst_ops *dst_ops = afinfo->dst_ops; - xfrm_policy_afinfo[afinfo->family] = NULL; - dst_ops->kmem_cachep = NULL; - dst_ops->check = NULL; - dst_ops->negative_advice = NULL; - dst_ops->link_failure = NULL; - afinfo->garbage_collect = NULL; - } + else + RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], + NULL); + } + spin_unlock(&xfrm_policy_afinfo_lock); + if (!err) { + struct dst_ops *dst_ops = afinfo->dst_ops; + + synchronize_rcu(); + + dst_ops->kmem_cachep = NULL; + dst_ops->check = NULL; + dst_ops->negative_advice = NULL; + dst_ops->link_failure = NULL; + afinfo->garbage_collect = NULL; } - write_unlock_bh(&xfrm_policy_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); @@ -2497,33 +2522,16 @@ static void __net_init xfrm_dst_ops_init(struct net *net) { struct xfrm_policy_afinfo *afinfo; - read_lock_bh(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[AF_INET]; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); if (afinfo) net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; #if IS_ENABLED(CONFIG_IPV6) - afinfo = xfrm_policy_afinfo[AF_INET6]; + afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); if (afinfo) net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; #endif - read_unlock_bh(&xfrm_policy_afinfo_lock); -} - -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - read_lock(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[family]; - if (unlikely(!afinfo)) - read_unlock(&xfrm_policy_afinfo_lock); - return afinfo; -} - -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - read_unlock(&xfrm_policy_afinfo_lock); + rcu_read_unlock(); } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -2630,12 +2638,12 @@ static void xfrm_policy_fini(struct net *net) flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); #endif - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); @@ -2742,7 +2750,7 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -2757,7 +2765,7 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 2f6d11d04a2b..3efb07d3eb27 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -420,6 +420,18 @@ err: return -EINVAL; } +static int xfrm_replay_recheck_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != + htonl(xfrm_replay_seqhi(x, net_seq)))) { + x->stats.replay_window++; + return -EINVAL; + } + + return xfrm_replay_check_esn(x, skb, net_seq); +} + static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) { unsigned int bitnr, nr, i; @@ -479,6 +491,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) static struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, + .recheck = xfrm_replay_check, .notify = xfrm_replay_notify, .overflow = xfrm_replay_overflow, }; @@ -486,6 +499,7 @@ static struct xfrm_replay xfrm_replay_legacy = { static struct xfrm_replay xfrm_replay_bmp = { .advance = xfrm_replay_advance_bmp, .check = xfrm_replay_check_bmp, + .recheck = xfrm_replay_check_bmp, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_bmp, }; @@ -493,6 +507,7 @@ static struct xfrm_replay xfrm_replay_bmp = { static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, + .recheck = xfrm_replay_recheck_esn, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_esn, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5b228f97d4b3..3459692092ec 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -166,7 +166,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +void km_state_expired(struct xfrm_state *x, int hard, u32 portid); static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) { @@ -415,8 +415,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) if (x->lft.hard_add_expires_seconds) { long tmo = x->lft.hard_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) - goto expired; + if (tmo <= 0) { + if (x->xflags & XFRM_SOFT_EXPIRE) { + /* enter hard expire without soft expire first?! + * setting a new date could trigger this. + * workarbound: fix x->curflt.add_time by below: + */ + x->curlft.add_time = now - x->saved_tmo - 1; + tmo = x->lft.hard_add_expires_seconds - x->saved_tmo; + } else + goto expired; + } if (tmo < next) next = tmo; } @@ -433,10 +442,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) if (x->lft.soft_add_expires_seconds) { long tmo = x->lft.soft_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) + if (tmo <= 0) { warn = 1; - else if (tmo < next) + x->xflags &= ~XFRM_SOFT_EXPIRE; + } else if (tmo < next) { next = tmo; + x->xflags |= XFRM_SOFT_EXPIRE; + x->saved_tmo = tmo; + } } if (x->lft.soft_use_expires_seconds) { long tmo = x->lft.soft_use_expires_seconds + @@ -1661,13 +1674,13 @@ void km_state_notify(struct xfrm_state *x, const struct km_event *c) EXPORT_SYMBOL(km_policy_notify); EXPORT_SYMBOL(km_state_notify); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid) +void km_state_expired(struct xfrm_state *x, int hard, u32 portid) { struct net *net = xs_net(x); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_EXPIRE; km_state_notify(x, &c); @@ -1687,7 +1700,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT); + acqret = km->acquire(x, t, pol); if (!acqret) err = acqret; } @@ -1713,13 +1726,13 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) } EXPORT_SYMBOL(km_new_mapping); -void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) +void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) { struct net *net = xp_net(pol); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_POLEXPIRE; km_policy_notify(pol, dir, &c); @@ -1981,8 +1994,10 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay) goto error; x->outer_mode = xfrm_get_mode(x->props.mode, family); - if (x->outer_mode == NULL) + if (x->outer_mode == NULL) { + err = -EPROTONOSUPPORT; goto error; + } if (init_replay) { err = xfrm_init_replay(x); @@ -2045,7 +2060,7 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); @@ -2112,7 +2127,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } void xfrm_audit_state_add(struct xfrm_state *x, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -2127,7 +2142,7 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result, EXPORT_SYMBOL_GPL(xfrm_audit_state_add); void xfrm_audit_state_delete(struct xfrm_state *x, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75d8e47f35c..421f98444335 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -123,9 +123,21 @@ static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + struct xfrm_replay_state_esn *rs; - if ((p->flags & XFRM_STATE_ESN) && !rt) - return -EINVAL; + if (p->flags & XFRM_STATE_ESN) { + if (!rt) + return -EINVAL; + + rs = nla_data(rt); + + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; + + if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; + } if (!rt) return 0; @@ -370,14 +382,15 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es struct nlattr *rp) { struct xfrm_replay_state_esn *up; + int ulen; if (!replay_esn || !rp) return 0; up = nla_data(rp); + ulen = xfrm_replay_state_esn_len(up); - if (xfrm_replay_state_esn_len(replay_esn) != - xfrm_replay_state_esn_len(up)) + if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; return 0; @@ -388,22 +401,28 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; + int klen, ulen; if (!rta) return 0; up = nla_data(rta); + klen = xfrm_replay_state_esn_len(up); + ulen = nla_len(rta) >= klen ? klen : sizeof(*up); - p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + p = kzalloc(klen, GFP_KERNEL); if (!p) return -ENOMEM; - pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + pp = kzalloc(klen, GFP_KERNEL); if (!pp) { kfree(p); return -ENOMEM; } + memcpy(p, up, ulen); + memcpy(pp, up, ulen); + *replay_esn = p; *preplay_esn = pp; @@ -442,10 +461,11 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * * somehow made shareable and move it to xfrm_state.c - JHS * */ -static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, + int update_esn) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; - struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; + struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; @@ -555,7 +575,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; /* override default values from above */ - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 0); return x; @@ -575,7 +595,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; int err; struct km_event c; - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -603,7 +623,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, } c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -654,7 +674,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -676,7 +696,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -689,6 +709,7 @@ out: static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { + memset(p, 0, sizeof(*p)); memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); @@ -742,7 +763,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) return -EMSGSIZE; algo = nla_data(nla); - strcpy(algo->alg_name, auth->alg_name); + strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name)); memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); algo->alg_key_len = auth->alg_key_len; @@ -826,7 +847,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -878,6 +899,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) @@ -888,9 +910,10 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_state(x, 0, &info)) { + err = dump_one_state(x, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; @@ -904,7 +927,7 @@ static inline size_t xfrm_spdinfo_msgsize(void) } static int build_spdinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; @@ -913,7 +936,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -946,17 +969,17 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static inline size_t xfrm_sadinfo_msgsize(void) @@ -967,7 +990,7 @@ static inline size_t xfrm_sadinfo_msgsize(void) } static int build_sadinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; @@ -975,7 +998,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -1003,17 +1026,17 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1033,7 +1056,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1114,7 +1137,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -1317,6 +1340,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) { + memset(p, 0, sizeof(*p)); memcpy(&p->sel, &xp->selector, sizeof(p->sel)); memcpy(&p->lft, &xp->lft, sizeof(p->lft)); memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); @@ -1369,7 +1393,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int err; int excl; - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1401,7 +1425,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); xfrm_pol_put(xp); @@ -1421,6 +1445,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + memset(up, 0, sizeof(*up)); memcpy(&up->id, &kp->id, sizeof(up->id)); up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); @@ -1486,7 +1511,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -1546,6 +1571,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) @@ -1556,9 +1582,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_policy(xp, dir, 0, &info) < 0) { + err = dump_one_policy(xp, dir, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; @@ -1621,10 +1648,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, err = PTR_ERR(resp_skb); } else { err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); } } else { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1638,7 +1665,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.byid = p->index; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); } @@ -1668,7 +1695,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_state_notify(NULL, &c); @@ -1695,7 +1722,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); if (nlh == NULL) return -EMSGSIZE; @@ -1777,11 +1804,11 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, spin_lock_bh(&x->lock); c.data.aevent = p->flags; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -1822,12 +1849,12 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; spin_lock_bh(&x->lock); - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 1); spin_unlock_bh(&x->lock); c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.data.aevent = XFRM_AE_CU; km_state_notify(x, &c); err = 0; @@ -1862,7 +1889,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_policy_notify(NULL, 0, &c); return 0; @@ -1918,7 +1945,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1930,7 +1957,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, // reset the timers here? WARN(1, "Dont know what to do with soft policy expire\n"); } - km_policy_expired(xp, p->dir, up->hard, current->pid); + km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid); out: xfrm_pol_put(xp); @@ -1958,10 +1985,10 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = -EINVAL; if (x->km.state != XFRM_STATE_VALID) goto out; - km_state_expired(x, ue->hard, current->pid); + km_state_expired(x, ue->hard, nlh->nlmsg_pid); if (ue->hard) { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -2370,7 +2397,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2429,7 +2456,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); if (nlh == NULL) { kfree_skb(skb); return -EMSGSIZE; @@ -2497,7 +2524,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2567,8 +2594,7 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, } static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, - struct xfrm_tmpl *xt, struct xfrm_policy *xp, - int dir) + struct xfrm_tmpl *xt, struct xfrm_policy *xp) { __u32 seq = xfrm_get_acqseq(); struct xfrm_user_acquire *ua; @@ -2583,7 +2609,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, memcpy(&ua->id, &x->id, sizeof(ua->id)); memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); - copy_to_user_policy(xp, &ua->policy, dir); + copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT); ua->aalgos = xt->aalgos; ua->ealgos = xt->ealgos; ua->calgos = xt->calgos; @@ -2605,7 +2631,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, - struct xfrm_policy *xp, int dir) + struct xfrm_policy *xp) { struct net *net = xs_net(x); struct sk_buff *skb; @@ -2614,7 +2640,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (skb == NULL) return -ENOMEM; - if (build_acquire(skb, x, xt, xp, dir) < 0) + if (build_acquire(skb, x, xt, xp) < 0) BUG(); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); @@ -2697,7 +2723,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2757,7 +2783,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2811,7 +2837,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2964,7 +2990,7 @@ static int __net_init xfrm_user_net_init(struct net *net) .input = xfrm_netlink_rcv, }; - nlsk = netlink_kernel_create(net, NETLINK_XFRM, THIS_MODULE, &cfg); + nlsk = netlink_kernel_create(net, NETLINK_XFRM, &cfg); if (nlsk == NULL) return -ENOMEM; net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ |