author     Linus Torvalds <torvalds@linux-foundation.org>  2019-09-01 18:45:28 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-09-01 18:45:28 -0700
commit     345464fb760d1b772e891538b498e111c588b692
tree       22d1135650f8ddb25707c788cd4846c77b428ff0 /net
parent     9f159ae07f07fc540290f21937231034f554bdd7
parent     e1e54ec7fb55501c33b117c111cb0a045b8eded2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from David Miller:
1) Fix some length checks during OGM processing in batman-adv, from
Sven Eckelmann.
2) Fix regression that caused netfilter conntrack sysctls to not be
per-netns any more. From Florian Westphal.
3) Fix a use-after-free in netpoll, from Feng Sun (see the first sketch
after this list).
4) Guard destruction of pfifo_fast per-cpu qdisc stats with
qdisc_is_percpu_stats(), from Davide Caratti. A similar bug is fixed
in pfifo_fast_enqueue().
5) Fix memory leak in mld_del_delrec(), from Eric Dumazet.
6) Handle neigh events on internal ports correctly in nfp, from John
Hurley.
7) Clear SKB timestamp in NF flow table code so that it does not
confuse the fq scheduler. From Florian Westphal.
8) taprio destroy can crash if it is invoked in a failure path of
taprio_init(), because the list head isn't set up properly yet and
the list del is unconditional. Perform the list add earlier to
address this (see the second sketch after this list). From Vladimir
Oltean.
9) Make sure to reapply vlan filters on device up, in the aquantia
driver. From Dmitry Bogdanov.
10) The sgiseeq driver releases DMA memory using free_page() instead of
dma_free_attrs() (see the third sketch after this list). From
Christophe JAILLET.
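First sketch, for fix 3: a hedged illustration of the ownership rule, not
the netpoll code itself. A driver's ndo_start_xmit() can return NET_XMIT_*
codes that differ from NETDEV_TX_OK yet still mean the skb was consumed, so
a bare "status != NETDEV_TX_OK, requeue it" check ends up requeueing an skb
the driver already freed. dev_xmit_complete() expresses the actual rule,
which the netpoll hunks in the diff below now use. The helper name here is
invented for illustration.

    #include <linux/netdevice.h>

    /* Sketch: only requeue when the skb still belongs to us.
     * dev_xmit_complete() returns true for NETDEV_TX_OK *and* for the
     * NET_XMIT_* drop codes, in all of which the driver consumed the skb.
     */
    static bool xmit_needs_requeue(netdev_tx_t status)
    {
        return !dev_xmit_complete(status);
    }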
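Second sketch, for fix 8: the taprio bug pattern in miniature. This is
self-contained userspace C with an invented list implementation, not the
kernel code: destroy() performs an unconditional list_del(), so init() must
link the node before any error exit can hand the object to destroy().

    /* Compile with: cc -o listfix listfix.c */
    #include <stdio.h>
    #include <stdlib.h>

    struct list_head { struct list_head *next, *prev; };

    static void list_add(struct list_head *n, struct list_head *head)
    {
        n->next = head->next;
        n->prev = head;
        head->next->prev = n;
        head->next = n;
    }

    static void list_del(struct list_head *n)
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
    }

    static struct list_head sched_list = { &sched_list, &sched_list };

    struct sched { struct list_head node; };

    /* Mirrors taprio_destroy(): the list_del() is unconditional, so it
     * crashes (NULL deref) if the node was never linked.
     */
    static void destroy(struct sched *s)
    {
        list_del(&s->node);
        free(s);
    }

    static int init(struct sched *s, int fail_early)
    {
        /* The fix: link the node *before* any failure path can run, so
         * the error path's destroy() always sees a valid list node.
         */
        list_add(&s->node, &sched_list);

        if (fail_early)
            return -1;    /* caller will invoke destroy() */
        return 0;
    }

    int main(void)
    {
        struct sched *s = calloc(1, sizeof(*s));

        if (init(s, 1) < 0)
            destroy(s);    /* safe: node was added in init() */
        printf("ok\n");
        return 0;
    }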
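Third sketch, for fix 10: pairing allocators. This is a hedged kernel-style
illustration only; the structure, names and size are made up, not taken
from sgiseeq. Memory obtained with dma_alloc_attrs() must be returned with
dma_free_attrs() using the same device, size, handle and attrs; calling
free_page() on the CPU address bypasses the DMA API's bookkeeping.

    #include <linux/dma-mapping.h>

    struct ring_buf {
        void *cpu_addr;
        dma_addr_t dma_addr;
        size_t size;
    };

    static int ring_alloc(struct device *dev, struct ring_buf *rb)
    {
        rb->size = 4096;
        rb->cpu_addr = dma_alloc_attrs(dev, rb->size, &rb->dma_addr,
                                       GFP_KERNEL, 0);
        return rb->cpu_addr ? 0 : -ENOMEM;
    }

    static void ring_free(struct device *dev, struct ring_buf *rb)
    {
        /* Pair with dma_alloc_attrs(); do not use free_page() here. */
        dma_free_attrs(dev, rb->size, rb->cpu_addr, rb->dma_addr, 0);
    }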
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (58 commits)
net: seeq: Fix the function used to release some memory in an error handling path
enetc: Add missing call to 'pci_free_irq_vectors()' in probe and remove functions
net: bcmgenet: use ethtool_op_get_ts_info()
tc-testing: don't hardcode 'ip' in nsPlugin.py
net: dsa: microchip: add KSZ8563 compatibility string
dt-bindings: net: dsa: document additional Microchip KSZ8563 switch
net: aquantia: fix out of memory condition on rx side
net: aquantia: linkstate irq should be oneshot
net: aquantia: reapply vlan filters on up
net: aquantia: fix limit of vlan filters
net: aquantia: fix removal of vlan 0
net/sched: cbs: Set default link speed to 10 Mbps in cbs_set_port_rate
taprio: Set default link speed to 10 Mbps in taprio_set_picos_per_byte
taprio: Fix kernel panic in taprio_destroy
net: dsa: microchip: fill regmap_config name
rxrpc: Fix lack of conn cleanup when local endpoint is cleaned up [ver #2]
net: stmmac: dwmac-rk: Don't fail if phy regulator is absent
amd-xgbe: Fix error path in xgbe_mod_init()
netfilter: nft_meta_bridge: Fix get NFT_META_BRI_IIFVPROTO in network byteorder
mac80211: Correctly set noencrypt for PAE frames
...
Diffstat (limited to 'net')
57 files changed, 584 insertions(+), 402 deletions(-)
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 240ed70912d6..d78938e3e008 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -277,17 +277,23 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv) * batadv_iv_ogm_aggr_packet() - checks if there is another OGM attached * @buff_pos: current position in the skb * @packet_len: total length of the skb - * @tvlv_len: tvlv length of the previously considered OGM + * @ogm_packet: potential OGM in buffer * * Return: true if there is enough space for another OGM, false otherwise. */ -static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, - __be16 tvlv_len) +static bool +batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, + const struct batadv_ogm_packet *ogm_packet) { int next_buff_pos = 0; - next_buff_pos += buff_pos + BATADV_OGM_HLEN; - next_buff_pos += ntohs(tvlv_len); + /* check if there is enough space for the header */ + next_buff_pos += buff_pos + sizeof(*ogm_packet); + if (next_buff_pos > packet_len) + return false; + + /* check if there is enough space for the optional TVLV */ + next_buff_pos += ntohs(ogm_packet->tvlv_len); return (next_buff_pos <= packet_len) && (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); @@ -315,7 +321,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, - batadv_ogm_packet->tvlv_len)) { + batadv_ogm_packet)) { /* we might have aggregated direct link packets with an * ordinary base packet */ @@ -1704,7 +1710,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, /* unpack the aggregated packets and process them one by one */ while (batadv_iv_ogm_aggr_packet(ogm_offset, skb_headlen(skb), - ogm_packet->tvlv_len)) { + ogm_packet)) { batadv_iv_ogm_process(skb, ogm_offset, if_incoming); ogm_offset += BATADV_OGM_HLEN; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index fad95ef64e01..bc06e3cdfa84 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -631,17 +631,23 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, * batadv_v_ogm_aggr_packet() - checks if there is another OGM aggregated * @buff_pos: current position in the skb * @packet_len: total length of the skb - * @tvlv_len: tvlv length of the previously considered OGM + * @ogm2_packet: potential OGM2 in buffer * * Return: true if there is enough space for another OGM, false otherwise. 
*/ -static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, - __be16 tvlv_len) +static bool +batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, + const struct batadv_ogm2_packet *ogm2_packet) { int next_buff_pos = 0; - next_buff_pos += buff_pos + BATADV_OGM2_HLEN; - next_buff_pos += ntohs(tvlv_len); + /* check if there is enough space for the header */ + next_buff_pos += buff_pos + sizeof(*ogm2_packet); + if (next_buff_pos > packet_len) + return false; + + /* check if there is enough space for the optional TVLV */ + next_buff_pos += ntohs(ogm2_packet->tvlv_len); return (next_buff_pos <= packet_len) && (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); @@ -818,7 +824,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, ogm_packet = (struct batadv_ogm2_packet *)skb->data; while (batadv_v_ogm_aggr_packet(ogm_offset, skb_headlen(skb), - ogm_packet->tvlv_len)) { + ogm_packet)) { batadv_v_ogm_process(skb, ogm_offset, if_incoming); ogm_offset += BATADV_OGM2_HLEN; diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 1804e867f715..7c9e92b2f806 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -53,7 +53,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, goto err; br_vlan_get_proto(br_dev, &p_proto); - nft_reg_store16(dest, p_proto); + nft_reg_store16(dest, htons(p_proto)); return; } default: diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2cf27da1baeb..849380a622ef 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -122,7 +122,7 @@ static void queue_process(struct work_struct *work) txq = netdev_get_tx_queue(dev, q_index); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || - netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { + !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) { skb_queue_head(&npinfo->txq, skb); HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); @@ -335,7 +335,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, HARD_TX_UNLOCK(dev, txq); - if (status == NETDEV_TX_OK) + if (dev_xmit_complete(status)) break; } @@ -352,7 +352,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } - if (status != NETDEV_TX_OK) { + if (!dev_xmit_complete(status)) { skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 6ebbd799c4eb..67a1bc635a7b 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -28,6 +28,7 @@ * * RSV - VID[9]: * To be used for further expansion of SWITCH_ID or for other purposes. + * Must be transmitted as zero and ignored on receive. * * SWITCH_ID - VID[8:6]: * Index of switch within DSA tree. Must be between 0 and @@ -35,6 +36,7 @@ * * RSV - VID[5:4]: * To be used for further expansion of PORT or for other purposes. + * Must be transmitted as zero and ignored on receive. * * PORT - VID[3:0]: * Index of switch port. Must be between 0 and DSA_MAX_PORTS - 1. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 77b485d60b9d..61082065b26a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -935,6 +935,22 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } +/* In some cases, both sendpage() and sendmsg() could have added + * an skb to the write queue, but failed adding payload on it. + * We need to remove it to consume less memory, but more + * importantly be able to generate EPOLLOUT for Edge Trigger epoll() + * users. 
+ */ +static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) +{ + if (skb && !skb->len) { + tcp_unlink_write_queue(skb, sk); + if (tcp_write_queue_empty(sk)) + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + sk_wmem_free_skb(sk, skb); + } +} + ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1064,6 +1080,7 @@ out: return copied; do_error: + tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk)); if (copied) goto out; out_err: @@ -1388,18 +1405,11 @@ out_nopush: sock_zerocopy_put(uarg); return copied + copied_syn; +do_error: + skb = tcp_write_queue_tail(sk); do_fault: - if (!skb->len) { - tcp_unlink_write_queue(skb, sk); - /* It is the one place in all of TCP, except connection - * reset, where we can be unlinking the send_head. - */ - if (tcp_write_queue_empty(sk)) - tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - sk_wmem_free_skb(sk, skb); - } + tcp_remove_empty_skb(sk, skb); -do_error: if (copied + copied_syn) goto out; out_err: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 979520e46e33..8a645f304e6c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2053,7 +2053,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) if (len <= skb->len) break; - if (unlikely(TCP_SKB_CB(skb)->eor)) + if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb)) return false; len -= skb->len; @@ -2170,6 +2170,7 @@ static int tcp_mtu_probe(struct sock *sk) * we need to propagate it to the new skb. */ TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor; + tcp_skb_collapse_tstamp(nskb, skb); tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); } else { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 7f3f13c37916..eaa4c2cc2fbb 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -787,14 +787,15 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) if (pmc) { im->idev = pmc->idev; if (im->mca_sfmode == MCAST_INCLUDE) { - im->mca_tomb = pmc->mca_tomb; - im->mca_sources = pmc->mca_sources; + swap(im->mca_tomb, pmc->mca_tomb); + swap(im->mca_sources, pmc->mca_sources); for (psf = im->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = idev->mc_qrv; } else { im->mca_crcount = idev->mc_qrv; } in6_dev_put(pmc->idev); + ip6_mc_clear_src(pmc); kfree(pmc); } spin_unlock_bh(&im->mca_lock); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3c1ab870fefe..768d14c9a716 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2447,11 +2447,13 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, skb->protocol == cpu_to_be16(ETH_P_PREAUTH)) && sdata->control_port_over_nl80211)) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - bool noencrypt = status->flag & RX_FLAG_DECRYPTED; + bool noencrypt = !(status->flag & RX_FLAG_DECRYPTED); cfg80211_rx_control_port(dev, skb, noencrypt); dev_kfree_skb(skb); } else { + memset(skb->cb, 0, sizeof(skb->cb)); + /* deliver to local stack */ if (rx->napi) napi_gro_receive(rx->napi, skb); @@ -2546,8 +2548,6 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if (skb) { skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - ieee80211_deliver_skb_to_local_stack(skb, rx); } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 0ecb3e289ef2..8d96738b7dfd 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -322,7 +322,7 @@ static int find_pattern(const char *data, size_t dlen, i++; } - pr_debug("Skipped up to 
`%c'!\n", skip); + pr_debug("Skipped up to 0x%hhx delimiter!\n", skip); *numoff = i; *numlen = getnum(data + i, dlen - i, cmd, term, numoff); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e0d392cb3075..0006503d2da9 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -1037,9 +1037,14 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[NF_SYSCTL_CT_COUNT].data = &net->ct.count; table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum; table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid; + table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct; + table[NF_SYSCTL_CT_HELPER].data = &net->ct.sysctl_auto_assign_helper; #ifdef CONFIG_NF_CONNTRACK_EVENTS table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; #endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + table[NF_SYSCTL_CT_TIMESTAMP].data = &net->ct.sysctl_tstamp; +#endif table[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC].data = &nf_generic_pernet(net)->timeout; table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP].data = &nf_icmp_pernet(net)->timeout; table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index d68c801dd614..b9e7dd6e60ce 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -228,7 +228,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, { skb_orphan(skb); skb_dst_set_noref(skb, dst); - skb->tstamp = 0; dst_output(state->net, state->sk, skb); return NF_STOLEN; } @@ -284,6 +283,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; iph = ip_hdr(skb); ip_decrease_ttl(iph); + skb->tstamp = 0; if (unlikely(dst_xfrm(&rt->dst))) { memset(skb->cb, 0, sizeof(struct inet_skb_parm)); @@ -512,6 +512,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; ip6h = ipv6_hdr(skb); ip6h->hop_limit--; + skb->tstamp = 0; if (unlikely(dst_xfrm(&rt->dst))) { memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index ead7c6022208..b92b22ce8abd 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -101,11 +101,9 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && - par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { + par->hook_mask & (1 << NF_INET_LOCAL_OUT)) { pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n"); - if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) - return -EINVAL; + return -EINVAL; } if (!brnf_probed) { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index d8da6477d6be..05249eb45082 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -525,6 +525,11 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, return -EPFNOSUPPORT; } + /* The key extracted from the fragment that completed this datagram + * likely didn't have an L4 header, so regenerate it. 
+ */ + ovs_flow_key_update_l3l4(skb, key); + key->ip.frag = OVS_FRAG_TYPE_NONE; skb_clear_hash(skb); skb->ignore_df = 1; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index bc89e16e0505..9d81d2c7bf82 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -523,78 +523,15 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) } /** - * key_extract - extracts a flow key from an Ethernet frame. + * key_extract_l3l4 - extracts L3/L4 header information. * @skb: sk_buff that contains the frame, with skb->data pointing to the - * Ethernet header + * L3 header * @key: output flow key * - * The caller must ensure that skb->len >= ETH_HLEN. - * - * Returns 0 if successful, otherwise a negative errno value. - * - * Initializes @skb header fields as follows: - * - * - skb->mac_header: the L2 header. - * - * - skb->network_header: just past the L2 header, or just past the - * VLAN header, to the first byte of the L2 payload. - * - * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 - * on output, then just past the IP header, if one is present and - * of a correct length, otherwise the same as skb->network_header. - * For other key->eth.type values it is left untouched. - * - * - skb->protocol: the type of the data starting at skb->network_header. - * Equals to key->eth.type. */ -static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) +static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) { int error; - struct ethhdr *eth; - - /* Flags are always used as part of stats */ - key->tp.flags = 0; - - skb_reset_mac_header(skb); - - /* Link layer. */ - clear_vlan(key); - if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { - if (unlikely(eth_type_vlan(skb->protocol))) - return -EINVAL; - - skb_reset_network_header(skb); - key->eth.type = skb->protocol; - } else { - eth = eth_hdr(skb); - ether_addr_copy(key->eth.src, eth->h_source); - ether_addr_copy(key->eth.dst, eth->h_dest); - - __skb_pull(skb, 2 * ETH_ALEN); - /* We are going to push all headers that we pull, so no need to - * update skb->csum here. - */ - - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; - - key->eth.type = parse_ethertype(skb); - if (unlikely(key->eth.type == htons(0))) - return -ENOMEM; - - /* Multiple tagged packets need to retain TPID to satisfy - * skb_vlan_pop(), which will later shift the ethertype into - * skb->protocol. - */ - if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK)) - skb->protocol = key->eth.cvlan.tpid; - else - skb->protocol = key->eth.type; - - skb_reset_network_header(skb); - __skb_push(skb, skb->data - skb_mac_header(skb)); - } - skb_reset_mac_len(skb); /* Network layer. */ if (key->eth.type == htons(ETH_P_IP)) { @@ -623,6 +560,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) offset = nh->frag_off & htons(IP_OFFSET); if (offset) { key->ip.frag = OVS_FRAG_TYPE_LATER; + memset(&key->tp, 0, sizeof(key->tp)); return 0; } if (nh->frag_off & htons(IP_MF) || @@ -740,8 +678,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) return error; } - if (key->ip.frag == OVS_FRAG_TYPE_LATER) + if (key->ip.frag == OVS_FRAG_TYPE_LATER) { + memset(&key->tp, 0, sizeof(key->tp)); return 0; + } if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) key->ip.frag = OVS_FRAG_TYPE_FIRST; @@ -788,6 +728,92 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) return 0; } +/** + * key_extract - extracts a flow key from an Ethernet frame. 
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the + * Ethernet header + * @key: output flow key + * + * The caller must ensure that skb->len >= ETH_HLEN. + * + * Returns 0 if successful, otherwise a negative errno value. + * + * Initializes @skb header fields as follows: + * + * - skb->mac_header: the L2 header. + * + * - skb->network_header: just past the L2 header, or just past the + * VLAN header, to the first byte of the L2 payload. + * + * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 + * on output, then just past the IP header, if one is present and + * of a correct length, otherwise the same as skb->network_header. + * For other key->eth.type values it is left untouched. + * + * - skb->protocol: the type of the data starting at skb->network_header. + * Equals to key->eth.type. + */ +static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) +{ + struct ethhdr *eth; + + /* Flags are always used as part of stats */ + key->tp.flags = 0; + + skb_reset_mac_header(skb); + + /* Link layer. */ + clear_vlan(key); + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { + if (unlikely(eth_type_vlan(skb->protocol))) + return -EINVAL; + + skb_reset_network_header(skb); + key->eth.type = skb->protocol; + } else { + eth = eth_hdr(skb); + ether_addr_copy(key->eth.src, eth->h_source); + ether_addr_copy(key->eth.dst, eth->h_dest); + + __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. + */ + + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; + + key->eth.type = parse_ethertype(skb); + if (unlikely(key->eth.type == htons(0))) + return -ENOMEM; + + /* Multiple tagged packets need to retain TPID to satisfy + * skb_vlan_pop(), which will later shift the ethertype into + * skb->protocol. + */ + if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK)) + skb->protocol = key->eth.cvlan.tpid; + else + skb->protocol = key->eth.type; + + skb_reset_network_header(skb); + __skb_push(skb, skb->data - skb_mac_header(skb)); + } + + skb_reset_mac_len(skb); + + /* Fill out L3/L4 key info, if any */ + return key_extract_l3l4(skb, key); +} + +/* In the case of conntrack fragment handling it expects L3 headers, + * add a helper. 
+ */ +int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key) +{ + return key_extract_l3l4(skb, key); +} + int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { int res; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a5506e2d4b7a..b830d5ff7af4 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -270,6 +270,7 @@ void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key); +int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key); int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key); diff --git a/net/psample/psample.c b/net/psample/psample.c index 841f198ea1a8..66e4b61a350d 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -154,7 +154,7 @@ static void psample_group_destroy(struct psample_group *group) { psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP); list_del(&group->list); - kfree(group); + kfree_rcu(group, rcu); } static struct psample_group * diff --git a/net/rds/recv.c b/net/rds/recv.c index 853de4876088..a42ba7fa06d5 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -811,6 +811,7 @@ void rds6_inc_info_copy(struct rds_incoming *inc, minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence); minfo6.len = be32_to_cpu(inc->i_hdr.h_len); + minfo6.tos = inc->i_conn->c_tos; if (flip) { minfo6.laddr = *daddr; @@ -824,6 +825,8 @@ void rds6_inc_info_copy(struct rds_incoming *inc, minfo6.fport = inc->i_hdr.h_dport; } + minfo6.flags = 0; + rds_info_copy(iter, &minfo6, sizeof(minfo6)); } #endif diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0dbbfd1b6487..d72ddb67bb74 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -862,7 +862,6 @@ static void rxrpc_sock_destructor(struct sock *sk) static int rxrpc_release_sock(struct sock *sk) { struct rxrpc_sock *rx = rxrpc_sk(sk); - struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); @@ -898,8 +897,6 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); - rxrpc_queue_work(&rxnet->service_conn_reaper); - rxrpc_queue_work(&rxnet->client_conn_reaper); rxrpc_unuse_local(rx->local); rx->local = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 145335611af6..8051dfdcf26d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -185,11 +185,17 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - union { - u8 nr_jumbo; /* Number of jumbo subpackets */ - }; + atomic_t nr_ring_pins; /* Number of rxtx ring pins */ + u8 nr_subpackets; /* Number of subpackets */ + u8 rx_flags; /* Received packet flags */ +#define RXRPC_SKB_INCL_LAST 0x01 /* - Includes last packet */ +#define RXRPC_SKB_TX_BUFFER 0x02 /* - Is transmit buffer */ union { int remain; /* amount of space remaining for next write */ + + /* List of requested ACKs on subpackets */ + unsigned long rx_req_ack[(RXRPC_MAX_NR_JUMBO + BITS_PER_LONG - 1) / + 
BITS_PER_LONG]; }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@ -613,8 +619,7 @@ struct rxrpc_call { #define RXRPC_TX_ANNO_LAST 0x04 #define RXRPC_TX_ANNO_RESENT 0x08 -#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ -#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ +#define RXRPC_RX_ANNO_SUBPACKET 0x3f /* Subpacket number in jumbogram */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but * not hard-ACK'd packet follows this. @@ -905,6 +910,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *); void rxrpc_put_client_conn(struct rxrpc_connection *); void rxrpc_discard_expired_client_conns(struct work_struct *); void rxrpc_destroy_all_client_connections(struct rxrpc_net *); +void rxrpc_clean_up_local_conns(struct rxrpc_local *); /* * conn_event.c @@ -1105,6 +1111,7 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c767679bfa5d..cedbbb3a7c2e 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -199,7 +199,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) continue; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb, rxrpc_skb_tx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); if (anno_type == RXRPC_TX_ANNO_UNACK) { if (ktime_after(skb->tstamp, max_age)) { @@ -255,18 +255,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) continue; skb = call->rxtx_buffer[ix]; - rxrpc_get_skb(skb, rxrpc_skb_tx_got); + rxrpc_get_skb(skb, rxrpc_skb_got); spin_unlock_bh(&call->lock); if (rxrpc_send_data_packet(call, skb, true) < 0) { - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); return; } if (rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); spin_lock_bh(&call->lock); /* We need to clear the retransmit state, but there are two diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 217b12be9e08..014548c259ce 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -422,6 +422,19 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) } /* + * Clean up the RxTx skb ring. + */ +static void rxrpc_cleanup_ring(struct rxrpc_call *call) +{ + int i; + + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned); + call->rxtx_buffer[i] = NULL; + } +} + +/* * Detach a call from its owning socket. 
*/ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) @@ -429,7 +442,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; bool put = false; - int i; _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); @@ -479,13 +491,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) if (conn) rxrpc_disconnect_call(call); - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - rxrpc_free_skb(call->rxtx_buffer[i], - (call->tx_phase ? rxrpc_skb_tx_cleaned : - rxrpc_skb_rx_cleaned)); - call->rxtx_buffer[i] = NULL; - } - + rxrpc_cleanup_ring(call); _leave(""); } @@ -568,8 +574,6 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) */ void rxrpc_cleanup_call(struct rxrpc_call *call) { - int i; - _net("DESTROY CALL %d", call->debug_id); memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); @@ -580,13 +584,8 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); ASSERTCMP(call->conn, ==, NULL); - /* Clean up the Rx/Tx buffer */ - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) - rxrpc_free_skb(call->rxtx_buffer[i], - (call->tx_phase ? rxrpc_skb_tx_cleaned : - rxrpc_skb_rx_cleaned)); - - rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); + rxrpc_cleanup_ring(call); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index aea82f909c60..3f1da1b49f69 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -1162,3 +1162,47 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) _leave(""); } + +/* + * Clean up the client connections on a local endpoint. 
+ */ +void rxrpc_clean_up_local_conns(struct rxrpc_local *local) +{ + struct rxrpc_connection *conn, *tmp; + struct rxrpc_net *rxnet = local->rxnet; + unsigned int nr_active; + LIST_HEAD(graveyard); + + _enter(""); + + spin_lock(&rxnet->client_conn_cache_lock); + nr_active = rxnet->nr_active_client_conns; + + list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns, + cache_link) { + if (conn->params.local == local) { + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_IDLE); + + trace_rxrpc_client(conn, -1, rxrpc_client_discard); + if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + BUG(); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_move(&conn->cache_link, &graveyard); + nr_active--; + } + } + + rxnet->nr_active_client_conns = nr_active; + spin_unlock(&rxnet->client_conn_cache_lock); + ASSERTCMP(nr_active, >=, 0); + + while (!list_empty(&graveyard)) { + conn = list_entry(graveyard.next, + struct rxrpc_connection, cache_link); + list_del_init(&conn->cache_link); + + rxrpc_put_connection(conn); + } + + _leave(" [culled]"); +} diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index df6624c140be..a1ceef4f5cd0 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -472,7 +472,7 @@ void rxrpc_process_connection(struct work_struct *work) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { - rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -484,7 +484,7 @@ void rxrpc_process_connection(struct work_struct *work) goto requeue_and_leave; case -ECONNABORTED: default: - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); break; } } @@ -501,6 +501,6 @@ requeue_and_leave: protocol_error: if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); goto out; } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 434ef392212b..ed05b6922132 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -398,7 +398,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; - if (rxnet->live) { + if (rxnet->live && !conn->params.local->dead) { idle_timestamp = READ_ONCE(conn->idle_timestamp); expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; if (conn->params.local->service_closed) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index dd47d465d1d3..d122c53c8697 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -233,7 +233,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; annotation = call->rxtx_annotations[ix]; - rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); + rxrpc_see_skb(skb, rxrpc_skb_rotated); call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; skb->next = list; @@ -258,7 +258,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb = list; list = skb->next; skb_mark_not_on_list(skb); - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } return rot_last; @@ -347,7 +347,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) } /* - * Scan a jumbo packet to validate its structure and to work out how many + * 
Scan a data packet to validate its structure and to work out how many * subpackets it contains. * * A jumbo packet is a collection of consecutive packets glued together with @@ -358,16 +358,21 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any * size. */ -static bool rxrpc_validate_jumbo(struct sk_buff *skb) +static bool rxrpc_validate_data(struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len; - int nr_jumbo = 1; u8 flags = sp->hdr.flags; - do { - nr_jumbo++; + for (;;) { + if (flags & RXRPC_REQUEST_ACK) + __set_bit(sp->nr_subpackets, sp->rx_req_ack); + sp->nr_subpackets++; + + if (!(flags & RXRPC_JUMBO_PACKET)) + break; + if (len - offset < RXRPC_JUMBO_SUBPKTLEN) goto protocol_error; if (flags & RXRPC_LAST_PACKET) @@ -376,9 +381,10 @@ static bool rxrpc_validate_jumbo(struct sk_buff *skb) if (skb_copy_bits(skb, offset, &flags, 1) < 0) goto protocol_error; offset += sizeof(struct rxrpc_jumbo_header); - } while (flags & RXRPC_JUMBO_PACKET); + } - sp->nr_jumbo = nr_jumbo; + if (flags & RXRPC_LAST_PACKET) + sp->rx_flags |= RXRPC_SKB_INCL_LAST; return true; protocol_error: @@ -399,10 +405,10 @@ protocol_error: * (that information is encoded in the ACK packet). */ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, - u8 annotation, bool *_jumbo_bad) + bool is_jumbo, bool *_jumbo_bad) { /* Discard normal packets that are duplicates. */ - if (annotation == 0) + if (is_jumbo) return; /* Skip jumbo subpackets that are duplicates. When we've had three or @@ -416,29 +422,30 @@ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, } /* - * Process a DATA packet, adding the packet to the Rx ring. + * Process a DATA packet, adding the packet to the Rx ring. The caller's + * packet ref must be passed on or discarded. 
*/ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); enum rxrpc_call_state state; - unsigned int offset = sizeof(struct rxrpc_wire_header); - unsigned int ix; + unsigned int j; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; - rxrpc_seq_t seq = sp->hdr.seq, hard_ack; - bool immediate_ack = false, jumbo_bad = false, queued; - u16 len; - u8 ack = 0, flags, annotation = 0; + rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack; + bool immediate_ack = false, jumbo_bad = false; + u8 ack = 0; _enter("{%u,%u},{%u,%u}", - call->rx_hard_ack, call->rx_top, skb->len, seq); + call->rx_hard_ack, call->rx_top, skb->len, seq0); - _proto("Rx DATA %%%u { #%u f=%02x }", - sp->hdr.serial, seq, sp->hdr.flags); + _proto("Rx DATA %%%u { #%u f=%02x n=%u }", + sp->hdr.serial, seq0, sp->hdr.flags, sp->nr_subpackets); state = READ_ONCE(call->state); - if (state >= RXRPC_CALL_COMPLETE) + if (state >= RXRPC_CALL_COMPLETE) { + rxrpc_free_skb(skb, rxrpc_skb_freed); return; + } if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { unsigned long timo = READ_ONCE(call->next_req_timo); @@ -463,137 +470,137 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) !rxrpc_receiving_reply(call)) goto unlock; - call->ackr_prev_seq = seq; - + call->ackr_prev_seq = seq0; hard_ack = READ_ONCE(call->rx_hard_ack); - if (after(seq, hard_ack + call->rx_winsize)) { - ack = RXRPC_ACK_EXCEEDS_WINDOW; - ack_serial = serial; - goto ack; - } - flags = sp->hdr.flags; - if (flags & RXRPC_JUMBO_PACKET) { + if (sp->nr_subpackets > 1) { if (call->nr_jumbo_bad > 3) { ack = RXRPC_ACK_NOSPACE; ack_serial = serial; goto ack; } - annotation = 1; } -next_subpacket: - queued = false; - ix = seq & RXRPC_RXTX_BUFF_MASK; - len = skb->len; - if (flags & RXRPC_JUMBO_PACKET) - len = RXRPC_JUMBO_DATALEN; - - if (flags & RXRPC_LAST_PACKET) { - if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq != call->rx_top) { - rxrpc_proto_abort("LSN", call, seq); - goto unlock; - } - } else { - if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - after_eq(seq, call->rx_top)) { - rxrpc_proto_abort("LSA", call, seq); - goto unlock; + for (j = 0; j < sp->nr_subpackets; j++) { + rxrpc_serial_t serial = sp->hdr.serial + j; + rxrpc_seq_t seq = seq0 + j; + unsigned int ix = seq & RXRPC_RXTX_BUFF_MASK; + bool terminal = (j == sp->nr_subpackets - 1); + bool last = terminal && (sp->rx_flags & RXRPC_SKB_INCL_LAST); + u8 flags, annotation = j; + + _proto("Rx DATA+%u %%%u { #%x t=%u l=%u }", + j, serial, seq, terminal, last); + + if (last) { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + seq != call->rx_top) { + rxrpc_proto_abort("LSN", call, seq); + goto unlock; + } + } else { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + after_eq(seq, call->rx_top)) { + rxrpc_proto_abort("LSA", call, seq); + goto unlock; + } } - } - trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); - if (before_eq(seq, hard_ack)) { - ack = RXRPC_ACK_DUPLICATE; - ack_serial = serial; - goto skip; - } + flags = 0; + if (last) + flags |= RXRPC_LAST_PACKET; + if (!terminal) + flags |= RXRPC_JUMBO_PACKET; + if (test_bit(j, sp->rx_req_ack)) + flags |= RXRPC_REQUEST_ACK; + trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); - if (flags & RXRPC_REQUEST_ACK && !ack) { - ack = RXRPC_ACK_REQUESTED; - ack_serial = serial; - } - - if (call->rxtx_buffer[ix]) { - rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad); - if (ack != RXRPC_ACK_DUPLICATE) { + if (before_eq(seq, hard_ack)) { ack 
= RXRPC_ACK_DUPLICATE; ack_serial = serial; + continue; } - immediate_ack = true; - goto skip; - } - /* Queue the packet. We use a couple of memory barriers here as need - * to make sure that rx_top is perceived to be set after the buffer - * pointer and that the buffer pointer is set after the annotation and - * the skb data. - * - * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() - * and also rxrpc_fill_out_ack(). - */ - rxrpc_get_skb(skb, rxrpc_skb_rx_got); - call->rxtx_annotations[ix] = annotation; - smp_wmb(); - call->rxtx_buffer[ix] = skb; - if (after(seq, call->rx_top)) { - smp_store_release(&call->rx_top, seq); - } else if (before(seq, call->rx_top)) { - /* Send an immediate ACK if we fill in a hole */ - if (!ack) { - ack = RXRPC_ACK_DELAY; - ack_serial = serial; + if (call->rxtx_buffer[ix]) { + rxrpc_input_dup_data(call, seq, sp->nr_subpackets > 1, + &jumbo_bad); + if (ack != RXRPC_ACK_DUPLICATE) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + } + immediate_ack = true; + continue; } - immediate_ack = true; - } - if (flags & RXRPC_LAST_PACKET) { - set_bit(RXRPC_CALL_RX_LAST, &call->flags); - trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); - } else { - trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); - } - queued = true; - if (after_eq(seq, call->rx_expect_next)) { - if (after(seq, call->rx_expect_next)) { - _net("OOS %u > %u", seq, call->rx_expect_next); - ack = RXRPC_ACK_OUT_OF_SEQUENCE; - ack_serial = serial; - } - call->rx_expect_next = seq + 1; - } - -skip: - offset += len; - if (flags & RXRPC_JUMBO_PACKET) { - if (skb_copy_bits(skb, offset, &flags, 1) < 0) { - rxrpc_proto_abort("XJF", call, seq); - goto unlock; - } - offset += sizeof(struct rxrpc_jumbo_header); - seq++; - serial++; - annotation++; - if (flags & RXRPC_JUMBO_PACKET) - annotation |= RXRPC_RX_ANNO_JLAST; if (after(seq, hard_ack + call->rx_winsize)) { ack = RXRPC_ACK_EXCEEDS_WINDOW; ack_serial = serial; - if (!jumbo_bad) { - call->nr_jumbo_bad++; - jumbo_bad = true; + if (flags & RXRPC_JUMBO_PACKET) { + if (!jumbo_bad) { + call->nr_jumbo_bad++; + jumbo_bad = true; + } } + goto ack; } - _proto("Rx DATA Jumbo %%%u", serial); - goto next_subpacket; - } + if (flags & RXRPC_REQUEST_ACK && !ack) { + ack = RXRPC_ACK_REQUESTED; + ack_serial = serial; + } + + /* Queue the packet. We use a couple of memory barriers here as need + * to make sure that rx_top is perceived to be set after the buffer + * pointer and that the buffer pointer is set after the annotation and + * the skb data. + * + * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() + * and also rxrpc_fill_out_ack(). + */ + if (!terminal) + rxrpc_get_skb(skb, rxrpc_skb_got); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + if (after(seq, call->rx_top)) { + smp_store_release(&call->rx_top, seq); + } else if (before(seq, call->rx_top)) { + /* Send an immediate ACK if we fill in a hole */ + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + immediate_ack = true; + } + + if (terminal) { + /* From this point on, we're not allowed to touch the + * packet any longer as its ref now belongs to the Rx + * ring. 
+ */ + skb = NULL; + } - if (queued && flags & RXRPC_LAST_PACKET && !ack) { - ack = RXRPC_ACK_DELAY; - ack_serial = serial; + if (last) { + set_bit(RXRPC_CALL_RX_LAST, &call->flags); + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); + } else { + trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); + } + + if (after_eq(seq, call->rx_expect_next)) { + if (after(seq, call->rx_expect_next)) { + _net("OOS %u > %u", seq, call->rx_expect_next); + ack = RXRPC_ACK_OUT_OF_SEQUENCE; + ack_serial = serial; + } + call->rx_expect_next = seq + 1; + } } ack: @@ -606,13 +613,14 @@ ack: false, true, rxrpc_propose_ack_input_data); - if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) { + if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) { trace_rxrpc_notify_socket(call->debug_id, serial); rxrpc_notify_socket(call); } unlock: spin_unlock(&call->input_lock); + rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" [queued]"); } @@ -1021,7 +1029,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: rxrpc_input_data(call, skb); - break; + goto no_free; case RXRPC_PACKET_TYPE_ACK: rxrpc_input_ack(call, skb); @@ -1048,6 +1056,8 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, break; } + rxrpc_free_skb(skb, rxrpc_skb_freed); +no_free: _leave(""); } @@ -1109,7 +1119,7 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, skb_queue_tail(&local->event_queue, skb); rxrpc_queue_local(local); } else { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } } @@ -1124,7 +1134,7 @@ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) skb_queue_tail(&local->reject_queue, skb); rxrpc_queue_local(local); } else { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } } @@ -1188,7 +1198,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); - rxrpc_new_skb(skb, rxrpc_skb_rx_received); + rxrpc_new_skb(skb, rxrpc_skb_received); skb_pull(skb, sizeof(struct udphdr)); @@ -1205,7 +1215,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - rxrpc_free_skb(skb, rxrpc_skb_rx_lost); + rxrpc_free_skb(skb, rxrpc_skb_lost); return 0; } } @@ -1237,9 +1247,26 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) if (sp->hdr.callNumber == 0 || sp->hdr.seq == 0) goto bad_message; - if (sp->hdr.flags & RXRPC_JUMBO_PACKET && - !rxrpc_validate_jumbo(skb)) + if (!rxrpc_validate_data(skb)) goto bad_message; + + /* Unshare the packet so that it can be modified for in-place + * decryption. + */ + if (sp->hdr.securityIndex != 0) { + struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC); + if (!nskb) { + rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem); + goto out; + } + + if (nskb != skb) { + rxrpc_eaten_skb(skb, rxrpc_skb_received); + rxrpc_new_skb(skb, rxrpc_skb_unshared); + skb = nskb; + sp = rxrpc_skb(skb); + } + } break; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -1373,11 +1400,14 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) mutex_unlock(&call->user_mutex); } + /* Process a call packet; this either discards or passes on the ref + * elsewhere. 
+ */ rxrpc_input_call_packet(call, skb); - goto discard; + goto out; discard: - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); out: trace_rxrpc_rx_done(0, 0); return 0; diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index e93a78f7c05e..3ce6d628cd75 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -90,7 +90,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); _debug("{%d},{%u}", local->debug_id, sp->hdr.type); switch (sp->hdr.type) { @@ -108,7 +108,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) break; } - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } _leave(""); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 72a6e12a9304..36587260cabd 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -426,11 +426,14 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) _enter("%d", local->debug_id); + local->dead = true; + mutex_lock(&rxnet->local_mutex); list_del_init(&local->link); mutex_unlock(&rxnet->local_mutex); - ASSERT(RB_EMPTY_ROOT(&local->client_conns)); + rxrpc_clean_up_local_conns(local); + rxrpc_service_connection_reaper(&rxnet->service_conn_reaper); ASSERT(!local->service); if (socket) { diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 369e516c4bdf..935bb60fff56 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -565,7 +565,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) memset(&whdr, 0, sizeof(whdr)); while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); sp = rxrpc_skb(skb); switch (skb->mark) { @@ -581,7 +581,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) ioc = 2; break; default: - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); continue; } @@ -606,7 +606,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) rxrpc_tx_point_reject); } - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } _leave(""); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 7666ec72d37e..c97ebdc043e4 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -163,11 +163,11 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } - rxrpc_new_skb(skb, rxrpc_skb_rx_received); + rxrpc_new_skb(skb, rxrpc_skb_received); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); return; } @@ -177,7 +177,7 @@ void rxrpc_error_report(struct sock *sk) peer = NULL; if (!peer) { rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" [no peer]"); return; } @@ -189,7 +189,7 @@ void rxrpc_error_report(struct sock *sk) serr->ee.ee_code == ICMP_FRAG_NEEDED)) { rxrpc_adjust_mtu(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); rxrpc_put_peer(peer); _leave(" [MTU update]"); return; @@ -197,7 +197,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); rxrpc_put_peer(peer); _leave(""); diff 
--git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 99ce322d7caa..49bb972539aa 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -89,6 +89,15 @@ struct rxrpc_jumbo_header { #define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ #define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) +/* + * The maximum number of subpackets that can possibly fit in a UDP packet is: + * + * ((max_IP - IP_hdr - UDP_hdr) / RXRPC_JUMBO_SUBPKTLEN) + 1 + * = ((65535 - 28 - 28) / 1416) + 1 + * = 46 non-terminal packets and 1 terminal packet. + */ +#define RXRPC_MAX_NR_JUMBO 47 + /*****************************************************************************/ /* * on-the-wire Rx ACK packet data payload diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 9a7e1bc9791d..3b0becb12041 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -177,7 +177,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) struct sk_buff *skb; rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; - u8 flags; + bool last = false; + u8 subpacket; int ix; _enter("%d", call->debug_id); @@ -189,23 +190,25 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) hard_ack++; ix = hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb, rxrpc_skb_rx_rotated); + rxrpc_see_skb(skb, rxrpc_skb_rotated); sp = rxrpc_skb(skb); - flags = sp->hdr.flags; - serial = sp->hdr.serial; - if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1; + + subpacket = call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET; + serial = sp->hdr.serial + subpacket; + + if (subpacket == sp->nr_subpackets - 1 && + sp->rx_flags & RXRPC_SKB_INCL_LAST) + last = true; call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; /* Barrier against rxrpc_input_data(). */ smp_store_release(&call->rx_hard_ack, hard_ack); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); - _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); - if (flags & RXRPC_LAST_PACKET) { + if (last) { rxrpc_end_rx_phase(call, serial); } else { /* Check to see if there's an ACK that needs sending. */ @@ -233,18 +236,19 @@ static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_seq_t seq = sp->hdr.seq; u16 cksum = sp->hdr.cksum; + u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; _enter(""); /* For all but the head jumbo subpacket, the security checksum is in a * jumbo header immediately prior to the data. 
*/ - if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) { + if (subpacket > 0) { __be16 tmp; if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) BUG(); cksum = ntohs(tmp); - seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1; + seq += subpacket; } return call->conn->security->verify_packet(call, skb, offset, len, @@ -265,19 +269,18 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, u8 *_annotation, unsigned int *_offset, unsigned int *_len) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len; int ret; u8 annotation = *_annotation; + u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; /* Locate the subpacket */ + offset += subpacket * RXRPC_JUMBO_SUBPKTLEN; len = skb->len - offset; - if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { - offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * - RXRPC_JUMBO_SUBPKTLEN); - len = (annotation & RXRPC_RX_ANNO_JLAST) ? - skb->len - offset : RXRPC_JUMBO_SUBPKTLEN; - } + if (subpacket < sp->nr_subpackets - 1) + len = RXRPC_JUMBO_DATALEN; if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { ret = rxrpc_verify_packet(call, skb, annotation, offset, len); @@ -303,6 +306,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, { struct rxrpc_skb_priv *sp; struct sk_buff *skb; + rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; size_t remain; bool last; @@ -336,12 +340,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, break; } smp_rmb(); - rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); sp = rxrpc_skb(skb); - if (!(flags & MSG_PEEK)) + if (!(flags & MSG_PEEK)) { + serial = sp->hdr.serial; + serial += call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET; trace_rxrpc_receive(call, rxrpc_receive_front, - sp->hdr.serial, seq); + serial, seq); + } if (msg) sock_recv_timestamp(msg, sock->sk, skb); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ae8cd8926456..c60c520fde7c 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -187,10 +187,8 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct rxrpc_skb_priv *sp; struct rxrpc_crypt iv; struct scatterlist sg[16]; - struct sk_buff *trailer; unsigned int len; u16 check; - int nsg; int err; sp = rxrpc_skb(skb); @@ -214,15 +212,14 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, crypto_skcipher_encrypt(req); /* we want to encrypt the skbuff in-place */ - nsg = skb_cow_data(skb, 0, &trailer); - err = -ENOMEM; - if (nsg < 0 || nsg > 16) + err = -EMSGSIZE; + if (skb_shinfo(skb)->nr_frags > 16) goto out; len = data_size + call->conn->size_align - 1; len &= ~(call->conn->size_align - 1); - sg_init_table(sg, nsg); + sg_init_table(sg, ARRAY_SIZE(sg)); err = skb_to_sgvec(skb, sg, 0, len); if (unlikely(err < 0)) goto out; @@ -319,11 +316,10 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, struct rxkad_level1_hdr sechdr; struct rxrpc_crypt iv; struct scatterlist sg[16]; - struct sk_buff *trailer; bool aborted; u32 data_size, buf; u16 check; - int nsg, ret; + int ret; _enter(""); @@ -336,11 +332,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. 
*/ - nsg = skb_cow_data(skb, 0, &trailer); - if (nsg < 0 || nsg > 16) - goto nomem; - - sg_init_table(sg, nsg); + sg_init_table(sg, ARRAY_SIZE(sg)); ret = skb_to_sgvec(skb, sg, offset, 8); if (unlikely(ret < 0)) return ret; @@ -388,10 +380,6 @@ protocol_error: if (aborted) rxrpc_send_abort_packet(call); return -EPROTO; - -nomem: - _leave(" = -ENOMEM"); - return -ENOMEM; } /* @@ -406,7 +394,6 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct rxkad_level2_hdr sechdr; struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; - struct sk_buff *trailer; bool aborted; u32 data_size, buf; u16 check; @@ -423,12 +410,11 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. */ - nsg = skb_cow_data(skb, 0, &trailer); - if (nsg < 0) - goto nomem; - sg = _sg; - if (unlikely(nsg > 4)) { + nsg = skb_shinfo(skb)->nr_frags; + if (nsg <= 4) { + nsg = 4; + } else { sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); if (!sg) goto nomem; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index bae14438f869..6a1547b270fe 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -176,7 +176,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, skb->tstamp = ktime_get_real(); ix = seq & RXRPC_RXTX_BUFF_MASK; - rxrpc_get_skb(skb, rxrpc_skb_tx_got); + rxrpc_get_skb(skb, rxrpc_skb_got); call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -248,7 +248,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, } out: - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" = %d", ret); return ret; } @@ -289,7 +289,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, skb = call->tx_pending; call->tx_pending = NULL; - rxrpc_see_skb(skb, rxrpc_skb_tx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); copied = 0; do { @@ -336,7 +336,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (!skb) goto maybe_error; - rxrpc_new_skb(skb, rxrpc_skb_tx_new); + sp = rxrpc_skb(skb); + sp->rx_flags |= RXRPC_SKB_TX_BUFFER; + rxrpc_new_skb(skb, rxrpc_skb_new); _debug("ALLOC SEND %p", skb); @@ -346,7 +348,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, skb_reserve(skb, call->conn->security_size); skb->len += call->conn->security_size; - sp = rxrpc_skb(skb); sp->remain = chunk; if (sp->remain > skb_tailroom(skb)) sp->remain = skb_tailroom(skb); @@ -439,7 +440,7 @@ out: return ret; call_terminated: - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" = %d", call->error); return call->error; diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 9ad5045b7c2f..0348d2bf6f7d 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -14,7 +14,8 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) +#define is_tx_skb(skb) (rxrpc_skb(skb)->rx_flags & RXRPC_SKB_TX_BUFFER) +#define select_skb_count(skb) (is_tx_skb(skb) ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) /* * Note the allocation or reception of a socket buffer. 
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index bae14438f869..6a1547b270fe 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -176,7 +176,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
         skb->tstamp = ktime_get_real();
 
         ix = seq & RXRPC_RXTX_BUFF_MASK;
-        rxrpc_get_skb(skb, rxrpc_skb_tx_got);
+        rxrpc_get_skb(skb, rxrpc_skb_got);
         call->rxtx_annotations[ix] = annotation;
         smp_wmb();
         call->rxtx_buffer[ix] = skb;
@@ -248,7 +248,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
         }
 
 out:
-        rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+        rxrpc_free_skb(skb, rxrpc_skb_freed);
         _leave(" = %d", ret);
         return ret;
 }
@@ -289,7 +289,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
         skb = call->tx_pending;
         call->tx_pending = NULL;
-        rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
+        rxrpc_see_skb(skb, rxrpc_skb_seen);
 
         copied = 0;
         do {
@@ -336,7 +336,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
                         if (!skb)
                                 goto maybe_error;
 
-                        rxrpc_new_skb(skb, rxrpc_skb_tx_new);
+                        sp = rxrpc_skb(skb);
+                        sp->rx_flags |= RXRPC_SKB_TX_BUFFER;
+                        rxrpc_new_skb(skb, rxrpc_skb_new);
 
                         _debug("ALLOC SEND %p", skb);
 
@@ -346,7 +348,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
                         skb_reserve(skb, call->conn->security_size);
                         skb->len += call->conn->security_size;
 
-                        sp = rxrpc_skb(skb);
                         sp->remain = chunk;
                         if (sp->remain > skb_tailroom(skb))
                                 sp->remain = skb_tailroom(skb);
@@ -439,7 +440,7 @@ out:
         return ret;
 
 call_terminated:
-        rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+        rxrpc_free_skb(skb, rxrpc_skb_freed);
         _leave(" = %d", call->error);
         return call->error;
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 9ad5045b7c2f..0348d2bf6f7d 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -14,7 +14,8 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs)
+#define is_tx_skb(skb) (rxrpc_skb(skb)->rx_flags & RXRPC_SKB_TX_BUFFER)
+#define select_skb_count(skb) (is_tx_skb(skb) ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs)
 
 /*
  * Note the allocation or reception of a socket buffer.
@@ -22,8 +23,9 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
         const void *here = __builtin_return_address(0);
-        int n = atomic_inc_return(select_skb_count(op));
-        trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+        int n = atomic_inc_return(select_skb_count(skb));
+        trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+                        rxrpc_skb(skb)->rx_flags, here);
 }
 
 /*
@@ -33,8 +35,9 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
         const void *here = __builtin_return_address(0);
 
         if (skb) {
-                int n = atomic_read(select_skb_count(op));
-                trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+                int n = atomic_read(select_skb_count(skb));
+                trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+                                rxrpc_skb(skb)->rx_flags, here);
         }
 }
 
@@ -44,12 +47,23 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
         const void *here = __builtin_return_address(0);
-        int n = atomic_inc_return(select_skb_count(op));
-        trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+        int n = atomic_inc_return(select_skb_count(skb));
+        trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+                        rxrpc_skb(skb)->rx_flags, here);
         skb_get(skb);
 }
 
 /*
+ * Note the dropping of a ref on a socket buffer by the core.
+ */
+void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+{
+        const void *here = __builtin_return_address(0);
+        int n = atomic_inc_return(&rxrpc_n_rx_skbs);
+        trace_rxrpc_skb(skb, op, 0, n, 0, here);
+}
+
+/*
  * Note the destruction of a socket buffer.
  */
 void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
@@ -58,8 +72,9 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
         if (skb) {
                 int n;
                 CHECK_SLAB_OKAY(&skb->users);
-                n = atomic_dec_return(select_skb_count(op));
-                trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+                n = atomic_dec_return(select_skb_count(skb));
+                trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+                                rxrpc_skb(skb)->rx_flags, here);
                 kfree_skb(skb);
         }
 }
@@ -72,9 +87,10 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
         const void *here = __builtin_return_address(0);
         struct sk_buff *skb;
 
         while ((skb = skb_dequeue((list))) != NULL) {
-                int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged));
-                trace_rxrpc_skb(skb, rxrpc_skb_rx_purged,
-                                refcount_read(&skb->users), n, here);
+                int n = atomic_dec_return(select_skb_count(skb));
+                trace_rxrpc_skb(skb, rxrpc_skb_purged,
+                                refcount_read(&skb->users), n,
+                                rxrpc_skb(skb)->rx_flags, here);
                 kfree_skb(skb);
         }
 }
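After this change, select_skb_count() keys off a per-skb flag rather than the trace operation, so one set of trace enums can serve both directions. A toy illustration of flag-based counter selection follows; the flag bit and structs are stand-ins, not the rxrpc headers.

/* Sketch: pick the Tx or Rx counter from a per-buffer flag. */
#include <stdio.h>

#define SKB_TX_BUFFER 0x01      /* illustrative bit, not kernel ABI */

struct buf { unsigned int flags; };

static int n_tx, n_rx;

static int *select_count(const struct buf *b)
{
        return (b->flags & SKB_TX_BUFFER) ? &n_tx : &n_rx;
}

int main(void)
{
        struct buf tx = { .flags = SKB_TX_BUFFER }, rx = { 0 };

        (*select_count(&tx))++; /* a "new skb" event on the send path */
        (*select_count(&rx))++; /* a "new skb" event on the receive path */
        printf("tx=%d rx=%d\n", n_tx, n_rx);
        return 0;
}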
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index fd1f7e799e23..04b7bd4ec751 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -422,7 +422,7 @@ static __net_init int bpf_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-        return tc_action_net_init(tn, &act_bpf_ops);
+        return tc_action_net_init(net, tn, &act_bpf_ops);
 }
 
 static void __net_exit bpf_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 32ac04d77a45..2b43cacf82af 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -231,7 +231,7 @@ static __net_init int connmark_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-        return tc_action_net_init(tn, &act_connmark_ops);
+        return tc_action_net_init(net, tn, &act_connmark_ops);
 }
 
 static void __net_exit connmark_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 9b9288267a54..d3cfad88dc3a 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -714,7 +714,7 @@ static __net_init int csum_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-        return tc_action_net_init(tn, &act_csum_ops);
+        return tc_action_net_init(net, tn, &act_csum_ops);
 }
 
 static void __net_exit csum_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 33a1a7406e87..cdd6f3818097 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -939,7 +939,7 @@ static __net_init int ct_init_net(struct net *net)
                 tn->labels = true;
         }
 
-        return tc_action_net_init(&tn->tn, &act_ct_ops);
+        return tc_action_net_init(net, &tn->tn, &act_ct_ops);
 }
 
 static void __net_exit ct_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 06ef74b74911..0dbcfd1dca7b 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -376,7 +376,7 @@ static __net_init int ctinfo_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
 
-        return tc_action_net_init(tn, &act_ctinfo_ops);
+        return tc_action_net_init(net, tn, &act_ctinfo_ops);
 }
 
 static void __net_exit ctinfo_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 8f0140c6ca58..324f1d1f6d47 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -278,7 +278,7 @@ static __net_init int gact_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-        return tc_action_net_init(tn, &act_gact_ops);
+        return tc_action_net_init(net, tn, &act_gact_ops);
 }
 
 static void __net_exit gact_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 92ee853d43e6..3a31e241c647 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -890,7 +890,7 @@ static __net_init int ife_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-        return tc_action_net_init(tn, &act_ife_ops);
+        return tc_action_net_init(net, tn, &act_ife_ops);
 }
 
 static void __net_exit ife_exit_net(struct list_head *net_list)
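Every act_* module in this series receives the same one-line change: tc_action_net_init() gains the struct net pointer so the owning namespace is known at action-destroy time, which the act_ipt hunks below rely on. A minimal sketch of the idea, with all types reduced to stand-ins:

/* Sketch: capture the per-netns pointer at action-net init time. */
#include <stdio.h>

struct net { int id; };
struct idrinfo_stub { struct net *net; };
struct tc_action_net_stub { struct idrinfo_stub idrinfo; };

/* new shape: init records which namespace owns this action table */
static int action_net_init(struct net *net, struct tc_action_net_stub *tn)
{
        tn->idrinfo.net = net;  /* later consulted on the release path */
        return 0;
}

int main(void)
{
        struct net net0 = { .id = 0 };
        struct tc_action_net_stub tn;

        action_net_init(&net0, &tn);
        printf("bound to netns %d\n", tn.idrinfo.net->id);
        return 0;
}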
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index ce2c30a591d2..214a03d405cf 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -61,12 +61,13 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
         return 0;
 }
 
-static void ipt_destroy_target(struct xt_entry_target *t)
+static void ipt_destroy_target(struct xt_entry_target *t, struct net *net)
 {
         struct xt_tgdtor_param par = {
                 .target = t->u.kernel.target,
                 .targinfo = t->data,
                 .family = NFPROTO_IPV4,
+                .net = net,
         };
         if (par.target->destroy != NULL)
                 par.target->destroy(&par);
@@ -78,7 +79,7 @@ static void tcf_ipt_release(struct tc_action *a)
         struct tcf_ipt *ipt = to_ipt(a);
 
         if (ipt->tcfi_t) {
-                ipt_destroy_target(ipt->tcfi_t);
+                ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net);
                 kfree(ipt->tcfi_t);
         }
         kfree(ipt->tcfi_tname);
@@ -180,7 +181,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 
         spin_lock_bh(&ipt->tcf_lock);
         if (ret != ACT_P_CREATED) {
-                ipt_destroy_target(ipt->tcfi_t);
+                ipt_destroy_target(ipt->tcfi_t, net);
                 kfree(ipt->tcfi_tname);
                 kfree(ipt->tcfi_t);
         }
@@ -350,7 +351,7 @@ static __net_init int ipt_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-        return tc_action_net_init(tn, &act_ipt_ops);
+        return tc_action_net_init(net, tn, &act_ipt_ops);
 }
 
 static void __net_exit ipt_exit_net(struct list_head *net_list)
@@ -399,7 +400,7 @@ static __net_init int xt_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-        return tc_action_net_init(tn, &act_xt_ops);
+        return tc_action_net_init(net, tn, &act_xt_ops);
 }
 
 static void __net_exit xt_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index be3f88dfc37e..9d1bf508075a 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -453,7 +453,7 @@ static __net_init int mirred_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-        return tc_action_net_init(tn, &act_mirred_ops);
+        return tc_action_net_init(net, tn, &act_mirred_ops);
 }
 
 static void __net_exit mirred_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 0f299e3b618c..e168df0e008a 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -375,7 +375,7 @@ static __net_init int mpls_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, mpls_net_id);
 
-        return tc_action_net_init(tn, &act_mpls_ops);
+        return tc_action_net_init(net, tn, &act_mpls_ops);
 }
 
 static void __net_exit mpls_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7b858c11b1b5..ea4c5359e7df 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -327,7 +327,7 @@ static __net_init int nat_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-        return tc_action_net_init(tn, &act_nat_ops);
+        return tc_action_net_init(net, tn, &act_nat_ops);
 }
 
 static void __net_exit nat_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 17360c6faeaa..cdfaa79382a2 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -498,7 +498,7 @@ static __net_init int pedit_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-        return tc_action_net_init(tn, &act_pedit_ops);
+        return tc_action_net_init(net, tn, &act_pedit_ops);
 }
 
 static void __net_exit pedit_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 49cec3e64a4d..6315e0f8d26e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -371,7 +371,7 @@ static __net_init int police_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, police_net_id);
 
-        return tc_action_net_init(tn, &act_police_ops);
+        return tc_action_net_init(net, tn, &act_police_ops);
 }
 
 static void __net_exit police_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 595308d60133..10229124a992 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -102,13 +102,17 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
         goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
         s->rate = rate;
         s->psample_group_num = psample_group_num;
-        RCU_INIT_POINTER(s->psample_group, psample_group);
+        rcu_swap_protected(s->psample_group, psample_group,
+                           lockdep_is_held(&s->tcf_lock));
 
         if (tb[TCA_SAMPLE_TRUNC_SIZE]) {
                 s->truncate = true;
                 s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]);
         }
         spin_unlock_bh(&s->tcf_lock);
+
+        if (psample_group)
+                psample_group_put(psample_group);
         if (goto_ch)
                 tcf_chain_put_by_act(goto_ch);
 
@@ -265,7 +269,7 @@ static __net_init int sample_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-        return tc_action_net_init(tn, &act_sample_ops);
+        return tc_action_net_init(net, tn, &act_sample_ops);
 }
 
 static void __net_exit sample_exit_net(struct list_head *net_list)
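The act_sample fix above replaces RCU_INIT_POINTER() with a swap under the lock, so the previously published group is remembered and its reference released only after unlocking, closing a leak when an existing action is overwritten. Sketched below with plain assignment standing in for the RCU primitives; the refcounting is illustrative only.

/* Sketch of the swap-then-put pattern: exchange the published pointer
 * under the lock, drop the displaced reference after unlocking.
 */
#include <stdio.h>
#include <stdlib.h>

struct group { int refcnt; };

static void group_put(struct group *g)
{
        if (g && --g->refcnt == 0)
                free(g);
}

int main(void)
{
        struct group *old = calloc(1, sizeof(*old));
        struct group *new = calloc(1, sizeof(*new));
        struct group *published, *tmp;

        old->refcnt = new->refcnt = 1;
        published = old;

        /* lock held: swap in the new group, remember the old one */
        tmp = published;
        published = new;        /* rcu_swap_protected() analogue */
        new = tmp;
        /* lock released: now safe to drop the displaced reference */
        group_put(new);

        printf("published refcnt=%d\n", published->refcnt);
        group_put(published);
        return 0;
}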
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 33aefa25b545..6120e56117ca 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -232,7 +232,7 @@ static __net_init int simp_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-        return tc_action_net_init(tn, &act_simp_ops);
+        return tc_action_net_init(net, tn, &act_simp_ops);
 }
 
 static void __net_exit simp_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 37dced00b63d..6a8d3337c577 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -336,7 +336,7 @@ static __net_init int skbedit_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-        return tc_action_net_init(tn, &act_skbedit_ops);
+        return tc_action_net_init(net, tn, &act_skbedit_ops);
 }
 
 static void __net_exit skbedit_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 7da3518e18ef..888437f97ba6 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -287,7 +287,7 @@ static __net_init int skbmod_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-        return tc_action_net_init(tn, &act_skbmod_ops);
+        return tc_action_net_init(net, tn, &act_skbmod_ops);
 }
 
 static void __net_exit skbmod_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 6d0debdc9b97..2f83a79f76aa 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -600,7 +600,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-        return tc_action_net_init(tn, &act_tunnel_key_ops);
+        return tc_action_net_init(net, tn, &act_tunnel_key_ops);
 }
 
 static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index a3c9eea1ee8a..287a30bf8930 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -334,7 +334,7 @@ static __net_init int vlan_init_net(struct net *net)
 {
         struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-        return tc_action_net_init(tn, &act_vlan_ops);
+        return tc_action_net_init(net, tn, &act_vlan_ops);
 }
 
 static void __net_exit vlan_exit_net(struct list_head *net_list)
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 732e109c3055..810645b5c086 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -181,11 +181,6 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
         s64 credits;
         int len;
 
-        if (atomic64_read(&q->port_rate) == -1) {
-                WARN_ONCE(1, "cbs: dequeue() called with unknown port rate.");
-                return NULL;
-        }
-
         if (q->credits < 0) {
                 credits = timediff_to_credits(now - q->last, q->idleslope);
 
@@ -303,11 +298,19 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
 static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
 {
         struct ethtool_link_ksettings ecmd;
+        int speed = SPEED_10;
         int port_rate = -1;
+        int err;
+
+        err = __ethtool_get_link_ksettings(dev, &ecmd);
+        if (err < 0)
+                goto skip;
+
+        if (ecmd.base.speed != SPEED_UNKNOWN)
+                speed = ecmd.base.speed;
 
-        if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
-            ecmd.base.speed != SPEED_UNKNOWN)
-                port_rate = ecmd.base.speed * 1000 * BYTES_PER_KBIT;
+skip:
+        port_rate = speed * 1000 * BYTES_PER_KBIT;
 
         atomic64_set(&q->port_rate, port_rate);
         netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n",
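cbs now assumes 10 Mbit/s whenever ethtool cannot report a link speed, so dequeue no longer has to defend against a -1 port rate; taprio below gets the same treatment for picos_per_byte. A standalone sketch of the fallback, where query_speed() is a stand-in for __ethtool_get_link_ksettings():

/* Sketch: default the link speed, never publish an invalid rate. */
#include <stdio.h>

#define SPEED_10        10
#define SPEED_UNKNOWN   -1
#define BYTES_PER_KBIT  125     /* 1000 bits / 8 bits-per-byte */

static long long port_rate_bytes(int (*query_speed)(void))
{
        int speed = SPEED_10;   /* fallback when the query fails */
        int reported = query_speed();

        if (reported != SPEED_UNKNOWN && reported > 0)
                speed = reported;
        return (long long)speed * 1000 * BYTES_PER_KBIT;
}

static int no_link(void) { return SPEED_UNKNOWN; }
static int gigabit(void) { return 1000; }

int main(void)
{
        printf("unknown -> %lld bytes/s\n", port_rate_bytes(no_link));
        printf("1G      -> %lld bytes/s\n", port_rate_bytes(gigabit));
        return 0;
}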
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 11c03cf4aa74..137db1cbde85 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -624,8 +624,12 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
 
         err = skb_array_produce(q, skb);
 
-        if (unlikely(err))
-                return qdisc_drop_cpu(skb, qdisc, to_free);
+        if (unlikely(err)) {
+                if (qdisc_is_percpu_stats(qdisc))
+                        return qdisc_drop_cpu(skb, qdisc, to_free);
+                else
+                        return qdisc_drop(skb, qdisc, to_free);
+        }
 
         qdisc_update_stats_at_enqueue(qdisc, pkt_len);
         return NET_XMIT_SUCCESS;
@@ -688,11 +692,14 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
                         kfree_skb(skb);
         }
 
-        for_each_possible_cpu(i) {
-                struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
+        if (qdisc_is_percpu_stats(qdisc)) {
+                for_each_possible_cpu(i) {
+                        struct gnet_stats_queue *q;
 
-                q->backlog = 0;
-                q->qlen = 0;
+                        q = per_cpu_ptr(qdisc->cpu_qstats, i);
+                        q->backlog = 0;
+                        q->qlen = 0;
+                }
         }
 }
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index e25d414ae12f..8d8bc2ec5cd6 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -477,11 +477,6 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
         u32 gate_mask;
         int i;
 
-        if (atomic64_read(&q->picos_per_byte) == -1) {
-                WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte.");
-                return NULL;
-        }
-
         rcu_read_lock();
         entry = rcu_dereference(q->current_entry);
         /* if there's no entry, it means that the schedule didn't
@@ -958,12 +953,20 @@ static void taprio_set_picos_per_byte(struct net_device *dev,
                                       struct taprio_sched *q)
 {
         struct ethtool_link_ksettings ecmd;
-        int picos_per_byte = -1;
+        int speed = SPEED_10;
+        int picos_per_byte;
+        int err;
 
-        if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
-            ecmd.base.speed != SPEED_UNKNOWN)
-                picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
-                                           ecmd.base.speed * 1000 * 1000);
+        err = __ethtool_get_link_ksettings(dev, &ecmd);
+        if (err < 0)
+                goto skip;
+
+        if (ecmd.base.speed != SPEED_UNKNOWN)
+                speed = ecmd.base.speed;
+
+skip:
+        picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
+                                   speed * 1000 * 1000);
 
         atomic64_set(&q->picos_per_byte, picos_per_byte);
         netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
@@ -1249,6 +1252,10 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
          */
         q->clockid = -1;
 
+        spin_lock(&taprio_list_lock);
+        list_add(&q->taprio_list, &taprio_list);
+        spin_unlock(&taprio_list_lock);
+
         if (sch->parent != TC_H_ROOT)
                 return -EOPNOTSUPP;
 
@@ -1266,10 +1273,6 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
         if (!opt)
                 return -EINVAL;
 
-        spin_lock(&taprio_list_lock);
-        list_add(&q->taprio_list, &taprio_list);
-        spin_unlock(&taprio_list_lock);
-
         for (i = 0; i < dev->num_tx_queues; i++) {
                 struct netdev_queue *dev_queue;
                 struct Qdisc *qdisc;
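Both pfifo_fast hunks above hinge on qdisc_is_percpu_stats(): the per-CPU drop and reset paths must only run when the qdisc was actually created with per-CPU stats. A toy version of that guard follows; the flag, structs, and fixed "CPU" array are stand-ins for the kernel's qdisc machinery.

/* Sketch: branch on a per-CPU-stats flag before touching either form. */
#include <stdio.h>
#include <string.h>

#define FLAG_CPUSTATS 0x1

struct qstats { unsigned int backlog, qlen; };
struct qdisc_stub {
        unsigned int flags;
        struct qstats cpu_qstats[4];    /* one slot per "CPU" */
        struct qstats qstats;           /* shared fallback */
};

static int is_percpu_stats(const struct qdisc_stub *q)
{
        return q->flags & FLAG_CPUSTATS;
}

static void reset_stats(struct qdisc_stub *q)
{
        if (is_percpu_stats(q))
                memset(q->cpu_qstats, 0, sizeof(q->cpu_qstats));
        else
                memset(&q->qstats, 0, sizeof(q->qstats));
}

int main(void)
{
        struct qdisc_stub q = { .flags = 0, .qstats = { 7, 3 } };

        reset_stats(&q);        /* takes the non-per-CPU branch */
        printf("backlog=%u qlen=%u\n", q.qstats.backlog, q.qstats.qlen);
        return 0;
}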