summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/appletalk/aarp.c5
-rw-r--r--net/atm/signaling.c1
-rw-r--r--net/batman-adv/bat_iv_ogm.c167
-rw-r--r--net/batman-adv/bat_v_ogm.c59
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c120
-rw-r--r--net/batman-adv/distributed-arp-table.c3
-rw-r--r--net/batman-adv/fragmentation.c58
-rw-r--r--net/batman-adv/gateway_client.c4
-rw-r--r--net/batman-adv/main.c1
-rw-r--r--net/batman-adv/mesh-interface.c1
-rw-r--r--net/batman-adv/originator.c4
-rw-r--r--net/batman-adv/tp_meter.c229
-rw-r--r--net/batman-adv/tp_meter.h1
-rw-r--r--net/batman-adv/translation-table.c55
-rw-r--r--net/batman-adv/tvlv.c28
-rw-r--r--net/batman-adv/tvlv.h2
-rw-r--r--net/batman-adv/types.h63
-rw-r--r--net/bluetooth/af_bluetooth.c97
-rw-r--r--net/bluetooth/bnep/core.c2
-rw-r--r--net/bluetooth/hci_sync.c6
-rw-r--r--net/bluetooth/iso.c14
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bluetooth/l2cap_sock.c51
-rw-r--r--net/bluetooth/mgmt.c6
-rw-r--r--net/bluetooth/rfcomm/sock.c9
-rw-r--r--net/bluetooth/sco.c9
-rw-r--r--net/bridge/br_multicast.c22
-rw-r--r--net/bridge/br_netfilter_hooks.c6
-rw-r--r--net/bridge/br_netlink.c10
-rw-r--r--net/bridge/netfilter/ebtable_broute.c14
-rw-r--r--net/bridge/netfilter/ebtable_filter.c14
-rw-r--r--net/bridge/netfilter/ebtable_nat.c12
-rw-r--r--net/bridge/netfilter/ebtables.c71
-rw-r--r--net/ceph/auth_x.c5
-rw-r--r--net/ceph/crush/crush.c6
-rw-r--r--net/ceph/osdmap.c17
-rw-r--r--net/core/bpf_sk_storage.c14
-rw-r--r--net/core/dev.c21
-rw-r--r--net/core/devmem.c11
-rw-r--r--net/core/failover.c6
-rw-r--r--net/core/filter.c55
-rw-r--r--net/core/gro.c7
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/skbuff.c11
-rw-r--r--net/core/skmsg.c9
-rw-r--r--net/core/sock_map.c39
-rw-r--r--net/ethtool/bitset.c8
-rw-r--r--net/ethtool/phy.c36
-rw-r--r--net/hsr/hsr_framereg.c9
-rw-r--r--net/ipv4/bpf_tcp_ca.c2
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c53
-rw-r--r--net/ipv4/netfilter/arptable_filter.c27
-rw-r--r--net/ipv4/netfilter/ip_tables.c59
-rw-r--r--net/ipv4/netfilter/iptable_filter.c27
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c29
-rw-r--r--net/ipv4/netfilter/iptable_nat.c6
-rw-r--r--net/ipv4/netfilter/iptable_raw.c26
-rw-r--r--net/ipv4/netfilter/iptable_security.c27
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_ao.c3
-rw-r--r--net/ipv4/tcp_input.c15
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv4/udp_offload.c22
-rw-r--r--net/ipv6/exthdrs.c21
-rw-r--r--net/ipv6/ip6_flowlabel.c46
-rw-r--r--net/ipv6/netfilter/ip6_tables.c56
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c4
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c26
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c27
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c6
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c24
-rw-r--r--net/ipv6/netfilter/ip6table_security.c27
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/l2tp/l2tp_core.c2
-rw-r--r--net/mac80211/cfg.c5
-rw-r--r--net/mac80211/mlme.c5
-rw-r--r--net/mac80211/parse.c105
-rw-r--r--net/mac80211/rx.c6
-rw-r--r--net/mptcp/bpf.c2
-rw-r--r--net/mptcp/pm.c56
-rw-r--r--net/mptcp/protocol.c25
-rw-r--r--net/mptcp/sockopt.c10
-rw-r--r--net/netfilter/ipset/ip_set_core.c5
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h57
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c6
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c5
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c5
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c187
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c1
-rw-r--r--net/netfilter/nf_conntrack_core.c7
-rw-r--r--net/netfilter/nf_conntrack_expect.c1
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c12
-rw-r--r--net/netfilter/nf_conntrack_helper.c18
-rw-r--r--net/netfilter/nf_conntrack_netlink.c21
-rw-r--r--net/netfilter/nf_conntrack_sip.c10
-rw-r--r--net/netfilter/nf_queue.c4
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_inner.c3
-rw-r--r--net/netfilter/x_tables.c177
-rw-r--r--net/netlink/genetlink.c8
-rw-r--r--net/openvswitch/vport-netdev.c8
-rw-r--r--net/phonet/pep.c19
-rw-r--r--net/rds/message.c1
-rw-r--r--net/rds/tcp.c9
-rw-r--r--net/rxrpc/ar-internal.h14
-rw-r--r--net/rxrpc/call_event.c20
-rw-r--r--net/rxrpc/call_object.c2
-rw-r--r--net/rxrpc/conn_event.c29
-rw-r--r--net/rxrpc/insecure.c8
-rw-r--r--net/rxrpc/recvmsg.c68
-rw-r--r--net/rxrpc/rxgk.c160
-rw-r--r--net/rxrpc/rxgk_app.c46
-rw-r--r--net/rxrpc/rxgk_common.h66
-rw-r--r--net/rxrpc/rxkad.c115
-rw-r--r--net/sched/sch_cbs.c16
-rw-r--r--net/sched/sch_dualpi2.c4
-rw-r--r--net/sctp/socket.c9
-rw-r--r--net/shaper/shaper.c224
-rw-r--r--net/shaper/shaper_nl_gen.c7
-rw-r--r--net/shaper/shaper_nl_gen.h2
-rw-r--r--net/smc/af_smc.c20
-rw-r--r--net/smc/smc_tracepoint.h2
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/tls/tls_sw.c46
-rw-r--r--net/unix/af_unix.c11
-rw-r--r--net/vmw_vsock/virtio_transport_common.c159
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/wireless/scan.c3
-rw-r--r--net/wireless/wext-compat.c2
-rw-r--r--net/xdp/xskmap.c4
139 files changed, 2421 insertions, 1376 deletions
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index e7315c01a299..30493ea3c010 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -542,6 +542,11 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
struct ddpehdr *ddp = (struct ddpehdr *)skb->data;
int ft = 2;
+ if (!at) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
+
/*
* Compressible ?
*
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 358fbe5e4d1d..b991d937205a 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -179,6 +179,7 @@ as_indicate_complete:
break;
default:
pr_alert("bad message type %d\n", (int)msg->type);
+ dev_kfree_skb(skb);
/* Paired with find_get_vcc(msg->vcc) above */
sock_put(sk);
return -EINVAL;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f28e9cbf8ad5..b8b1b997960a 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -173,19 +173,12 @@ free_orig_node_hash:
static struct batadv_neigh_node *
batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr,
- struct batadv_orig_node *orig_node,
- struct batadv_orig_node *orig_neigh)
+ struct batadv_orig_node *orig_node)
{
struct batadv_neigh_node *neigh_node;
neigh_node = batadv_neigh_node_get_or_create(orig_node,
hard_iface, neigh_addr);
- if (!neigh_node)
- goto out;
-
- neigh_node->orig_node = orig_neigh;
-
-out:
return neigh_node;
}
@@ -231,6 +224,8 @@ static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
hard_iface->bat_iv.ogm_buff = NULL;
mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+ cancel_delayed_work_sync(&hard_iface->bat_iv.reschedule_work);
}
static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
@@ -335,7 +330,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface);
const char *fwd_str;
u8 packet_num;
- s16 buff_pos;
+ int buff_pos;
struct batadv_ogm_packet *batadv_ogm_packet;
struct sk_buff *skb;
u8 *packet_pos;
@@ -543,8 +538,10 @@ out:
* @if_incoming: interface where the packet was received
* @if_outgoing: interface for which the retransmission should be considered
* @own_packet: true if it is a self-generated ogm
+ *
+ * Return: whether forward packet was scheduled
*/
-static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
+static bool batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
int packet_len, unsigned long send_time,
bool direct_link,
struct batadv_hard_iface *if_incoming,
@@ -568,13 +565,13 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
skb = netdev_alloc_skb_ip_align(NULL, skb_size);
if (!skb)
- return;
+ return false;
forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
queue_left, bat_priv, skb);
if (!forw_packet_aggr) {
kfree_skb(skb);
- return;
+ return false;
}
forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
@@ -597,6 +594,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
batadv_iv_send_outstanding_bat_ogm_packet);
batadv_forw_packet_ogmv1_queue(bat_priv, forw_packet_aggr, send_time);
+
+ return true;
}
/* aggregate a new packet into the existing ogm packet */
@@ -624,8 +623,10 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
* @if_outgoing: interface for which the retransmission should be considered
* @own_packet: true if it is a self-generated ogm
* @send_time: timestamp (jiffies) when the packet is to be sent
+ *
+ * Return: whether forward packet was scheduled
*/
-static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
+static bool batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
unsigned char *packet_buff,
int packet_len,
struct batadv_hard_iface *if_incoming,
@@ -677,14 +678,16 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
if (!own_packet && atomic_read(&bat_priv->aggregated_ogms))
send_time += max_aggregation_jiffies;
- batadv_iv_ogm_aggregate_new(packet_buff, packet_len,
- send_time, direct_link,
- if_incoming, if_outgoing,
- own_packet);
+ return batadv_iv_ogm_aggregate_new(packet_buff, packet_len,
+ send_time, direct_link,
+ if_incoming, if_outgoing,
+ own_packet);
} else {
batadv_iv_ogm_aggregate(forw_packet_aggr, packet_buff,
packet_len, direct_link);
spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+ return true;
}
}
@@ -797,6 +800,9 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
u32 seqno;
u16 tvlv_len = 0;
unsigned long send_time;
+ bool reschedule = false;
+ bool scheduled;
+ int ret;
lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex);
@@ -820,9 +826,15 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
* appended as it may alter the tt tvlv container
*/
batadv_tt_local_commit_changes(bat_priv);
- tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff,
- ogm_buff_len,
- BATADV_OGM_HLEN);
+ ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff,
+ ogm_buff_len,
+ BATADV_OGM_HLEN);
+ if (ret < 0) {
+ reschedule = true;
+ goto out;
+ }
+
+ tvlv_len = ret;
}
batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff);
@@ -841,8 +853,11 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
/* OGMs from secondary interfaces are only scheduled on their
* respective interfaces.
*/
- batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len,
- hard_iface, hard_iface, 1, send_time);
+ scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len,
+ hard_iface, hard_iface, 1, send_time);
+ if (!scheduled)
+ reschedule = true;
+
goto out;
}
@@ -854,15 +869,28 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
if (!kref_get_unless_zero(&tmp_hard_iface->refcount))
continue;
- batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
- *ogm_buff_len, hard_iface,
- tmp_hard_iface, 1, send_time);
-
+ scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
+ *ogm_buff_len, hard_iface,
+ tmp_hard_iface, 1, send_time);
batadv_hardif_put(tmp_hard_iface);
+
+ if (!scheduled && tmp_hard_iface == hard_iface)
+ reschedule = true;
}
rcu_read_unlock();
out:
+ if (reschedule) {
+ /* there was a failure scheduling the own forward packet.
+ * as result, the batadv_iv_send_outstanding_bat_ogm_packet()
+ * work item is no longer scheduled. it is therefore necessary
+ * to reschedule it manually
+ */
+ queue_delayed_work(batadv_event_workqueue,
+ &hard_iface->bat_iv.reschedule_work,
+ msecs_to_jiffies(atomic_read(&bat_priv->orig_interval)));
+ }
+
batadv_hardif_put(primary_if);
}
@@ -877,6 +905,17 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
}
+static void batadv_iv_ogm_reschedule(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct batadv_hard_iface *hard_iface;
+
+ hard_iface = container_of(delayed_work,
+ struct batadv_hard_iface,
+ bat_iv.reschedule_work);
+ batadv_iv_ogm_schedule(hard_iface);
+}
+
/**
* batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface
* @orig_node: originator which reproadcasted the OGMs directly
@@ -907,6 +946,31 @@ static u8 batadv_iv_orig_ifinfo_sum(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_iv_ogm_neigh_ifinfo_sum() - Get bcast_own sum for a last-hop neighbor
+ * @bat_priv: the bat priv with all the mesh interface information
+ * @neigh_node: last-hop neighbor of an originator
+ *
+ * Return: Number of replied (rebroadcasted) OGMs for the originator currently
+ * announced by the neighbor. Returns 0 if the neighbor's originator entry is
+ * not available anymore.
+ */
+static u8 batadv_iv_ogm_neigh_ifinfo_sum(struct batadv_priv *bat_priv,
+ const struct batadv_neigh_node *neigh_node)
+{
+ struct batadv_orig_node *orig_neigh;
+ u8 sum;
+
+ orig_neigh = batadv_orig_hash_find(bat_priv, neigh_node->addr);
+ if (!orig_neigh)
+ return 0;
+
+ sum = batadv_iv_orig_ifinfo_sum(orig_neigh, neigh_node->if_incoming);
+ batadv_orig_node_put(orig_neigh);
+
+ return sum;
+}
+
+/**
* batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an
* originator
* @bat_priv: the bat priv with all the mesh interface information
@@ -975,17 +1039,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
}
if (!neigh_node) {
- struct batadv_orig_node *orig_tmp;
-
- orig_tmp = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source);
- if (!orig_tmp)
- goto unlock;
-
neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
ethhdr->h_source,
- orig_node, orig_tmp);
-
- batadv_orig_node_put(orig_tmp);
+ orig_node);
if (!neigh_node)
goto unlock;
} else {
@@ -1037,10 +1093,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
*/
if (router_ifinfo &&
neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg) {
- sum_orig = batadv_iv_orig_ifinfo_sum(router->orig_node,
- router->if_incoming);
- sum_neigh = batadv_iv_orig_ifinfo_sum(neigh_node->orig_node,
- neigh_node->if_incoming);
+ sum_orig = batadv_iv_ogm_neigh_ifinfo_sum(bat_priv, router);
+ sum_neigh = batadv_iv_ogm_neigh_ifinfo_sum(bat_priv,
+ neigh_node);
if (sum_orig >= sum_neigh)
goto out;
}
@@ -1106,7 +1161,6 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
if (!neigh_node)
neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
orig_neigh_node->orig,
- orig_neigh_node,
orig_neigh_node);
if (!neigh_node)
@@ -1303,6 +1357,32 @@ out:
}
/**
+ * batadv_orig_to_direct_router() - get direct next hop neighbor to an orig address
+ * @bat_priv: the bat priv with all the mesh interface information
+ * @orig_addr: the originator MAC address to search the best next hop router for
+ * @if_outgoing: the interface where the OGM should be sent to
+ *
+ * Return: A neighbor node which is the best router towards the given originator
+ * address. Bonding candidates are ignored.
+ */
+static struct batadv_neigh_node *
+batadv_orig_to_direct_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+ struct batadv_hard_iface *if_outgoing)
+{
+ struct batadv_neigh_node *neigh_node;
+ struct batadv_orig_node *orig_node;
+
+ orig_node = batadv_orig_hash_find(bat_priv, orig_addr);
+ if (!orig_node)
+ return NULL;
+
+ neigh_node = batadv_orig_router_get(orig_node, if_outgoing);
+ batadv_orig_node_put(orig_node);
+
+ return neigh_node;
+}
+
+/**
* batadv_iv_ogm_process_per_outif() - process a batman iv OGM for an outgoing
* interface
* @skb: the skb containing the OGM
@@ -1372,8 +1452,9 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
router = batadv_orig_router_get(orig_node, if_outgoing);
if (router) {
- router_router = batadv_orig_router_get(router->orig_node,
- if_outgoing);
+ router_router = batadv_orig_to_direct_router(bat_priv,
+ router->addr,
+ if_outgoing);
router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
}
@@ -2227,6 +2308,8 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1,
static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface)
{
+ INIT_DELAYED_WORK(&hard_iface->bat_iv.reschedule_work, batadv_iv_ogm_reschedule);
+
/* begin scheduling originator messages on that interface */
batadv_iv_ogm_schedule(hard_iface);
}
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index e3870492dab7..d66ca77b1aaa 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -113,14 +113,14 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
/**
* batadv_v_ogm_send_to_if() - send a batman ogm using a given interface
+ * @bat_priv: the bat priv with all the mesh interface information
* @skb: the OGM to send
* @hard_iface: the interface to use to send the OGM
*/
-static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
+static void batadv_v_ogm_send_to_if(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
struct batadv_hard_iface *hard_iface)
{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface);
-
if (hard_iface->if_status != BATADV_IF_ACTIVE) {
kfree_skb(skb);
return;
@@ -187,6 +187,7 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface)
/**
* batadv_v_ogm_aggr_send() - flush & send aggregation queue
+ * @bat_priv: the bat priv with all the mesh interface information
* @hard_iface: the interface with the aggregation queue to flush
*
* Aggregates all OGMv2 packets currently in the aggregation queue into a
@@ -196,7 +197,8 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface)
*
* Caller needs to hold the hard_iface->bat_v.aggr_list.lock.
*/
-static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface)
+static void batadv_v_ogm_aggr_send(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface)
{
unsigned int aggr_len = hard_iface->bat_v.aggr_len;
struct sk_buff *skb_aggr;
@@ -226,27 +228,32 @@ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface)
consume_skb(skb);
}
- batadv_v_ogm_send_to_if(skb_aggr, hard_iface);
+ batadv_v_ogm_send_to_if(bat_priv, skb_aggr, hard_iface);
}
/**
* batadv_v_ogm_queue_on_if() - queue a batman ogm on a given interface
+ * @bat_priv: the bat priv with all the mesh interface information
* @skb: the OGM to queue
* @hard_iface: the interface to queue the OGM on
*/
-static void batadv_v_ogm_queue_on_if(struct sk_buff *skb,
+static void batadv_v_ogm_queue_on_if(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
struct batadv_hard_iface *hard_iface)
{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface);
+ if (hard_iface->mesh_iface != bat_priv->mesh_iface) {
+ kfree_skb(skb);
+ return;
+ }
if (!atomic_read(&bat_priv->aggregated_ogms)) {
- batadv_v_ogm_send_to_if(skb, hard_iface);
+ batadv_v_ogm_send_to_if(bat_priv, skb, hard_iface);
return;
}
spin_lock_bh(&hard_iface->bat_v.aggr_list.lock);
if (!batadv_v_ogm_queue_left(skb, hard_iface))
- batadv_v_ogm_aggr_send(hard_iface);
+ batadv_v_ogm_aggr_send(bat_priv, hard_iface);
hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb);
__skb_queue_tail(&hard_iface->bat_v.aggr_list, skb);
@@ -262,10 +269,10 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv)
struct batadv_hard_iface *hard_iface;
struct batadv_ogm2_packet *ogm_packet;
struct sk_buff *skb, *skb_tmp;
- unsigned char *ogm_buff;
+ unsigned char **ogm_buff;
struct list_head *iter;
- int ogm_buff_len;
- u16 tvlv_len = 0;
+ int *ogm_buff_len;
+ u16 tvlv_len;
int ret;
lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex);
@@ -273,25 +280,27 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv)
if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
goto out;
- ogm_buff = bat_priv->bat_v.ogm_buff;
- ogm_buff_len = bat_priv->bat_v.ogm_buff_len;
+ ogm_buff = &bat_priv->bat_v.ogm_buff;
+ ogm_buff_len = &bat_priv->bat_v.ogm_buff_len;
+
/* tt changes have to be committed before the tvlv data is
* appended as it may alter the tt tvlv container
*/
batadv_tt_local_commit_changes(bat_priv);
- tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, &ogm_buff,
- &ogm_buff_len,
- BATADV_OGM2_HLEN);
+ ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff,
+ ogm_buff_len,
+ BATADV_OGM2_HLEN);
+ if (ret < 0)
+ goto reschedule;
- bat_priv->bat_v.ogm_buff = ogm_buff;
- bat_priv->bat_v.ogm_buff_len = ogm_buff_len;
+ tvlv_len = ret;
- skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + ogm_buff_len);
+ skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + *ogm_buff_len);
if (!skb)
goto reschedule;
skb_reserve(skb, ETH_HLEN);
- skb_put_data(skb, ogm_buff, ogm_buff_len);
+ skb_put_data(skb, *ogm_buff, *ogm_buff_len);
ogm_packet = (struct batadv_ogm2_packet *)skb->data;
ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
@@ -343,7 +352,7 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv)
break;
}
- batadv_v_ogm_queue_on_if(skb_tmp, hard_iface);
+ batadv_v_ogm_queue_on_if(bat_priv, skb_tmp, hard_iface);
batadv_hardif_put(hard_iface);
}
rcu_read_unlock();
@@ -383,12 +392,14 @@ void batadv_v_ogm_aggr_work(struct work_struct *work)
{
struct batadv_hard_iface_bat_v *batv;
struct batadv_hard_iface *hard_iface;
+ struct batadv_priv *bat_priv;
batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work);
hard_iface = container_of(batv, struct batadv_hard_iface, bat_v);
+ bat_priv = netdev_priv(hard_iface->mesh_iface);
spin_lock_bh(&hard_iface->bat_v.aggr_list.lock);
- batadv_v_ogm_aggr_send(hard_iface);
+ batadv_v_ogm_aggr_send(bat_priv, hard_iface);
spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock);
batadv_v_ogm_start_queue_timer(hard_iface);
@@ -578,7 +589,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
if_outgoing->net_dev->name, ntohl(ogm_forward->throughput),
ogm_forward->ttl, if_incoming->net_dev->name);
- batadv_v_ogm_queue_on_if(skb, if_outgoing);
+ batadv_v_ogm_queue_on_if(bat_priv, skb, if_outgoing);
out:
batadv_orig_ifinfo_put(orig_ifinfo);
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 51fe028b9088..ffe854018bd3 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -318,8 +318,8 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
if (claim->backbone_gw != backbone_gw)
continue;
- batadv_claim_put(claim);
hlist_del_rcu(&claim->hash_entry);
+ batadv_claim_put(claim);
}
spin_unlock_bh(list_lock);
}
@@ -356,12 +356,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac,
sizeof(local_claim_dest));
local_claim_dest.type = claimtype;
- mesh_iface = primary_if->mesh_iface;
+ mesh_iface = READ_ONCE(primary_if->mesh_iface);
+ if (!mesh_iface)
+ goto out;
skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
/* IP DST: 0.0.0.0 */
zeroip,
- primary_if->mesh_iface,
+ mesh_iface,
/* IP SRC: 0.0.0.0 */
zeroip,
/* Ethernet DST: Broadcast */
@@ -514,8 +516,8 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig,
entry->crc = BATADV_BLA_CRC_INIT;
entry->bat_priv = bat_priv;
spin_lock_init(&entry->crc_lock);
- atomic_set(&entry->request_sent, 0);
- atomic_set(&entry->wait_periods, 0);
+ entry->state = BATADV_BLA_BACKBONE_GW_SYNCED;
+ entry->wait_periods = 0;
ether_addr_copy(entry->orig, orig);
INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report);
kref_init(&entry->refcount);
@@ -544,9 +546,13 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig,
batadv_bla_send_announce(bat_priv, entry);
/* this will be decreased in the worker thread */
- atomic_inc(&entry->request_sent);
- atomic_set(&entry->wait_periods, BATADV_BLA_WAIT_PERIODS);
- atomic_inc(&bat_priv->bla.num_requests);
+ spin_lock_bh(&bat_priv->bla.num_requests_lock);
+ if (entry->state == BATADV_BLA_BACKBONE_GW_SYNCED) {
+ entry->state = BATADV_BLA_BACKBONE_GW_UNSYNCED;
+ entry->wait_periods = BATADV_BLA_WAIT_PERIODS;
+ atomic_inc(&bat_priv->bla.num_requests);
+ }
+ spin_unlock_bh(&bat_priv->bla.num_requests_lock);
}
return entry;
@@ -649,10 +655,12 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw)
backbone_gw->vid, BATADV_CLAIM_TYPE_REQUEST);
/* no local broadcasts should be sent or received, for now. */
- if (!atomic_read(&backbone_gw->request_sent)) {
+ spin_lock_bh(&backbone_gw->bat_priv->bla.num_requests_lock);
+ if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_SYNCED) {
+ backbone_gw->state = BATADV_BLA_BACKBONE_GW_UNSYNCED;
atomic_inc(&backbone_gw->bat_priv->bla.num_requests);
- atomic_set(&backbone_gw->request_sent, 1);
}
+ spin_unlock_bh(&backbone_gw->bat_priv->bla.num_requests_lock);
}
/**
@@ -723,6 +731,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
if (unlikely(hash_added != 0)) {
/* only local changes happened. */
+ batadv_backbone_gw_put(backbone_gw);
kfree(claim);
return;
}
@@ -872,10 +881,12 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
/* if we have sent a request and the crc was OK,
* we can allow traffic again.
*/
- if (atomic_read(&backbone_gw->request_sent)) {
+ spin_lock_bh(&bat_priv->bla.num_requests_lock);
+ if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED) {
+ backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED;
atomic_dec(&backbone_gw->bat_priv->bla.num_requests);
- atomic_set(&backbone_gw->request_sent, 0);
}
+ spin_unlock_bh(&bat_priv->bla.num_requests_lock);
}
batadv_backbone_gw_put(backbone_gw);
@@ -1223,6 +1234,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
struct hlist_head *head;
struct batadv_hashtable *hash;
spinlock_t *list_lock; /* protects write access to the hash lists */
+ bool purged;
int i;
hash = bat_priv->bla.backbone_hash;
@@ -1233,30 +1245,49 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
head = &hash->table[i];
list_lock = &hash->list_locks[i];
- spin_lock_bh(list_lock);
- hlist_for_each_entry_safe(backbone_gw, node_tmp,
- head, hash_entry) {
- if (now)
- goto purge_now;
- if (!batadv_has_timed_out(backbone_gw->lasttime,
- BATADV_BLA_BACKBONE_TIMEOUT))
- continue;
+ do {
+ purged = false;
+
+ spin_lock_bh(list_lock);
+ hlist_for_each_entry_safe(backbone_gw, node_tmp,
+ head, hash_entry) {
+ if (now)
+ goto purge_now;
+ if (!batadv_has_timed_out(backbone_gw->lasttime,
+ BATADV_BLA_BACKBONE_TIMEOUT))
+ continue;
- batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
- "%s(): backbone gw %pM timed out\n",
- __func__, backbone_gw->orig);
+ batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
+ "%s(): backbone gw %pM timed out\n",
+ __func__, backbone_gw->orig);
purge_now:
- /* don't wait for the pending request anymore */
- if (atomic_read(&backbone_gw->request_sent))
- atomic_dec(&bat_priv->bla.num_requests);
+ purged = true;
- batadv_bla_del_backbone_claims(backbone_gw);
+ /* don't wait for the pending request anymore */
+ spin_lock_bh(&bat_priv->bla.num_requests_lock);
+ if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED)
+ atomic_dec(&bat_priv->bla.num_requests);
- hlist_del_rcu(&backbone_gw->hash_entry);
- batadv_backbone_gw_put(backbone_gw);
- }
- spin_unlock_bh(list_lock);
+ backbone_gw->state = BATADV_BLA_BACKBONE_GW_STOPPED;
+ spin_unlock_bh(&bat_priv->bla.num_requests_lock);
+
+ batadv_bla_del_backbone_claims(backbone_gw);
+
+ hlist_del_rcu(&backbone_gw->hash_entry);
+ break;
+ }
+ spin_unlock_bh(list_lock);
+
+ if (purged) {
+ /* reference for pending report_work */
+ if (cancel_work_sync(&backbone_gw->report_work))
+ batadv_backbone_gw_put(backbone_gw);
+
+ /* reference for hash_entry */
+ batadv_backbone_gw_put(backbone_gw);
+ }
+ } while (purged);
}
}
@@ -1288,6 +1319,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
rcu_read_lock();
hlist_for_each_entry_rcu(claim, head, hash_entry) {
+ /* only purge claims not currently in the process of being released.
+ * Such claims could otherwise have a NULL-ptr backbone_gw set because
+ * they already went through batadv_claim_release()
+ */
+ if (!kref_get_unless_zero(&claim->refcount))
+ continue;
+
backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
if (now)
goto purge_now;
@@ -1313,6 +1351,7 @@ purge_now:
claim->addr, claim->vid);
skip:
batadv_backbone_gw_put(backbone_gw);
+ batadv_claim_put(claim);
}
rcu_read_unlock();
}
@@ -1483,7 +1522,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
batadv_bla_send_loopdetect(bat_priv,
backbone_gw);
- /* request_sent is only set after creation to avoid
+ /* state is only set to unsynced after creation to avoid
* problems when we are not yet known as backbone gw
* in the backbone.
*
@@ -1492,14 +1531,21 @@ static void batadv_bla_periodic_work(struct work_struct *work)
* some grace time.
*/
- if (atomic_read(&backbone_gw->request_sent) == 0)
- continue;
+ spin_lock_bh(&bat_priv->bla.num_requests_lock);
+ if (backbone_gw->state != BATADV_BLA_BACKBONE_GW_UNSYNCED)
+ goto unlock_next;
- if (!atomic_dec_and_test(&backbone_gw->wait_periods))
- continue;
+ if (backbone_gw->wait_periods > 0)
+ backbone_gw->wait_periods--;
+
+ if (backbone_gw->wait_periods > 0)
+ goto unlock_next;
+ backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED;
atomic_dec(&backbone_gw->bat_priv->bla.num_requests);
- atomic_set(&backbone_gw->request_sent, 0);
+
+unlock_next:
+ spin_unlock_bh(&bat_priv->bla.num_requests_lock);
}
rcu_read_unlock();
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 3efc4cf50b46..0a8bd95e2f99 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -696,6 +696,9 @@ static bool batadv_dat_forward_data(struct batadv_priv *bat_priv,
goto free_orig;
tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
+ if (!tmp_skb)
+ goto free_neigh;
+
if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb,
cand[i].orig_node,
packet_subtype)) {
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index f4e45cc25816..e9553db42349 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -17,6 +17,7 @@
#include <linux/lockdep.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
+#include <linux/overflow.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -80,9 +81,9 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
*
* Return: the maximum size of payload that can be fragmented.
*/
-static int batadv_frag_size_limit(void)
+static size_t batadv_frag_size_limit(void)
{
- int limit = BATADV_FRAG_MAX_FRAG_SIZE;
+ size_t limit = BATADV_FRAG_MAX_FRAG_SIZE;
limit -= sizeof(struct batadv_frag_packet);
limit *= BATADV_FRAG_MAX_FRAGMENTS;
@@ -143,7 +144,9 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
struct batadv_frag_packet *frag_packet;
u8 bucket;
u16 seqno, hdr_size = sizeof(struct batadv_frag_packet);
+ bool overflow = false;
bool ret = false;
+ size_t data_len;
/* Linearize packet to avoid linearizing 16 packets in a row when doing
* the later merge. Non-linear merge should be added to remove this
@@ -153,6 +156,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
goto err;
frag_packet = (struct batadv_frag_packet *)skb->data;
+ data_len = skb->len - hdr_size;
seqno = ntohs(frag_packet->seqno);
bucket = seqno % BATADV_FRAG_BUFFER_COUNT;
@@ -171,7 +175,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
spin_lock_bh(&chain->lock);
if (batadv_frag_init_chain(chain, seqno)) {
hlist_add_head(&frag_entry_new->list, &chain->fragment_list);
- chain->size = skb->len - hdr_size;
+ chain->size = data_len;
chain->timestamp = jiffies;
chain->total_size = ntohs(frag_packet->total_size);
ret = true;
@@ -188,7 +192,11 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
if (frag_entry_curr->no < frag_entry_new->no) {
hlist_add_before(&frag_entry_new->list,
&frag_entry_curr->list);
- chain->size += skb->len - hdr_size;
+
+ if (check_add_overflow(chain->size, data_len,
+ &chain->size))
+ overflow = true;
+
chain->timestamp = jiffies;
ret = true;
goto out;
@@ -201,13 +209,16 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
/* Reached the end of the list, so insert after 'frag_entry_last'. */
if (likely(frag_entry_last)) {
hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
- chain->size += skb->len - hdr_size;
+
+ if (check_add_overflow(chain->size, data_len, &chain->size))
+ overflow = true;
+
chain->timestamp = jiffies;
ret = true;
}
out:
- if (chain->size > batadv_frag_size_limit() ||
+ if (overflow || chain->size > batadv_frag_size_limit() ||
chain->total_size != ntohs(frag_packet->total_size) ||
chain->total_size > batadv_frag_size_limit()) {
/* Clear chain if total size of either the list or the packet
@@ -294,6 +305,31 @@ free:
}
/**
+ * batadv_skb_is_frag() - check if newly merged skb contains unicast fragment
+ * @skb: newly merged skb
+ *
+ * Return: if newly merged skb is of type BATADV_UNICAST_FRAG
+ */
+static bool batadv_skb_is_frag(struct sk_buff *skb)
+{
+ struct batadv_ogm_packet *batadv_ogm_packet;
+
+ /* packet should hold at least type and version */
+ if (unlikely(!pskb_may_pull(skb, 2)))
+ return false;
+
+ batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data;
+
+ if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION)
+ return false;
+
+ if (batadv_ogm_packet->packet_type != BATADV_UNICAST_FRAG)
+ return false;
+
+ return true;
+}
+
+/**
* batadv_frag_skb_buffer() - buffer fragment for later merge
* @skb: skb to buffer
* @orig_node_src: originator that the skb is received from
@@ -326,6 +362,16 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb,
if (!skb_out)
goto out_err;
+ /* fragment in fragment is not allowed. otherwise it is possible
+ * to exhaust the stack when receiving a matryoshka-style
+ * "fragments in a fragment packet"
+ */
+ if (batadv_skb_is_frag(skb_out)) {
+ kfree_skb(skb_out);
+ skb_out = NULL;
+ goto out_err;
+ }
+
out:
ret = true;
out_err:
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 51e9c081a2a4..a9d0346e8332 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -478,10 +478,14 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv,
*/
void batadv_gw_node_free(struct batadv_priv *bat_priv)
{
+ struct batadv_gw_node *curr_gw;
struct batadv_gw_node *gw_node;
struct hlist_node *node_tmp;
spin_lock_bh(&bat_priv->gw.list_lock);
+ curr_gw = rcu_replace_pointer(bat_priv->gw.curr_gw, NULL, true);
+ batadv_gw_node_put(curr_gw);
+
hlist_for_each_entry_safe(gw_node, node_tmp,
&bat_priv->gw.gateway_list, list) {
hlist_del_init_rcu(&gw_node->list);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 3a35aadd8b41..a4d33ee0fda5 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -249,6 +249,7 @@ void batadv_mesh_free(struct net_device *mesh_iface)
atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
batadv_purge_outstanding_packets(bat_priv, NULL);
+ batadv_tp_stop_all(bat_priv);
batadv_gw_node_free(bat_priv);
diff --git a/net/batman-adv/mesh-interface.c b/net/batman-adv/mesh-interface.c
index 56ca1c1b83f2..e7aa45bc6b7a 100644
--- a/net/batman-adv/mesh-interface.c
+++ b/net/batman-adv/mesh-interface.c
@@ -787,6 +787,7 @@ static int batadv_meshif_init_late(struct net_device *dev)
atomic_set(&bat_priv->tt.ogm_append_cnt, 0);
#ifdef CONFIG_BATMAN_ADV_BLA
atomic_set(&bat_priv->bla.num_requests, 0);
+ spin_lock_init(&bat_priv->bla.num_requests_lock);
#endif
atomic_set(&bat_priv->tp_num, 0);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index b3468ccab535..ad4921b659d9 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -835,8 +835,6 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
orig_node = container_of(rcu, struct batadv_orig_node, rcu);
- batadv_mcast_purge_orig(orig_node);
-
batadv_frag_purge_orig(orig_node, NULL);
kfree(orig_node->tt_buff);
@@ -887,6 +885,8 @@ void batadv_orig_node_release(struct kref *ref)
}
spin_unlock_bh(&orig_node->vlan_list_lock);
+ batadv_mcast_purge_orig(orig_node);
+
call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
}
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 2e42f6b348c8..0fc4ca78e84e 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -8,10 +8,12 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/bug.h>
#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/completion.h>
#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
@@ -253,6 +255,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
* batadv_tp_list_find() - find a tp_vars object in the global list
* @bat_priv: the bat priv with all the mesh interface information
* @dst: the other endpoint MAC address to look for
+ * @role: role of the session
*
* Look for a tp_vars object matching dst as end_point and return it after
* having increment the refcounter. Return NULL is not found
@@ -260,7 +263,8 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
* Return: matching tp_vars or NULL when no tp_vars with @dst was found
*/
static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
- const u8 *dst)
+ const u8 *dst,
+ enum batadv_tp_meter_role role)
{
struct batadv_tp_vars *pos, *tp_vars = NULL;
@@ -269,6 +273,9 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
if (!batadv_compare_eth(pos->other_end, dst))
continue;
+ if (pos->role != role)
+ continue;
+
/* most of the time this function is invoked during the normal
* process..it makes sens to pay more when the session is
* finished and to speed the process up during the measurement
@@ -285,11 +292,32 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
}
/**
+ * batadv_tp_list_active() - check if session from/to destination is ongoing
+ * @bat_priv: the bat priv with all the mesh interface information
+ * @dst: the other endpoint MAC address to look for
+ *
+ * Return: if matching session with @dst was found
+ */
+static bool batadv_tp_list_active(struct batadv_priv *bat_priv, const u8 *dst)
+ __must_hold(&bat_priv->tp_list_lock)
+{
+ struct batadv_tp_vars *tp_vars;
+
+ hlist_for_each_entry_rcu(tp_vars, &bat_priv->tp_list, list) {
+ if (batadv_compare_eth(tp_vars->other_end, dst))
+ return true;
+ }
+
+ return false;
+}
+
+/**
* batadv_tp_list_find_session() - find tp_vars session object in the global
* list
* @bat_priv: the bat priv with all the mesh interface information
* @dst: the other endpoint MAC address to look for
* @session: session identifier
+ * @role: role of the session
*
* Look for a tp_vars object matching dst as end_point, session as tp meter
* session and return it after having increment the refcounter. Return NULL
@@ -299,7 +327,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
*/
static struct batadv_tp_vars *
batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst,
- const u8 *session)
+ const u8 *session, enum batadv_tp_meter_role role)
{
struct batadv_tp_vars *pos, *tp_vars = NULL;
@@ -311,6 +339,9 @@ batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst,
if (memcmp(pos->session, session, sizeof(pos->session)) != 0)
continue;
+ if (pos->role != role)
+ continue;
+
/* most of the time this function is invoked during the normal
* process..it makes sense to pay more when the session is
* finished and to speed the process up during the measurement
@@ -365,32 +396,41 @@ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
}
/**
- * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer
- * @bat_priv: the bat priv with all the mesh interface information
- * @tp_vars: the private data of the current TP meter session to cleanup
+ * batadv_tp_list_detach() - remove tp session from mesh session list once
+ * @tp_vars: the private data of the current TP meter session
*/
-static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv,
- struct batadv_tp_vars *tp_vars)
+static void batadv_tp_list_detach(struct batadv_tp_vars *tp_vars)
{
- cancel_delayed_work(&tp_vars->finish_work);
+ bool detached = false;
spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
- hlist_del_rcu(&tp_vars->list);
+ if (!hlist_unhashed(&tp_vars->list)) {
+ hlist_del_init_rcu(&tp_vars->list);
+ detached = true;
+ }
spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
+ if (!detached)
+ return;
+
+ atomic_dec(&tp_vars->bat_priv->tp_num);
+
/* drop list reference */
batadv_tp_vars_put(tp_vars);
+}
- atomic_dec(&tp_vars->bat_priv->tp_num);
+/**
+ * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer
+ * @tp_vars: the private data of the current TP meter session to cleanup
+ */
+static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars)
+{
+ cancel_delayed_work_sync(&tp_vars->finish_work);
+
+ batadv_tp_list_detach(tp_vars);
/* kill the timer and remove its reference */
- timer_delete_sync(&tp_vars->timer);
- /* the worker might have rearmed itself therefore we kill it again. Note
- * that if the worker should run again before invoking the following
- * timer_delete(), it would not re-arm itself once again because the status
- * is OFF now
- */
- timer_delete(&tp_vars->timer);
+ timer_shutdown_sync(&tp_vars->timer);
batadv_tp_vars_put(tp_vars);
}
@@ -402,11 +442,14 @@ static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv,
static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
struct batadv_tp_vars *tp_vars)
{
+ enum batadv_tp_meter_reason reason;
u32 session_cookie;
+ reason = atomic_read(&tp_vars->send_result);
+
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Test towards %pM finished..shutting down (reason=%d)\n",
- tp_vars->other_end, tp_vars->reason);
+ tp_vars->other_end, reason);
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n",
@@ -419,7 +462,7 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
session_cookie = batadv_tp_session_cookie(tp_vars->session,
tp_vars->icmp_uid);
- batadv_tp_batctl_notify(tp_vars->reason,
+ batadv_tp_batctl_notify(reason,
tp_vars->other_end,
bat_priv,
tp_vars->start_time,
@@ -435,10 +478,18 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars,
enum batadv_tp_meter_reason reason)
{
- if (!atomic_dec_and_test(&tp_vars->sending))
- return;
+ atomic_cmpxchg(&tp_vars->send_result, 0, reason);
+}
- tp_vars->reason = reason;
+/**
+ * batadv_tp_sender_stopped() - check if tp session was stopped with reason
+ * @tp_vars: the private data of the current TP meter session
+ *
+ * Return: whether stop reason was found
+ */
+static bool batadv_tp_sender_stopped(struct batadv_tp_vars *tp_vars)
+{
+ return atomic_read(&tp_vars->send_result) != 0;
}
/**
@@ -468,7 +519,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
/* most of the time this function is invoked while normal packet
* reception...
*/
- if (unlikely(atomic_read(&tp_vars->sending) == 0))
+ if (unlikely(batadv_tp_sender_stopped(tp_vars)))
/* timer ref will be dropped in batadv_tp_sender_cleanup */
return;
@@ -488,7 +539,7 @@ static void batadv_tp_sender_timeout(struct timer_list *t)
struct batadv_tp_vars *tp_vars = timer_container_of(tp_vars, t, timer);
struct batadv_priv *bat_priv = tp_vars->bat_priv;
- if (atomic_read(&tp_vars->sending) == 0)
+ if (batadv_tp_sender_stopped(tp_vars))
return;
/* if the user waited long enough...shutdown the test */
@@ -643,11 +694,11 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv,
/* find the tp_vars */
tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
- icmp->session);
+ icmp->session, BATADV_TP_SENDER);
if (unlikely(!tp_vars))
return;
- if (unlikely(atomic_read(&tp_vars->sending) == 0))
+ if (unlikely(batadv_tp_sender_stopped(tp_vars)))
goto out;
/* old ACK? silently drop it.. */
@@ -813,21 +864,21 @@ static int batadv_tp_send(void *arg)
if (unlikely(tp_vars->role != BATADV_TP_SENDER)) {
err = BATADV_TP_REASON_DST_UNREACHABLE;
- tp_vars->reason = err;
+ batadv_tp_sender_shutdown(tp_vars, err);
goto out;
}
orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end);
if (unlikely(!orig_node)) {
err = BATADV_TP_REASON_DST_UNREACHABLE;
- tp_vars->reason = err;
+ batadv_tp_sender_shutdown(tp_vars, err);
goto out;
}
primary_if = batadv_primary_if_get_selected(bat_priv);
if (unlikely(!primary_if)) {
err = BATADV_TP_REASON_DST_UNREACHABLE;
- tp_vars->reason = err;
+ batadv_tp_sender_shutdown(tp_vars, err);
goto out;
}
@@ -846,7 +897,7 @@ static int batadv_tp_send(void *arg)
queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work,
msecs_to_jiffies(tp_vars->test_length));
- while (atomic_read(&tp_vars->sending) != 0) {
+ while (!batadv_tp_sender_stopped(tp_vars)) {
if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) {
batadv_tp_wait_available(tp_vars, payload_len);
continue;
@@ -869,8 +920,7 @@ static int batadv_tp_send(void *arg)
"Meter: %s() cannot send packets (%d)\n",
__func__, err);
/* ensure nobody else tries to stop the thread now */
- if (atomic_dec_and_test(&tp_vars->sending))
- tp_vars->reason = err;
+ batadv_tp_sender_shutdown(tp_vars, err);
break;
}
@@ -886,7 +936,8 @@ out:
batadv_orig_node_put(orig_node);
batadv_tp_sender_end(bat_priv, tp_vars);
- batadv_tp_sender_cleanup(bat_priv, tp_vars);
+ batadv_tp_sender_cleanup(tp_vars);
+ complete(&tp_vars->finished);
batadv_tp_vars_put(tp_vars);
@@ -918,7 +969,8 @@ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars)
batadv_tp_vars_put(tp_vars);
/* cleanup of failed tp meter variables */
- batadv_tp_sender_cleanup(bat_priv, tp_vars);
+ batadv_tp_sender_cleanup(tp_vars);
+ complete(&tp_vars->finished);
return;
}
@@ -947,10 +999,15 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
/* look for an already existing test towards this node */
spin_lock_bh(&bat_priv->tp_list_lock);
- tp_vars = batadv_tp_list_find(bat_priv, dst);
- if (tp_vars) {
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_DST_UNREACHABLE,
+ dst, bat_priv, session_cookie);
+ return;
+ }
+
+ if (batadv_tp_list_active(bat_priv, dst)) {
spin_unlock_bh(&bat_priv->tp_list_lock);
- batadv_tp_vars_put(tp_vars);
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Meter: test to or from the same node already ongoing, aborting\n");
batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING,
@@ -969,6 +1026,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
tp_vars = kmalloc_obj(*tp_vars, GFP_ATOMIC);
if (!tp_vars) {
+ atomic_dec(&bat_priv->tp_num);
spin_unlock_bh(&bat_priv->tp_list_lock);
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Meter: %s cannot allocate list elements\n",
@@ -982,7 +1040,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
ether_addr_copy(tp_vars->other_end, dst);
kref_init(&tp_vars->refcount);
tp_vars->role = BATADV_TP_SENDER;
- atomic_set(&tp_vars->sending, 1);
+ atomic_set(&tp_vars->send_result, 0);
memcpy(tp_vars->session, session_id, sizeof(session_id));
tp_vars->icmp_uid = icmp_uid;
@@ -1017,6 +1075,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
tp_vars->start_time = jiffies;
init_waitqueue_head(&tp_vars->more_bytes);
+ init_completion(&tp_vars->finished);
spin_lock_init(&tp_vars->unacked_lock);
INIT_LIST_HEAD(&tp_vars->unacked_list);
@@ -1069,16 +1128,16 @@ void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
if (!orig_node)
return;
- tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig);
+ tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig, BATADV_TP_SENDER);
if (!tp_vars) {
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Meter: trying to interrupt an already over connection\n");
- goto out;
+ goto out_put_orig_node;
}
batadv_tp_sender_shutdown(tp_vars, return_value);
batadv_tp_vars_put(tp_vars);
-out:
+out_put_orig_node:
batadv_orig_node_put(orig_node);
}
@@ -1119,14 +1178,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t)
"Shutting down for inactivity (more than %dms) from %pM\n",
BATADV_TP_RECV_TIMEOUT, tp_vars->other_end);
- spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
- hlist_del_rcu(&tp_vars->list);
- spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
-
- /* drop list reference */
- batadv_tp_vars_put(tp_vars);
-
- atomic_dec(&bat_priv->tp_num);
+ batadv_tp_list_detach(tp_vars);
spin_lock_bh(&tp_vars->unacked_lock);
list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
@@ -1136,6 +1188,9 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t)
spin_unlock_bh(&tp_vars->unacked_lock);
/* drop reference of timer */
+ if (WARN_ON(atomic_xchg(&tp_vars->receiving, 0) != 1))
+ return;
+
batadv_tp_vars_put(tp_vars);
}
@@ -1329,11 +1384,14 @@ static struct batadv_tp_vars *
batadv_tp_init_recv(struct batadv_priv *bat_priv,
const struct batadv_icmp_tp_packet *icmp)
{
- struct batadv_tp_vars *tp_vars;
+ struct batadv_tp_vars *tp_vars = NULL;
spin_lock_bh(&bat_priv->tp_list_lock);
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+ goto out_unlock;
+
tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
- icmp->session);
+ icmp->session, BATADV_TP_RECEIVER);
if (tp_vars)
goto out_unlock;
@@ -1344,11 +1402,14 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv,
}
tp_vars = kmalloc_obj(*tp_vars, GFP_ATOMIC);
- if (!tp_vars)
+ if (!tp_vars) {
+ atomic_dec(&bat_priv->tp_num);
goto out_unlock;
+ }
ether_addr_copy(tp_vars->other_end, icmp->orig);
tp_vars->role = BATADV_TP_RECEIVER;
+ atomic_set(&tp_vars->receiving, 1);
memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session));
tp_vars->last_recv = BATADV_TP_FIRST_SEQ;
tp_vars->bat_priv = bat_priv;
@@ -1401,7 +1462,7 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv,
}
} else {
tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
- icmp->session);
+ icmp->session, BATADV_TP_RECEIVER);
if (!tp_vars) {
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Unexpected packet from %pM!\n",
@@ -1410,13 +1471,6 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv,
}
}
- if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) {
- batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
- "Meter: dropping packet: not expected (role=%u)\n",
- tp_vars->role);
- goto out;
- }
-
tp_vars->last_recv_time = jiffies;
/* if the packet is a duplicate, it may be the case that an ACK has been
@@ -1464,6 +1518,9 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
{
struct batadv_icmp_tp_packet *icmp;
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+ goto out;
+
icmp = (struct batadv_icmp_tp_packet *)skb->data;
switch (icmp->subtype) {
@@ -1478,10 +1535,62 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
"Received unknown TP Metric packet type %u\n",
icmp->subtype);
}
+
+out:
consume_skb(skb);
}
/**
+ * batadv_tp_stop_all() - stop all currently running tp meter sessions
+ * @bat_priv: the bat priv with all the mesh interface information
+ */
+void batadv_tp_stop_all(struct batadv_priv *bat_priv)
+{
+ struct batadv_tp_vars *tp_vars[BATADV_TP_MAX_NUM];
+ struct batadv_tp_vars *tp_var;
+ size_t count = 0;
+ size_t i;
+
+ spin_lock_bh(&bat_priv->tp_list_lock);
+ hlist_for_each_entry(tp_var, &bat_priv->tp_list, list) {
+ if (WARN_ON_ONCE(count >= BATADV_TP_MAX_NUM))
+ break;
+
+ if (!kref_get_unless_zero(&tp_var->refcount))
+ continue;
+
+ tp_vars[count++] = tp_var;
+ }
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+
+ for (i = 0; i < count; i++) {
+ tp_var = tp_vars[i];
+
+ switch (tp_var->role) {
+ case BATADV_TP_SENDER:
+ batadv_tp_sender_shutdown(tp_var,
+ BATADV_TP_REASON_CANCEL);
+ wake_up(&tp_var->more_bytes);
+ wait_for_completion(&tp_var->finished);
+ break;
+ case BATADV_TP_RECEIVER:
+ batadv_tp_list_detach(tp_var);
+ timer_shutdown_sync(&tp_var->timer);
+
+ if (atomic_xchg(&tp_var->receiving, 0) != 1)
+ break;
+
+ batadv_tp_vars_put(tp_var);
+ break;
+ }
+
+ batadv_tp_vars_put(tp_var);
+ }
+
+ synchronize_net();
+}
+
+/**
* batadv_tp_meter_init() - initialize global tp_meter structures
*/
void __init batadv_tp_meter_init(void)
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index f0046d366eac..4e97cd10cd02 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -17,6 +17,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
u32 test_length, u32 *cookie);
void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
u8 return_value);
+void batadv_tp_stop_all(struct batadv_priv *bat_priv);
void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb);
#endif /* _NET_BATMAN_ADV_TP_METER_H_ */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 05cddcf994f6..9f6e67771ffa 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -797,24 +797,33 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
s32 *tt_len)
{
u16 num_vlan = 0;
- u16 num_entries = 0;
u16 tvlv_len = 0;
unsigned int change_offset;
struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_orig_node_vlan *vlan;
+ u16 total_entries = 0;
u8 *tt_change_ptr;
+ int vlan_entries;
+ u16 sum_entries;
spin_lock_bh(&orig_node->vlan_list_lock);
hlist_for_each_entry(vlan, &orig_node->vlan_list, list) {
+ vlan_entries = atomic_read(&vlan->tt.num_entries);
+
+ if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) {
+ *tt_len = 0;
+ goto out;
+ }
+
+ total_entries = sum_entries;
num_vlan++;
- num_entries += atomic_read(&vlan->tt.num_entries);
}
change_offset = struct_size(*tt_data, vlan_data, num_vlan);
/* if tt_len is negative, allocate the space needed by the full table */
if (*tt_len < 0)
- *tt_len = batadv_tt_len(num_entries);
+ *tt_len = batadv_tt_len(total_entries);
if (change_offset > U16_MAX || *tt_len > U16_MAX - change_offset) {
*tt_len = 0;
@@ -835,14 +844,26 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
(*tt_data)->num_vlan = htons(num_vlan);
tt_vlan = (*tt_data)->vlan_data;
+ num_vlan = 0;
hlist_for_each_entry(vlan, &orig_node->vlan_list, list) {
+ vlan_entries = atomic_read(&vlan->tt.num_entries);
+ if (vlan_entries < 1)
+ continue;
+
tt_vlan->vid = htons(vlan->vid);
tt_vlan->crc = htonl(vlan->tt.crc);
tt_vlan->reserved = 0;
tt_vlan++;
+ num_vlan++;
}
+ /* recalculate in case number of VLANs reduced */
+ change_offset = struct_size(*tt_data, vlan_data, num_vlan);
+ tvlv_len = *tt_len + change_offset;
+
+ (*tt_data)->num_vlan = htons(num_vlan);
+
tt_change_ptr = (u8 *)*tt_data + change_offset;
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
@@ -877,21 +898,25 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
{
struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_meshif_vlan *vlan;
+ size_t change_offset;
u16 num_vlan = 0;
- u16 vlan_entries = 0;
u16 total_entries = 0;
u16 tvlv_len;
u8 *tt_change_ptr;
- int change_offset;
+ int vlan_entries;
+ u16 sum_entries;
spin_lock_bh(&bat_priv->meshif_vlan_list_lock);
hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) {
vlan_entries = atomic_read(&vlan->tt.num_entries);
- if (vlan_entries < 1)
- continue;
+ if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) {
+ tvlv_len = 0;
+ goto out;
+ }
+
+ total_entries = sum_entries;
num_vlan++;
- total_entries += vlan_entries;
}
change_offset = struct_size(*tt_data, vlan_data, num_vlan);
@@ -900,8 +925,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
if (*tt_len < 0)
*tt_len = batadv_tt_len(total_entries);
- tvlv_len = *tt_len;
- tvlv_len += change_offset;
+ if (check_add_overflow(*tt_len, change_offset, &tvlv_len)) {
+ tvlv_len = 0;
+ goto out;
+ }
*tt_data = kmalloc(tvlv_len, GFP_ATOMIC);
if (!*tt_data) {
@@ -914,6 +941,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
(*tt_data)->num_vlan = htons(num_vlan);
tt_vlan = (*tt_data)->vlan_data;
+ num_vlan = 0;
hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) {
vlan_entries = atomic_read(&vlan->tt.num_entries);
if (vlan_entries < 1)
@@ -924,8 +952,15 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
tt_vlan->reserved = 0;
tt_vlan++;
+ num_vlan++;
}
+ /* recalculate in case number of VLANs reduced */
+ change_offset = struct_size(*tt_data, vlan_data, num_vlan);
+ tvlv_len = *tt_len + change_offset;
+
+ (*tt_data)->num_vlan = htons(num_vlan);
+
tt_change_ptr = (u8 *)*tt_data + change_offset;
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 8129a3f9c44d..cc6ac580c620 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -8,10 +8,12 @@
#include <linux/byteorder/generic.h>
#include <linux/container_of.h>
+#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/kref.h>
+#include <linux/limits.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -159,10 +161,10 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
*
* Return: size of all currently registered tvlv containers in bytes.
*/
-static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
+static size_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
{
struct batadv_tvlv_container *tvlv;
- u16 tvlv_len = 0;
+ size_t tvlv_len = 0;
lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
@@ -306,26 +308,35 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
* The ogm packet might be enlarged or shrunk depending on the current size
* and the size of the to-be-appended tvlv containers.
*
- * Return: size of all appended tvlv containers in bytes.
+ * Return: size of all appended tvlv containers in bytes (max U16_MAX), negative
+ * if operation failed
*/
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
unsigned char **packet_buff,
int *packet_buff_len, int packet_min_len)
{
struct batadv_tvlv_container *tvlv;
struct batadv_tvlv_hdr *tvlv_hdr;
- u16 tvlv_value_len;
+ size_t tvlv_value_len;
void *tvlv_value;
+ int tvlv_len_ret;
bool ret;
spin_lock_bh(&bat_priv->tvlv.container_list_lock);
tvlv_value_len = batadv_tvlv_container_list_size(bat_priv);
+ if (tvlv_value_len > U16_MAX) {
+ tvlv_len_ret = -E2BIG;
+ goto end;
+ }
ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len,
packet_min_len, tvlv_value_len);
-
- if (!ret)
+ if (!ret) {
+ tvlv_len_ret = -ENOMEM;
goto end;
+ }
+
+ tvlv_len_ret = tvlv_value_len;
if (!tvlv_value_len)
goto end;
@@ -344,7 +355,8 @@ u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
end:
spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
- return tvlv_value_len;
+
+ return tvlv_len_ret;
}
/**
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index e5697230d991..f96f6b3f44a0 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -16,7 +16,7 @@
void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
u8 type, u8 version,
void *tvlv_value, u16 tvlv_value_len);
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
unsigned char **packet_buff,
int *packet_buff_len, int packet_min_len);
void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 8fc5fe0e9b05..a01ee46d97f3 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -14,6 +14,7 @@
#include <linux/average.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
+#include <linux/completion.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/kref.h>
@@ -82,6 +83,9 @@ struct batadv_hard_iface_bat_iv {
/** @ogm_seqno: OGM sequence number - used to identify each OGM */
atomic_t ogm_seqno;
+ /** @reschedule_work: recover OGM schedule after schedule error */
+ struct delayed_work reschedule_work;
+
/** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */
struct mutex ogm_buff_mutex;
};
@@ -300,7 +304,7 @@ struct batadv_frag_table_entry {
u16 seqno;
/** @size: accumulated size of packets in list */
- u16 size;
+ size_t size;
/** @total_size: expected size of the assembled packet */
u16 total_size;
@@ -451,7 +455,7 @@ struct batadv_orig_node {
* @tt_buff_len: length of the last tt changeset this node received
* from the orig node
*/
- s16 tt_buff_len;
+ u16 tt_buff_len;
/** @tt_buff_lock: lock that protects tt_buff and tt_buff_len */
spinlock_t tt_buff_lock;
@@ -992,7 +996,7 @@ struct batadv_priv_tt {
* @last_changeset_len: length of last tt changeset this host has
* generated
*/
- s16 last_changeset_len;
+ u16 last_changeset_len;
/**
* @last_changeset_lock: lock protecting last_changeset &
@@ -1023,6 +1027,12 @@ struct batadv_priv_bla {
atomic_t num_requests;
/**
+ * @num_requests_lock: locks update num_requests +
+ * batadv_backbone_gw::state + batadv_backbone_gw::wait_periods update
+ */
+ spinlock_t num_requests_lock;
+
+ /**
* @claim_hash: hash table containing mesh nodes this host has claimed
*/
struct batadv_hashtable *claim_hash;
@@ -1319,15 +1329,21 @@ struct batadv_tp_vars {
/** @role: receiver/sender modi */
enum batadv_tp_meter_role role;
- /** @sending: sending binary semaphore: 1 if sending, 0 is not */
- atomic_t sending;
+ /**
+ * @send_result: 0 when sending is ongoing and otherwise
+ * enum batadv_tp_meter_reason
+ */
+ atomic_t send_result;
- /** @reason: reason for a stopped session */
- enum batadv_tp_meter_reason reason;
+ /** @receiving: receiving binary semaphore: 1 if receiving, 0 is not */
+ atomic_t receiving;
/** @finish_work: work item for the finishing procedure */
struct delayed_work finish_work;
+ /** @finished: completion signaled when a sender thread exits */
+ struct completion finished;
+
/** @test_length: test length in milliseconds */
u32 test_length;
@@ -1662,6 +1678,27 @@ struct batadv_priv {
#ifdef CONFIG_BATMAN_ADV_BLA
+enum batadv_bla_backbone_gw_state {
+ /**
+ * @BATADV_BLA_BACKBONE_GW_STOPPED: backbone gw is being removed
+ * and it must not longer work on requests
+ */
+ BATADV_BLA_BACKBONE_GW_STOPPED,
+
+ /**
+ * @BATADV_BLA_BACKBONE_GW_UNSYNCED: backbone was detected out
+ * of sync and a request was send. No traffic is forwarded until the
+ * situation is resolved
+ */
+ BATADV_BLA_BACKBONE_GW_UNSYNCED,
+
+ /**
+ * @BATADV_BLA_BACKBONE_GW_SYNCED: backbone is consider to be in
+ * sync. traffic can be forwarded
+ */
+ BATADV_BLA_BACKBONE_GW_SYNCED,
+};
+
/**
* struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN
*/
@@ -1687,16 +1724,12 @@ struct batadv_bla_backbone_gw {
/**
* @wait_periods: grace time for bridge forward delays and bla group
* forming at bootup phase - no bcast traffic is formwared until it has
- * elapsed
+ * elapsed. Must only be access with num_requests_lock.
*/
- atomic_t wait_periods;
+ u8 wait_periods;
- /**
- * @request_sent: if this bool is set to true we are out of sync with
- * this backbone gateway - no bcast traffic is formwared until the
- * situation was resolved
- */
- atomic_t request_sent;
+ /** @state: sync state. Must only be access with num_requests_lock. */
+ enum batadv_bla_backbone_gw_state state;
/** @crc: crc16 checksum over all claims */
u16 crc;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 33d053d63407..1a6aa3f8d4d6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -154,6 +154,7 @@ struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+ spin_lock_init(&bt_sk(sk)->accept_q_lock);
sock_reset_flag(sk, SOCK_ZAPPED);
@@ -214,6 +215,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
{
const struct cred *old_cred;
struct pid *old_pid;
+ struct bt_sock *par = bt_sk(parent);
BT_DBG("parent %p, sk %p", parent, sk);
@@ -224,9 +226,13 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
else
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
- list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
bt_sk(sk)->parent = parent;
+ spin_lock_bh(&par->accept_q_lock);
+ list_add_tail(&bt_sk(sk)->accept_q, &par->accept_q);
+ sk_acceptq_added(parent);
+ spin_unlock_bh(&par->accept_q_lock);
+
/* Copy credentials from parent since for incoming connections the
* socket is allocated by the kernel.
*/
@@ -244,8 +250,6 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
bh_unlock_sock(sk);
else
release_sock(sk);
-
- sk_acceptq_added(parent);
}
EXPORT_SYMBOL(bt_accept_enqueue);
@@ -254,45 +258,72 @@ EXPORT_SYMBOL(bt_accept_enqueue);
*/
void bt_accept_unlink(struct sock *sk)
{
+ struct sock *parent = bt_sk(sk)->parent;
+
BT_DBG("sk %p state %d", sk, sk->sk_state);
+ spin_lock_bh(&bt_sk(parent)->accept_q_lock);
list_del_init(&bt_sk(sk)->accept_q);
- sk_acceptq_removed(bt_sk(sk)->parent);
+ sk_acceptq_removed(parent);
+ spin_unlock_bh(&bt_sk(parent)->accept_q_lock);
bt_sk(sk)->parent = NULL;
sock_put(sk);
}
EXPORT_SYMBOL(bt_accept_unlink);
+static struct sock *bt_accept_get(struct sock *parent, struct sock *sk)
+{
+ struct bt_sock *bt = bt_sk(parent);
+ struct sock *next = NULL;
+
+ /* accept_q is modified from child teardown paths too, so take a
+ * temporary reference before dropping the queue lock.
+ */
+ spin_lock_bh(&bt->accept_q_lock);
+
+ if (sk) {
+ if (bt_sk(sk)->parent != parent)
+ goto out;
+
+ if (!list_is_last(&bt_sk(sk)->accept_q, &bt->accept_q)) {
+ next = &list_next_entry(bt_sk(sk), accept_q)->sk;
+ sock_hold(next);
+ }
+ } else if (!list_empty(&bt->accept_q)) {
+ next = &list_first_entry(&bt->accept_q,
+ struct bt_sock, accept_q)->sk;
+ sock_hold(next);
+ }
+
+out:
+ spin_unlock_bh(&bt->accept_q_lock);
+ return next;
+}
+
struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
{
- struct bt_sock *s, *n;
- struct sock *sk;
+ struct sock *sk, *next;
BT_DBG("parent %p", parent);
restart:
- list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
- sk = (struct sock *)s;
-
+ for (sk = bt_accept_get(parent, NULL); sk; sk = next) {
/* Prevent early freeing of sk due to unlink and sock_kill */
- sock_hold(sk);
lock_sock(sk);
/* Check sk has not already been unlinked via
* bt_accept_unlink() due to serialisation caused by sk locking
*/
- if (!bt_sk(sk)->parent) {
+ if (bt_sk(sk)->parent != parent) {
BT_DBG("sk %p, already unlinked", sk);
release_sock(sk);
sock_put(sk);
- /* Restart the loop as sk is no longer in the list
- * and also avoid a potential infinite loop because
- * list_for_each_entry_safe() is not thread safe.
- */
goto restart;
}
+ next = bt_accept_get(parent, sk);
+
/* sk is safely in the parent list so reduce reference count */
sock_put(sk);
@@ -309,7 +340,19 @@ restart:
if (newsock)
sock_graft(sk, newsock);
+ /* Hand the caller a reference taken while sk is
+ * still locked. bt_accept_unlink() just dropped
+ * the accept-queue reference; without this hold a
+ * concurrent teardown (e.g. l2cap_conn_del() ->
+ * l2cap_sock_kill()) could free sk between
+ * release_sock() and the caller using it. Every
+ * caller drops this with sock_put() when done.
+ */
+ sock_hold(sk);
+
release_sock(sk);
+ if (next)
+ sock_put(next);
return sk;
}
@@ -518,18 +561,28 @@ EXPORT_SYMBOL(bt_sock_stream_recvmsg);
static inline __poll_t bt_accept_poll(struct sock *parent)
{
- struct bt_sock *s, *n;
+ struct bt_sock *bt = bt_sk(parent);
+ struct bt_sock *s;
struct sock *sk;
+ __poll_t mask = 0;
+
+ spin_lock_bh(&bt->accept_q_lock);
+ list_for_each_entry(s, &bt->accept_q, accept_q) {
+ int state;
- list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
sk = (struct sock *)s;
- if (sk->sk_state == BT_CONNECTED ||
- (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) &&
- sk->sk_state == BT_CONNECT2))
- return EPOLLIN | EPOLLRDNORM;
+ state = READ_ONCE(sk->sk_state);
+
+ if (state == BT_CONNECTED ||
+ (test_bit(BT_SK_DEFER_SETUP, &bt->flags) &&
+ state == BT_CONNECT2)) {
+ mask = EPOLLIN | EPOLLRDNORM;
+ break;
+ }
}
+ spin_unlock_bh(&bt->accept_q_lock);
- return 0;
+ return mask;
}
__poll_t bt_sock_poll(struct file *file, struct socket *sock,
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 853c8d7644b5..0de5df690bd0 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -645,8 +645,8 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
goto failed;
}
- up_write(&bnep_session_sem);
strcpy(req->device, dev->name);
+ up_write(&bnep_session_sem);
return 0;
failed:
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index fd3aacdea512..aff8562a8690 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4438,6 +4438,9 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
events[4] |= 0x02; /* LE BIG Info Advertising Report */
}
+ if (ll_ext_feature_capable(hdev))
+ events[5] |= BIT(2);
+
if (le_cs_capable(hdev)) {
/* Channel Sounding events */
events[5] |= 0x08; /* LE CS Read Remote Supported Cap Complete event */
@@ -7413,9 +7416,6 @@ static int hci_le_read_all_remote_features_sync(struct hci_dev *hdev,
sizeof(cp), &cp,
HCI_EVT_LE_ALL_REMOTE_FEATURES_COMPLETE,
HCI_CMD_TIMEOUT, NULL);
-
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_ALL_REMOTE_FEATURES,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
static int hci_le_read_remote_features_sync(struct hci_dev *hdev, void *data)
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 7cb2864fe872..d7af617cda45 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -751,6 +751,8 @@ static void iso_sock_cleanup_listen(struct sock *parent)
while ((sk = bt_accept_dequeue(parent, NULL))) {
iso_sock_close(sk);
iso_sock_kill(sk);
+ /* Drop the reference handed back by bt_accept_dequeue(). */
+ sock_put(sk);
}
/* If listening socket has a hcon, properly disconnect it */
@@ -1356,8 +1358,13 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock,
}
ch = bt_accept_dequeue(sk, newsock);
- if (ch)
+ if (ch) {
+ /* Drop the bridging ref from bt_accept_dequeue();
+ * the grafted socket keeps ch alive from here.
+ */
+ sock_put(ch);
break;
+ }
if (!timeo) {
err = -EAGAIN;
@@ -2593,6 +2600,11 @@ int iso_recv(struct hci_dev *hdev, u16 handle, struct sk_buff *skb, u16 flags)
break;
case ISO_END:
+ if (!conn->rx_len) {
+ BT_ERR("Unexpected end frame (len %d)", skb->len);
+ goto drop;
+ }
+
skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
skb->len);
conn->rx_len -= skb->len;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 7701528f1167..fdccd62ccca8 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7274,7 +7274,7 @@ static void l2cap_ecred_reconfigure(struct l2cap_chan *chan)
chan->ident = l2cap_get_ident(conn);
l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ,
- sizeof(pdu), &pdu);
+ struct_size(pdu, scid, 1), pdu);
}
int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu)
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index cf590a67d364..b34e7da8d906 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -349,8 +349,13 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
}
nsk = bt_accept_dequeue(sk, newsock);
- if (nsk)
+ if (nsk) {
+ /* Drop the bridging ref from bt_accept_dequeue();
+ * the grafted socket keeps nsk alive from here.
+ */
+ sock_put(nsk);
break;
+ }
if (!timeo) {
err = -EAGAIN;
@@ -1475,22 +1480,54 @@ static void l2cap_sock_cleanup_listen(struct sock *parent)
BT_DBG("parent %p state %s", parent,
state_to_string(parent->sk_state));
- /* Close not yet accepted channels */
+ /* Close not yet accepted channels.
+ *
+ * bt_accept_dequeue() now returns sk with an extra reference held
+ * (taken while sk was still locked) so a concurrent l2cap_conn_del()
+ * -> l2cap_sock_kill() cannot free sk under us.
+ *
+ * cleanup_listen() runs under the parent sk lock, so unlike
+ * l2cap_sock_shutdown() we must NOT take conn->lock here: that would
+ * establish sk_lock -> conn->lock and invert the established
+ * conn->lock -> chan->lock -> sk_lock order (lockdep deadlock).
+ *
+ * Instead, briefly take the child sk lock to fetch and pin its chan.
+ * l2cap_conn_del() reaches the chan free only via
+ * l2cap_chan_del() -> l2cap_sock_teardown_cb(), which itself takes
+ * the child sk lock; holding it across l2cap_chan_hold_unless_zero()
+ * therefore guarantees the chan cannot be freed while we read and
+ * pin it (hold_unless_zero() additionally skips a chan already past
+ * its last reference). We then drop the sk lock before taking
+ * chan->lock, so sk and chan locks are never held together.
+ */
while ((sk = bt_accept_dequeue(parent, NULL))) {
- struct l2cap_chan *chan = l2cap_pi(sk)->chan;
+ struct l2cap_chan *chan;
+
+ lock_sock_nested(sk, L2CAP_NESTING_NORMAL);
+ chan = l2cap_chan_hold_unless_zero(l2cap_pi(sk)->chan);
+ release_sock(sk);
+ if (!chan) {
+ /* l2cap_conn_del() already tearing this child down */
+ sock_put(sk);
+ continue;
+ }
BT_DBG("child chan %p state %s", chan,
state_to_string(chan->state));
- l2cap_chan_hold(chan);
l2cap_chan_lock(chan);
-
__clear_chan_timer(chan);
l2cap_chan_close(chan, ECONNRESET);
- l2cap_sock_kill(sk);
-
+ /* l2cap_conn_del() may already have killed this socket
+ * (it sets SOCK_DEAD); skip the duplicate to avoid a
+ * double sock_put()/l2cap_chan_put().
+ */
+ if (!sock_flag(sk, SOCK_DEAD))
+ l2cap_sock_kill(sk);
l2cap_chan_unlock(chan);
+
l2cap_chan_put(chan);
+ sock_put(sk);
}
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index b05bb380e5f8..de5bd6b637b2 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -9110,9 +9110,15 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data,
struct adv_info *adv_instance;
int err = 0;
struct mgmt_pending_cmd *cmd;
+ u16 expected_len;
BT_DBG("%s", hdev->name);
+ expected_len = struct_size(cp, data, cp->adv_data_len + cp->scan_rsp_len);
+ if (expected_len != data_len)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA,
+ MGMT_STATUS_INVALID_PARAMS);
+
hci_dev_lock(hdev);
adv_instance = hci_find_adv_instance(hdev, cp->instance);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index be6639cd6f59..bd7d959c6e9e 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -180,6 +180,8 @@ static void rfcomm_sock_cleanup_listen(struct sock *parent)
while ((sk = bt_accept_dequeue(parent, NULL))) {
rfcomm_sock_close(sk);
rfcomm_sock_kill(sk);
+ /* Drop the reference handed back by bt_accept_dequeue(). */
+ sock_put(sk);
}
parent->sk_state = BT_CLOSED;
@@ -497,8 +499,13 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock,
}
nsk = bt_accept_dequeue(sk, newsock);
- if (nsk)
+ if (nsk) {
+ /* Drop the bridging ref from bt_accept_dequeue();
+ * the grafted socket keeps nsk alive from here.
+ */
+ sock_put(nsk);
break;
+ }
if (!timeo) {
err = -EAGAIN;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index eba44525d41d..f1799c6a6f87 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -502,6 +502,8 @@ static void sco_sock_cleanup_listen(struct sock *parent)
while ((sk = bt_accept_dequeue(parent, NULL))) {
sco_sock_close(sk);
sco_sock_kill(sk);
+ /* Drop the reference handed back by bt_accept_dequeue(). */
+ sock_put(sk);
}
parent->sk_state = BT_CLOSED;
@@ -765,8 +767,13 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock,
}
ch = bt_accept_dequeue(sk, newsock);
- if (ch)
+ if (ch) {
+ /* Drop the bridging ref from bt_accept_dequeue();
+ * the grafted socket keeps ch alive from here.
+ */
+ sock_put(ch);
break;
+ }
if (!timeo) {
err = -EAGAIN;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 881d866d687a..2eef4f3345cd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -4640,10 +4640,24 @@ static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
rcu_read_unlock();
}
-static void br_multicast_del_grps(struct net_bridge *br)
+static void br_multicast_enable_all_ports(struct net_bridge *br)
{
struct net_bridge_port *port;
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ return;
+
+ list_for_each_entry(port, &br->port_list, list)
+ __br_multicast_enable_port_ctx(&port->multicast_ctx);
+}
+
+static void br_multicast_disable_all_ports(struct net_bridge *br)
+{
+ struct net_bridge_port *port;
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ return;
+
list_for_each_entry(port, &br->port_list, list)
__br_multicast_disable_port_ctx(&port->multicast_ctx);
}
@@ -4651,7 +4665,6 @@ static void br_multicast_del_grps(struct net_bridge *br)
int br_multicast_toggle(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- struct net_bridge_port *port;
bool change_snoopers = false;
int err = 0;
@@ -4668,7 +4681,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val,
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val);
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
change_snoopers = true;
- br_multicast_del_grps(br);
+ br_multicast_disable_all_ports(br);
goto unlock;
}
@@ -4676,8 +4689,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val,
goto unlock;
br_multicast_open(br);
- list_for_each_entry(port, &br->port_list, list)
- __br_multicast_enable_port_ctx(&port->multicast_ctx);
+ br_multicast_enable_all_ports(br);
change_snoopers = true;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 0ab1c94db4b9..0a394e5f4391 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -297,7 +297,11 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
goto free_skb;
}
- neigh_hh_bridge(&neigh->hh, skb);
+ if (neigh_hh_bridge(&neigh->hh, skb)) {
+ neigh_release(neigh);
+ goto free_skb;
+ }
+
skb->dev = br_indev;
ret = br_handle_frame_finish(net, sk, skb);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6fd5386a1d64..c04a4d0889ae 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1824,6 +1824,7 @@ static int br_fill_linkxstats(struct sk_buff *skb,
const struct net_device *dev,
int *prividx, int attr)
{
+ unsigned int limit = U16_MAX - nla_total_size(0);
struct nlattr *nla __maybe_unused;
struct net_bridge_port *p = NULL;
struct net_bridge_vlan_group *vg;
@@ -1841,6 +1842,7 @@ static int br_fill_linkxstats(struct sk_buff *skb,
p = br_port_get_rtnl(dev);
if (!p)
return 0;
+ limit -= nla_total_size_64bit(sizeof(p->stp_xstats));
br = p->br;
vg = nbp_vlan_group(p);
break;
@@ -1855,6 +1857,9 @@ static int br_fill_linkxstats(struct sk_buff *skb,
if (vg) {
u16 pvid;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ limit -= nla_total_size_64bit(sizeof(struct br_mcast_stats));
+#endif
pvid = br_get_pvid(vg);
list_for_each_entry(v, &vg->vlan_list, vlist) {
struct bridge_vlan_xstats vxi;
@@ -1862,6 +1867,11 @@ static int br_fill_linkxstats(struct sk_buff *skb,
if (++vl_idx < *prividx)
continue;
+
+ if (skb_tail_pointer(skb) - (unsigned char *)nest +
+ nla_total_size(sizeof(vxi)) >= limit)
+ goto nla_put_failure;
+
memset(&vxi, 0, sizeof(vxi));
vxi.vid = v->vid;
vxi.flags = v->flags;
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 741360219552..f05c79f215ea 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -112,24 +112,22 @@ static struct pernet_operations broute_net_ops = {
static int __init ebtable_broute_init(void)
{
- int ret = ebt_register_template(&broute_table, broute_table_init);
+ int ret = register_pernet_subsys(&broute_net_ops);
if (ret)
return ret;
- ret = register_pernet_subsys(&broute_net_ops);
- if (ret) {
- ebt_unregister_template(&broute_table);
- return ret;
- }
+ ret = ebt_register_template(&broute_table, broute_table_init);
+ if (ret)
+ unregister_pernet_subsys(&broute_net_ops);
- return 0;
+ return ret;
}
static void __exit ebtable_broute_fini(void)
{
- unregister_pernet_subsys(&broute_net_ops);
ebt_unregister_template(&broute_table);
+ unregister_pernet_subsys(&broute_net_ops);
}
module_init(ebtable_broute_init);
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index dacd81b12e62..0fc03b07e62a 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -93,24 +93,22 @@ static struct pernet_operations frame_filter_net_ops = {
static int __init ebtable_filter_init(void)
{
- int ret = ebt_register_template(&frame_filter, frame_filter_table_init);
+ int ret = register_pernet_subsys(&frame_filter_net_ops);
if (ret)
return ret;
- ret = register_pernet_subsys(&frame_filter_net_ops);
- if (ret) {
- ebt_unregister_template(&frame_filter);
- return ret;
- }
+ ret = ebt_register_template(&frame_filter, frame_filter_table_init);
+ if (ret)
+ unregister_pernet_subsys(&frame_filter_net_ops);
- return 0;
+ return ret;
}
static void __exit ebtable_filter_fini(void)
{
- unregister_pernet_subsys(&frame_filter_net_ops);
ebt_unregister_template(&frame_filter);
+ unregister_pernet_subsys(&frame_filter_net_ops);
}
module_init(ebtable_filter_init);
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 0f2a8c6118d4..8a10375d8909 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -93,24 +93,22 @@ static struct pernet_operations frame_nat_net_ops = {
static int __init ebtable_nat_init(void)
{
- int ret = ebt_register_template(&frame_nat, frame_nat_table_init);
+ int ret = register_pernet_subsys(&frame_nat_net_ops);
if (ret)
return ret;
- ret = register_pernet_subsys(&frame_nat_net_ops);
- if (ret) {
- ebt_unregister_template(&frame_nat);
- return ret;
- }
+ ret = ebt_register_template(&frame_nat, frame_nat_table_init);
+ if (ret)
+ unregister_pernet_subsys(&frame_nat_net_ops);
return ret;
}
static void __exit ebtable_nat_fini(void)
{
- unregister_pernet_subsys(&frame_nat_net_ops);
ebt_unregister_template(&frame_nat);
+ unregister_pernet_subsys(&frame_nat_net_ops);
}
module_init(ebtable_nat_init);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index aea3e19875c6..b9f4daac09af 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -42,6 +42,7 @@
struct ebt_pernet {
struct list_head tables;
+ struct list_head dead_tables;
};
struct ebt_template {
@@ -1162,11 +1163,6 @@ free_newinfo:
static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
{
- mutex_lock(&ebt_mutex);
- list_del(&table->list);
- mutex_unlock(&ebt_mutex);
- audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries,
- AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size,
ebt_cleanup_entry, net, NULL);
if (table->private->nentries)
@@ -1267,13 +1263,15 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
for (i = 0; i < num_ops; i++)
ops[i].priv = table;
- list_add(&table->list, &ebt_net->tables);
- mutex_unlock(&ebt_mutex);
-
table->ops = ops;
ret = nf_register_net_hooks(net, ops, num_ops);
- if (ret)
+ if (ret) {
+ synchronize_rcu();
__ebt_unregister_table(net, table);
+ } else {
+ list_add(&table->list, &ebt_net->tables);
+ }
+ mutex_unlock(&ebt_mutex);
audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
AUDIT_XT_OP_REGISTER, GFP_KERNEL);
@@ -1339,7 +1337,7 @@ void ebt_unregister_template(const struct ebt_table *t)
}
EXPORT_SYMBOL(ebt_unregister_template);
-static struct ebt_table *__ebt_find_table(struct net *net, const char *name)
+void ebt_unregister_table_pre_exit(struct net *net, const char *name)
{
struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
struct ebt_table *t;
@@ -1348,30 +1346,36 @@ static struct ebt_table *__ebt_find_table(struct net *net, const char *name)
list_for_each_entry(t, &ebt_net->tables, list) {
if (strcmp(t->name, name) == 0) {
+ list_move(&t->list, &ebt_net->dead_tables);
mutex_unlock(&ebt_mutex);
- return t;
+ nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks));
+ return;
}
}
mutex_unlock(&ebt_mutex);
- return NULL;
-}
-
-void ebt_unregister_table_pre_exit(struct net *net, const char *name)
-{
- struct ebt_table *table = __ebt_find_table(net, name);
-
- if (table)
- nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
EXPORT_SYMBOL(ebt_unregister_table_pre_exit);
void ebt_unregister_table(struct net *net, const char *name)
{
- struct ebt_table *table = __ebt_find_table(net, name);
+ struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
+ struct ebt_table *t;
- if (table)
- __ebt_unregister_table(net, table);
+ mutex_lock(&ebt_mutex);
+
+ list_for_each_entry(t, &ebt_net->dead_tables, list) {
+ if (strcmp(t->name, name) == 0) {
+ list_del(&t->list);
+ audit_log_nfcfg(t->name, AF_BRIDGE, t->private->nentries,
+ AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
+ __ebt_unregister_table(net, t);
+ mutex_unlock(&ebt_mutex);
+ return;
+ }
+ }
+
+ mutex_unlock(&ebt_mutex);
}
/* userspace just supplied us with counters */
@@ -2556,11 +2560,21 @@ static int __net_init ebt_pernet_init(struct net *net)
struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
INIT_LIST_HEAD(&ebt_net->tables);
+ INIT_LIST_HEAD(&ebt_net->dead_tables);
return 0;
}
+static void __net_exit ebt_pernet_exit(struct net *net)
+{
+ struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
+
+ WARN_ON_ONCE(!list_empty(&ebt_net->tables));
+ WARN_ON_ONCE(!list_empty(&ebt_net->dead_tables));
+}
+
static struct pernet_operations ebt_net_ops = {
.init = ebt_pernet_init,
+ .exit = ebt_pernet_exit,
.id = &ebt_pernet_id,
.size = sizeof(struct ebt_pernet),
};
@@ -2569,19 +2583,20 @@ static int __init ebtables_init(void)
{
int ret;
- ret = xt_register_target(&ebt_standard_target);
+ ret = register_pernet_subsys(&ebt_net_ops);
if (ret < 0)
return ret;
- ret = nf_register_sockopt(&ebt_sockopts);
+
+ ret = xt_register_target(&ebt_standard_target);
if (ret < 0) {
- xt_unregister_target(&ebt_standard_target);
+ unregister_pernet_subsys(&ebt_net_ops);
return ret;
}
- ret = register_pernet_subsys(&ebt_net_ops);
+ ret = nf_register_sockopt(&ebt_sockopts);
if (ret < 0) {
- nf_unregister_sockopt(&ebt_sockopts);
xt_unregister_target(&ebt_standard_target);
+ unregister_pernet_subsys(&ebt_net_ops);
return ret;
}
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 692e0b868822..9e64e82d0b63 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -115,6 +115,11 @@ static int __ceph_x_decrypt(const struct ceph_crypto_key *key, int usage_slot,
if (ret)
return ret;
+ if (plaintext_len < sizeof(*hdr)) {
+ pr_err("%s plaintext too small %d\n", __func__, plaintext_len);
+ return -EINVAL;
+ }
+
hdr = p + ceph_crypt_data_offset(key);
if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) {
pr_err("%s bad magic\n", __func__);
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 254ded0b05f6..521aec1d5fc0 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -47,7 +47,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
{
kfree(b->h.items);
- kfree(b);
}
void crush_destroy_bucket_list(struct crush_bucket_list *b)
@@ -55,14 +54,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
kfree(b->item_weights);
kfree(b->sum_weights);
kfree(b->h.items);
- kfree(b);
}
void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
{
kfree(b->h.items);
kfree(b->node_weights);
- kfree(b);
}
void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
@@ -70,14 +67,12 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
kfree(b->straws);
kfree(b->item_weights);
kfree(b->h.items);
- kfree(b);
}
void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
{
kfree(b->item_weights);
kfree(b->h.items);
- kfree(b);
}
void crush_destroy_bucket(struct crush_bucket *b)
@@ -99,6 +94,7 @@ void crush_destroy_bucket(struct crush_bucket *b)
crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
break;
}
+ kfree(b);
}
/**
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index c89e66d4fcb7..8b5b0587a0cf 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -72,8 +72,7 @@ static int crush_decode_uniform_bucket(void **p, void *end,
struct crush_bucket_uniform *b)
{
dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
- ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
- b->item_weight = ceph_decode_32(p);
+ ceph_decode_32_safe(p, end, b->item_weight, bad);
return 0;
bad:
return -EINVAL;
@@ -389,11 +388,15 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
goto fail;
if (arg->ids_size &&
- arg->ids_size != c->buckets[bucket_index]->size)
+ (!c->buckets[bucket_index] ||
+ arg->ids_size != c->buckets[bucket_index]->size))
goto e_inval;
}
- insert_choose_arg_map(&c->choose_args, arg_map);
+ if (!__insert_choose_arg_map(&c->choose_args, arg_map)) {
+ ret = -EEXIST;
+ goto fail;
+ }
}
return 0;
@@ -516,6 +519,10 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
b->id = ceph_decode_32(p);
b->type = ceph_decode_16(p);
b->alg = ceph_decode_8(p);
+ if (b->alg != alg) {
+ b->alg = 0;
+ goto bad;
+ }
b->hash = ceph_decode_8(p);
b->weight = ceph_decode_32(p);
b->size = ceph_decode_32(p);
@@ -1702,7 +1709,7 @@ static int osdmap_decode(void **p, void *end, bool msgr2,
ceph_decode_need(p, end, 3*sizeof(u32) +
map->max_osd*(struct_v >= 5 ? sizeof(u32) :
sizeof(u8)) +
- sizeof(*map->osd_weight), e_inval);
+ map->max_osd*sizeof(*map->osd_weight), e_inval);
if (ceph_decode_32(p) != map->max_osd)
goto e_inval;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 14eb7812bda4..ecd659f79fd4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -172,7 +172,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
struct bpf_map *map;
smap = rcu_dereference(SDATA(selem)->smap);
- if (!(smap->map.map_flags & BPF_F_CLONE))
+ if (!smap || !(smap->map.map_flags & BPF_F_CLONE))
continue;
/* Note that for lockless listeners adding new element
@@ -531,10 +531,10 @@ err_free:
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
-static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
+static int diag_get(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
struct nlattr *nla_stg, *nla_value;
- struct bpf_local_storage_map *smap;
/* It cannot exceed max nlattr's payload */
BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);
@@ -543,7 +543,6 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
if (!nla_stg)
return -EMSGSIZE;
- smap = rcu_dereference(sdata->smap);
if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
goto errout;
@@ -558,6 +557,7 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
sdata->data, true);
else
copy_map_value(&smap->map, nla_data(nla_value), sdata->data);
+ check_and_init_map_value(&smap->map, nla_data(nla_value));
nla_nest_end(skb, nla_stg);
return 0;
@@ -596,9 +596,11 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
saved_len = skb->len;
hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
smap = rcu_dereference(SDATA(selem)->smap);
+ if (!smap)
+ continue;
diag_size += nla_value_size(smap->map.value_size);
- if (nla_stgs && diag_get(SDATA(selem), skb))
+ if (nla_stgs && diag_get(smap, SDATA(selem), skb))
/* Continue to learn diag_size */
err = -EMSGSIZE;
}
@@ -665,7 +667,7 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
diag_size += nla_value_size(diag->maps[i]->value_size);
- if (nla_stgs && diag_get(sdata, skb))
+ if (nla_stgs && diag_get((struct bpf_local_storage_map *)diag->maps[i], sdata, skb))
/* Continue to learn diag_size */
err = -EMSGSIZE;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 8bfa8313ef62..0c6c270d9f7d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6862,9 +6862,9 @@ static void skb_defer_free_flush(void)
#if defined(CONFIG_NET_RX_BUSY_POLL)
-static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
+static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout)
{
- if (!skip_schedule) {
+ if (!timeout) {
gro_normal_list(&napi->gro);
__napi_schedule(napi);
return;
@@ -6874,6 +6874,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
gro_flush_normal(&napi->gro, HZ >= 1000);
clear_bit(NAPI_STATE_SCHED, &napi->state);
+ hrtimer_start(&napi->timer, ns_to_ktime(timeout),
+ HRTIMER_MODE_REL_PINNED);
}
enum {
@@ -6885,8 +6887,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
unsigned flags, u16 budget)
{
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
- bool skip_schedule = false;
- unsigned long timeout;
+ unsigned long timeout = 0;
int rc;
/* Busy polling means there is a high chance device driver hard irq
@@ -6906,10 +6907,12 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
- timeout = napi_get_gro_flush_timeout(napi);
- if (napi->defer_hard_irqs_count && timeout) {
- hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
- skip_schedule = true;
+ if (napi->defer_hard_irqs_count) {
+ /* A short enough gro flush timeout and long enough
+ * poll can result in timer firing too early.
+ * Timer will be armed later if necessary.
+ */
+ timeout = napi_get_gro_flush_timeout(napi);
}
}
@@ -6924,7 +6927,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
trace_napi_poll(napi, rc, budget);
netpoll_poll_unlock(have_poll_lock);
if (rc == budget)
- __busy_poll_stop(napi, skip_schedule);
+ __busy_poll_stop(napi, timeout);
bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
}
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 468344739db2..4f71de44c0fb 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -241,6 +241,11 @@ net_devmem_bind_dmabuf(struct net_device *dev,
}
if (direction == DMA_TO_DEVICE) {
+ if (!IS_ALIGNED(dmabuf->size, PAGE_SIZE)) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "TX dma-buf size must be a multiple of PAGE_SIZE");
+ goto err_unmap;
+ }
binding->tx_vec = kvmalloc_objs(struct net_iov *,
dmabuf->size / PAGE_SIZE);
if (!binding->tx_vec) {
@@ -267,6 +272,12 @@ net_devmem_bind_dmabuf(struct net_device *dev,
size_t len = sg_dma_len(sg);
struct net_iov *niov;
+ if (!IS_ALIGNED(len, PAGE_SIZE)) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "dma-buf SG length must be PAGE_SIZE aligned");
+ goto err_free_chunks;
+ }
+
owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
dev_to_node(&dev->dev));
if (!owner) {
diff --git a/net/core/failover.c b/net/core/failover.c
index 11bb183c7a1b..e43c59cd6868 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -12,6 +12,7 @@
#include <uapi/linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <net/netdev_lock.h>
#include <net/failover.h>
static LIST_HEAD(failover_list);
@@ -221,8 +222,11 @@ failover_existing_slave_register(struct net_device *failover_dev)
for_each_netdev(net, dev) {
if (netif_is_failover(dev))
continue;
- if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr))
+ if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) {
+ netdev_lock_ops(dev);
failover_slave_register(dev);
+ netdev_unlock_ops(dev);
+ }
}
rtnl_unlock();
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 80a3b702a2d4..9590877b0714 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1654,15 +1654,24 @@ err_prog_put:
return err;
}
+static void sk_reuseport_prog_free_rcu(struct rcu_head *rcu)
+{
+ struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
+ struct bpf_prog *prog = aux->prog;
+
+ bpf_release_orig_filter(prog);
+ bpf_prog_free(prog);
+}
+
void sk_reuseport_prog_free(struct bpf_prog *prog)
{
if (!prog)
return;
- if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
- bpf_prog_put(prog);
+ if (bpf_prog_was_classic(prog))
+ call_rcu(&prog->aux->rcu, sk_reuseport_prog_free_rcu);
else
- bpf_prog_destroy(prog);
+ bpf_prog_put(prog);
}
static inline int __bpf_try_make_writable(struct sk_buff *skb,
@@ -5481,7 +5490,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
char *optval, int *optlen,
bool getopt)
{
- if (sk->sk_protocol != IPPROTO_TCP)
+ if (!sk_is_tcp(sk))
return -EINVAL;
switch (optname) {
@@ -5688,6 +5697,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ /*
+ * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters
+ * CA_EVENT_TX_START in bpf_tcp_cc.
+ */
+ if (level == SOL_TCP && optname == TCP_NODELAY)
+ return -EOPNOTSUPP;
+
+ return _bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = {
+ .func = bpf_sk_setsockopt_nodelay,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
int, optname, char *, optval, int, optlen)
{
@@ -5833,6 +5866,12 @@ BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
if (!is_locked_tcp_sock_ops(bpf_sock))
return -EOPNOTSUPP;
+ /* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. */
+ if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB ||
+ bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) &&
+ level == SOL_TCP && optname == TCP_NODELAY)
+ return -EOPNOTSUPP;
+
return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
}
@@ -6443,6 +6482,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
* against MTU of FIB lookup resulting net_device
*/
dev = dev_get_by_index_rcu(net, params->ifindex);
+ if (unlikely(!dev))
+ return -ENODEV;
if (!is_skb_forwardable(dev, skb))
rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
@@ -7443,7 +7484,7 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{
- if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+ if (sk_fullsock(sk) && sk_is_tcp(sk))
return (unsigned long)sk;
return (unsigned long)NULL;
@@ -11915,7 +11956,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
*/
BTF_TYPE_EMIT(struct tcp6_sock);
if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
- sk->sk_family == AF_INET6)
+ sk->sk_type == SOCK_STREAM && sk->sk_family == AF_INET6)
return (unsigned long)sk;
return (unsigned long)NULL;
@@ -11931,7 +11972,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
{
- if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+ if (sk && sk_fullsock(sk) && sk_is_tcp(sk))
return (unsigned long)sk;
return (unsigned long)NULL;
diff --git a/net/core/gro.c b/net/core/gro.c
index 31d21de5b15a..a84753983467 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -109,6 +109,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
if (p->pp_recycle != skb->pp_recycle)
return -ETOOMANYREFS;
+ if (skb_zcopy(p) || skb_zcopy(skb))
+ return -ETOOMANYREFS;
+
if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) ||
NAPI_GRO_CB(skb)->flush))
return -E2BIG;
@@ -213,10 +216,12 @@ done:
p->data_len += len;
p->truesize += delta_truesize;
p->len += len;
+ skb_shinfo(p)->flags |= skbinfo->flags & SKBFL_SHARED_FRAG;
if (lp != p) {
lp->data_len += len;
lp->truesize += delta_truesize;
lp->len += len;
+ skb_shinfo(lp)->flags |= skbinfo->flags & SKBFL_SHARED_FRAG;
}
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
@@ -244,6 +249,8 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
p->truesize += skb->truesize;
p->len += skb->len;
+ skb_shinfo(p)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 84faace50ac2..3f4a17fa5713 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -319,6 +319,8 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
lockdep_assert_irqs_disabled();
dev = np->dev;
+ /* npinfo->txq belongs to np->dev, so retries must stay bound to it. */
+ skb->dev = dev;
rcu_read_lock();
npinfo = rcu_dereference_bh(dev->npinfo);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index df042da422ef..511c25bf6f2a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -6328,8 +6328,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
0, &filters, &idxattr, &prividx, extack);
if (err < 0) {
- /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
- WARN_ON(err == -EMSGSIZE);
+ /* -EMSGSIZE implies BUG in if_nlmsg_stats_size
+ * or a too big nested attribute.
+ */
kfree_skb(nskb);
} else {
err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7dad68e3b518..44ac121cfccb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2248,6 +2248,7 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
skb_frag_ref(skb, i);
}
skb_shinfo(n)->nr_frags = i;
+ skb_shinfo(n)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
}
if (skb_has_frag_list(skb)) {
@@ -4349,6 +4350,8 @@ onlymerged:
tgt->ip_summed = CHECKSUM_PARTIAL;
skb->ip_summed = CHECKSUM_PARTIAL;
+ skb_shinfo(tgt)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+
skb_len_add(skb, -shiftlen);
skb_len_add(tgt, shiftlen);
@@ -4959,7 +4962,8 @@ normal:
skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
- skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
+ skb_shinfo(nskb)->flags |= (skb_shinfo(head_skb)->flags |
+ skb_shinfo(frag_skb)->flags) &
SKBFL_SHARED_FRAG;
if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
@@ -4976,6 +4980,9 @@ normal:
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb;
+
+ skb_shinfo(nskb)->flags |= skb_shinfo(frag_skb)->flags & SKBFL_SHARED_FRAG;
+
if (!skb_headlen(list_skb)) {
BUG_ON(!nfrags);
} else {
@@ -6200,6 +6207,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
from_shinfo->frags,
from_shinfo->nr_frags * sizeof(skb_frag_t));
to_shinfo->nr_frags += from_shinfo->nr_frags;
+ if (from_shinfo->nr_frags)
+ to_shinfo->flags |= from_shinfo->flags & SKBFL_SHARED_FRAG;
if (!skb_cloned(from))
from_shinfo->nr_frags = 0;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 6187a83bd741..e1850caf1a71 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1268,12 +1268,19 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
const struct proto_ops *ops = NULL;
+ struct sk_psock *psock;
struct socket *sock;
int copied;
trace_sk_data_ready(sk);
rcu_read_lock();
+ psock = sk_psock(sk);
+ if (psock && tls_sw_has_ctx_rx(sk)) {
+ psock->saved_data_ready(sk);
+ rcu_read_unlock();
+ return;
+ }
sock = READ_ONCE(sk->sk_socket);
if (likely(sock))
ops = READ_ONCE(sock->ops);
@@ -1283,8 +1290,6 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
copied = ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) {
- struct sk_psock *psock;
-
rcu_read_lock();
psock = sk_psock(sk);
if (psock)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 02a68be3002a..99e3789492a0 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1630,18 +1630,23 @@ void sock_map_unhash(struct sock *sk)
void (*saved_unhash)(struct sock *sk);
struct sk_psock *psock;
+retry:
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
+ if (unlikely(saved_unhash == sock_map_unhash))
+ goto retry;
} else {
saved_unhash = psock->saved_unhash;
sock_map_remove_links(sk, psock);
rcu_read_unlock();
+
+ if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
+ return;
}
- if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
- return;
+
if (saved_unhash)
saved_unhash(sk);
}
@@ -1652,20 +1657,25 @@ void sock_map_destroy(struct sock *sk)
void (*saved_destroy)(struct sock *sk);
struct sk_psock *psock;
+retry:
rcu_read_lock();
psock = sk_psock_get(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
+ if (unlikely(saved_destroy == sock_map_destroy))
+ goto retry;
} else {
saved_destroy = psock->saved_destroy;
sock_map_remove_links(sk, psock);
rcu_read_unlock();
sk_psock_stop(psock);
sk_psock_put(sk, psock);
+
+ if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
+ return;
}
- if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
- return;
+
if (saved_destroy)
saved_destroy(sk);
}
@@ -1676,32 +1686,33 @@ void sock_map_close(struct sock *sk, long timeout)
void (*saved_close)(struct sock *sk, long timeout);
struct sk_psock *psock;
+retry:
lock_sock(sk);
rcu_read_lock();
- psock = sk_psock(sk);
+ psock = sk_psock_get(sk);
if (likely(psock)) {
saved_close = psock->saved_close;
sock_map_remove_links(sk, psock);
- psock = sk_psock_get(sk);
- if (unlikely(!psock))
- goto no_psock;
rcu_read_unlock();
sk_psock_stop(psock);
release_sock(sk);
cancel_delayed_work_sync(&psock->work);
sk_psock_put(sk, psock);
+
+ /* Make sure we do not recurse. This is a bug.
+ * Leak the socket instead of crashing on a stack overflow.
+ */
+ if (WARN_ON_ONCE(saved_close == sock_map_close))
+ return;
} else {
saved_close = READ_ONCE(sk->sk_prot)->close;
-no_psock:
rcu_read_unlock();
release_sock(sk);
+
+ if (unlikely(saved_close == sock_map_close))
+ goto retry;
}
- /* Make sure we do not recurse. This is a bug.
- * Leak the socket instead of crashing on a stack overflow.
- */
- if (WARN_ON_ONCE(saved_close == sock_map_close))
- return;
saved_close(sk, timeout);
}
EXPORT_SYMBOL_GPL(sock_map_close);
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
index 8bb98d3ea3db..a3a2cc6480a0 100644
--- a/net/ethtool/bitset.c
+++ b/net/ethtool/bitset.c
@@ -92,7 +92,7 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start,
u32 mask;
if (end <= start)
- return true;
+ return false;
if (start % 32) {
mask = ethnl_upper_bits(start);
@@ -105,11 +105,11 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start,
start_word++;
}
- if (!memchr_inv(map + start_word, '\0',
- (end_word - start_word) * sizeof(u32)))
+ if (memchr_inv(map + start_word, '\0',
+ (end_word - start_word) * sizeof(u32)))
return true;
if (end % 32 == 0)
- return true;
+ return false;
return map[end_word] & ethnl_lower_bits(end);
}
diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c
index d4e6887055ab..ddc6eab701ed 100644
--- a/net/ethtool/phy.c
+++ b/net/ethtool/phy.c
@@ -76,6 +76,7 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info,
struct nlattr **tb = info->attrs;
struct phy_device_node *pdn;
struct phy_device *phydev;
+ int ret;
/* RTNL is held by the caller */
phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PHY_HEADER,
@@ -88,8 +89,19 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info,
return -EOPNOTSUPP;
rep_data->phyindex = phydev->phyindex;
+
rep_data->name = kstrdup(dev_name(&phydev->mdio.dev), GFP_KERNEL);
- rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL);
+ if (!rep_data->name)
+ return -ENOMEM;
+
+ if (phydev->drv) {
+ rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL);
+ if (!rep_data->drvname) {
+ ret = -ENOMEM;
+ goto err_free_name;
+ }
+ }
+
rep_data->upstream_type = pdn->upstream_type;
if (pdn->upstream_type == PHY_UPSTREAM_PHY) {
@@ -97,15 +109,33 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info,
rep_data->upstream_index = upstream->phyindex;
}
- if (pdn->parent_sfp_bus)
+ if (pdn->parent_sfp_bus) {
rep_data->upstream_sfp_name = kstrdup(sfp_get_name(pdn->parent_sfp_bus),
GFP_KERNEL);
+ if (!rep_data->upstream_sfp_name) {
+ ret = -ENOMEM;
+ goto err_free_drvname;
+ }
+ }
- if (phydev->sfp_bus)
+ if (phydev->sfp_bus) {
rep_data->downstream_sfp_name = kstrdup(sfp_get_name(phydev->sfp_bus),
GFP_KERNEL);
+ if (!rep_data->downstream_sfp_name) {
+ ret = -ENOMEM;
+ goto err_free_upstream_sfp;
+ }
+ }
return 0;
+
+err_free_upstream_sfp:
+ kfree(rep_data->upstream_sfp_name);
+err_free_drvname:
+ kfree(rep_data->drvname);
+err_free_name:
+ kfree(rep_data->name);
+ return ret;
}
static int phy_fill_reply(struct sk_buff *skb,
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index d09875b33588..b514e43766ef 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -163,8 +163,8 @@ void hsr_del_nodes(struct list_head *node_db)
struct hsr_node *tmp;
list_for_each_entry_safe(node, tmp, node_db, mac_list) {
- list_del(&node->mac_list);
- hsr_free_node(node);
+ list_del_rcu(&node->mac_list);
+ call_rcu(&node->rcu_head, hsr_free_node_rcu);
}
}
@@ -889,7 +889,10 @@ int hsr_get_node_data(struct hsr_priv *hsr,
if (node->addr_B_port != HSR_PT_NONE) {
port = hsr_port_get_hsr(hsr, node->addr_B_port);
- *addr_b_ifindex = port->dev->ifindex;
+ if (port)
+ *addr_b_ifindex = port->dev->ifindex;
+ else
+ *addr_b_ifindex = -1;
} else {
*addr_b_ifindex = -1;
}
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 008edc7f6688..791e15063237 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
*/
if (prog_ops_moff(prog) !=
offsetof(struct tcp_congestion_ops, release))
- return &bpf_sk_setsockopt_proto;
+ return &bpf_sk_setsockopt_nodelay_proto;
return NULL;
case BPF_FUNC_getsockopt:
/* Since get/setsockopt is usually expected to
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7eeff658b467..23e921d313b3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -961,6 +961,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
if (IS_ERR(rt))
goto out_unlock;
+ if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+ goto ende;
+
/* peer icmp_ratelimit */
if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
goto ende;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 928654c34156..dbcd37dfdc15 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1108,7 +1108,7 @@ static void reqsk_timer_handler(struct timer_list *t)
if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
/* delete timer */
- __inet_csk_reqsk_queue_drop(sk_listener, nreq, true);
+ __inet_csk_reqsk_queue_drop(sk_listener, nreq, false);
goto no_ownership;
}
@@ -1134,7 +1134,7 @@ no_ownership:
}
drop:
- __inet_csk_reqsk_queue_drop(sk_listener, oreq, true);
+ __inet_csk_reqsk_queue_drop(oreq->rsk_listener, oreq, true);
reqsk_put(oreq);
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 97ead883e4a1..ad2259678c78 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1501,13 +1501,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
static void __arpt_unregister_table(struct net *net, struct xt_table *table)
{
- struct xt_table_info *private;
- void *loc_cpu_entry;
+ struct xt_table_info *private = table->private;
struct module *table_owner = table->me;
+ void *loc_cpu_entry;
struct arpt_entry *iter;
- private = xt_unregister_table(table);
-
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
@@ -1515,6 +1513,7 @@ static void __arpt_unregister_table(struct net *net, struct xt_table *table)
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
+ kfree(table);
}
int arpt_register_table(struct net *net,
@@ -1522,13 +1521,11 @@ int arpt_register_table(struct net *net,
const struct arpt_replace *repl,
const struct nf_hook_ops *template_ops)
{
- struct nf_hook_ops *ops;
- unsigned int num_ops;
- int ret, i;
- struct xt_table_info *newinfo;
struct xt_table_info bootstrap = {0};
- void *loc_cpu_entry;
+ struct xt_table_info *newinfo;
struct xt_table *new_table;
+ void *loc_cpu_entry;
+ int ret;
newinfo = xt_alloc_table_info(repl->size);
if (!newinfo)
@@ -1543,7 +1540,7 @@ int arpt_register_table(struct net *net,
return ret;
}
- new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
struct arpt_entry *iter;
@@ -1553,46 +1550,12 @@ int arpt_register_table(struct net *net,
return PTR_ERR(new_table);
}
- num_ops = hweight32(table->valid_hooks);
- if (num_ops == 0) {
- ret = -EINVAL;
- goto out_free;
- }
-
- ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
- if (!ops) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- for (i = 0; i < num_ops; i++)
- ops[i].priv = new_table;
-
- new_table->ops = ops;
-
- ret = nf_register_net_hooks(net, ops, num_ops);
- if (ret != 0)
- goto out_free;
-
return ret;
-
-out_free:
- __arpt_unregister_table(net, new_table);
- return ret;
-}
-
-void arpt_unregister_table_pre_exit(struct net *net, const char *name)
-{
- struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
-
- if (table)
- nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
-EXPORT_SYMBOL(arpt_unregister_table_pre_exit);
void arpt_unregister_table(struct net *net, const char *name)
{
- struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
+ struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_ARP, name);
if (table)
__arpt_unregister_table(net, table);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 78cd5ee24448..370b635e3523 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -43,7 +43,7 @@ static int arptable_filter_table_init(struct net *net)
static void __net_exit arptable_filter_net_pre_exit(struct net *net)
{
- arpt_unregister_table_pre_exit(net, "filter");
+ xt_unregister_table_pre_exit(net, NFPROTO_ARP, "filter");
}
static void __net_exit arptable_filter_net_exit(struct net *net)
@@ -58,32 +58,33 @@ static struct pernet_operations arptable_filter_net_ops = {
static int __init arptable_filter_init(void)
{
- int ret = xt_register_template(&packet_filter,
- arptable_filter_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table);
- if (IS_ERR(arpfilter_ops)) {
- xt_unregister_template(&packet_filter);
+ if (IS_ERR(arpfilter_ops))
return PTR_ERR(arpfilter_ops);
- }
ret = register_pernet_subsys(&arptable_filter_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_filter,
+ arptable_filter_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_filter);
- kfree(arpfilter_ops);
- return ret;
+ unregister_pernet_subsys(&arptable_filter_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(arpfilter_ops);
return ret;
}
static void __exit arptable_filter_fini(void)
{
- unregister_pernet_subsys(&arptable_filter_net_ops);
xt_unregister_template(&packet_filter);
+ unregister_pernet_subsys(&arptable_filter_net_ops);
kfree(arpfilter_ops);
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 23c8deff8095..5cbdb0815857 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1704,12 +1704,10 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
static void __ipt_unregister_table(struct net *net, struct xt_table *table)
{
- struct xt_table_info *private;
- void *loc_cpu_entry;
+ struct xt_table_info *private = table->private;
struct module *table_owner = table->me;
struct ipt_entry *iter;
-
- private = xt_unregister_table(table);
+ void *loc_cpu_entry;
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
@@ -1718,19 +1716,18 @@ static void __ipt_unregister_table(struct net *net, struct xt_table *table)
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
+ kfree(table);
}
int ipt_register_table(struct net *net, const struct xt_table *table,
const struct ipt_replace *repl,
const struct nf_hook_ops *template_ops)
{
- struct nf_hook_ops *ops;
- unsigned int num_ops;
- int ret, i;
- struct xt_table_info *newinfo;
struct xt_table_info bootstrap = {0};
- void *loc_cpu_entry;
+ struct xt_table_info *newinfo;
struct xt_table *new_table;
+ void *loc_cpu_entry;
+ int ret;
newinfo = xt_alloc_table_info(repl->size);
if (!newinfo)
@@ -1745,7 +1742,7 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
return ret;
}
- new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
struct ipt_entry *iter;
@@ -1755,51 +1752,12 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
return PTR_ERR(new_table);
}
- /* No template? No need to do anything. This is used by 'nat' table, it registers
- * with the nat core instead of the netfilter core.
- */
- if (!template_ops)
- return 0;
-
- num_ops = hweight32(table->valid_hooks);
- if (num_ops == 0) {
- ret = -EINVAL;
- goto out_free;
- }
-
- ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
- if (!ops) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- for (i = 0; i < num_ops; i++)
- ops[i].priv = new_table;
-
- new_table->ops = ops;
-
- ret = nf_register_net_hooks(net, ops, num_ops);
- if (ret != 0)
- goto out_free;
-
return ret;
-
-out_free:
- __ipt_unregister_table(net, new_table);
- return ret;
-}
-
-void ipt_unregister_table_pre_exit(struct net *net, const char *name)
-{
- struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);
-
- if (table)
- nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
void ipt_unregister_table_exit(struct net *net, const char *name)
{
- struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);
+ struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV4, name);
if (table)
__ipt_unregister_table(net, table);
@@ -1887,7 +1845,6 @@ static void __exit ip_tables_fini(void)
}
EXPORT_SYMBOL(ipt_register_table);
-EXPORT_SYMBOL(ipt_unregister_table_pre_exit);
EXPORT_SYMBOL(ipt_unregister_table_exit);
EXPORT_SYMBOL(ipt_do_table);
module_init(ip_tables_init);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 3ab908b74795..672d7da1071d 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -61,7 +61,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
static void __net_exit iptable_filter_net_pre_exit(struct net *net)
{
- ipt_unregister_table_pre_exit(net, "filter");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "filter");
}
static void __net_exit iptable_filter_net_exit(struct net *net)
@@ -77,32 +77,33 @@ static struct pernet_operations iptable_filter_net_ops = {
static int __init iptable_filter_init(void)
{
- int ret = xt_register_template(&packet_filter,
- iptable_filter_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table);
- if (IS_ERR(filter_ops)) {
- xt_unregister_template(&packet_filter);
+ if (IS_ERR(filter_ops))
return PTR_ERR(filter_ops);
- }
ret = register_pernet_subsys(&iptable_filter_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_filter,
+ iptable_filter_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_filter);
- kfree(filter_ops);
- return ret;
+ unregister_pernet_subsys(&iptable_filter_net_ops);
+ goto err_free;
}
return 0;
+err_free:
+ kfree(filter_ops);
+ return ret;
}
static void __exit iptable_filter_fini(void)
{
- unregister_pernet_subsys(&iptable_filter_net_ops);
xt_unregister_template(&packet_filter);
+ unregister_pernet_subsys(&iptable_filter_net_ops);
kfree(filter_ops);
}
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 385d945d8ebe..13d25d9a4610 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -96,7 +96,7 @@ static int iptable_mangle_table_init(struct net *net)
static void __net_exit iptable_mangle_net_pre_exit(struct net *net)
{
- ipt_unregister_table_pre_exit(net, "mangle");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "mangle");
}
static void __net_exit iptable_mangle_net_exit(struct net *net)
@@ -111,32 +111,33 @@ static struct pernet_operations iptable_mangle_net_ops = {
static int __init iptable_mangle_init(void)
{
- int ret = xt_register_template(&packet_mangler,
- iptable_mangle_table_init);
- if (ret < 0)
- return ret;
+ int ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
- if (IS_ERR(mangle_ops)) {
- xt_unregister_template(&packet_mangler);
- ret = PTR_ERR(mangle_ops);
- return ret;
- }
+ if (IS_ERR(mangle_ops))
+ return PTR_ERR(mangle_ops);
ret = register_pernet_subsys(&iptable_mangle_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_mangler,
+ iptable_mangle_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_mangler);
- kfree(mangle_ops);
- return ret;
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(mangle_ops);
return ret;
}
static void __exit iptable_mangle_fini(void)
{
- unregister_pernet_subsys(&iptable_mangle_net_ops);
xt_unregister_template(&packet_mangler);
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
kfree(mangle_ops);
}
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 625a1ca13b1b..a0df72554025 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -119,8 +119,11 @@ static int iptable_nat_table_init(struct net *net)
}
ret = ipt_nat_register_lookups(net);
- if (ret < 0)
+ if (ret < 0) {
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat");
+ synchronize_rcu();
ipt_unregister_table_exit(net, "nat");
+ }
kfree(repl);
return ret;
@@ -129,6 +132,7 @@ static int iptable_nat_table_init(struct net *net)
static void __net_exit iptable_nat_net_pre_exit(struct net *net)
{
ipt_nat_unregister_lookups(net);
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat");
}
static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 0e7f53964d0a..2745c22f4034 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -53,7 +53,7 @@ static int iptable_raw_table_init(struct net *net)
static void __net_exit iptable_raw_net_pre_exit(struct net *net)
{
- ipt_unregister_table_pre_exit(net, "raw");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "raw");
}
static void __net_exit iptable_raw_net_exit(struct net *net)
@@ -77,32 +77,32 @@ static int __init iptable_raw_init(void)
pr_info("Enabling raw table before defrag\n");
}
- ret = xt_register_template(table,
- iptable_raw_table_init);
- if (ret < 0)
- return ret;
-
rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table);
- if (IS_ERR(rawtable_ops)) {
- xt_unregister_template(table);
+ if (IS_ERR(rawtable_ops))
return PTR_ERR(rawtable_ops);
- }
ret = register_pernet_subsys(&iptable_raw_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(table,
+ iptable_raw_table_init);
if (ret < 0) {
- xt_unregister_template(table);
- kfree(rawtable_ops);
- return ret;
+ unregister_pernet_subsys(&iptable_raw_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(rawtable_ops);
return ret;
}
static void __exit iptable_raw_fini(void)
{
+ xt_unregister_template(&packet_raw);
unregister_pernet_subsys(&iptable_raw_net_ops);
kfree(rawtable_ops);
- xt_unregister_template(&packet_raw);
}
module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index d885443cb267..491894511c54 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -50,7 +50,7 @@ static int iptable_security_table_init(struct net *net)
static void __net_exit iptable_security_net_pre_exit(struct net *net)
{
- ipt_unregister_table_pre_exit(net, "security");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "security");
}
static void __net_exit iptable_security_net_exit(struct net *net)
@@ -65,33 +65,34 @@ static struct pernet_operations iptable_security_net_ops = {
static int __init iptable_security_init(void)
{
- int ret = xt_register_template(&security_table,
- iptable_security_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table);
- if (IS_ERR(sectbl_ops)) {
- xt_unregister_template(&security_table);
+ if (IS_ERR(sectbl_ops))
return PTR_ERR(sectbl_ops);
- }
ret = register_pernet_subsys(&iptable_security_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&security_table,
+ iptable_security_table_init);
if (ret < 0) {
- xt_unregister_template(&security_table);
- kfree(sectbl_ops);
- return ret;
+ unregister_pernet_subsys(&iptable_security_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(sectbl_ops);
return ret;
}
static void __exit iptable_security_fini(void)
{
+ xt_unregister_template(&security_table);
unregister_pernet_subsys(&iptable_security_net_ops);
kfree(sectbl_ops);
- xt_unregister_template(&security_table);
}
module_init(iptable_security_init);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5aaf9c62c8e1..68e88cb3e55c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -391,7 +391,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
* in, reject the frame as invalid
*/
err = -EINVAL;
- if (iphlen > length)
+ if (iphlen > length || iphlen < sizeof(*iph))
goto error_free;
if (iphlen >= sizeof(*iph)) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bc1296f0ea69..3d62d45d84bd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1272,7 +1272,7 @@ static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
__func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
skb->dev ? skb->dev->name : "?");
kfree_skb(skb);
- WARN_ON(1);
+ WARN_ON_ONCE(1);
return 0;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 432fa28e47d4..389a7cc17110 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -299,9 +299,6 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
-DEFINE_PER_CPU(u32, tcp_tw_isn);
-EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
-
long sysctl_tcp_mem[3] __read_mostly;
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index a97cdf3e6af4..0a4b38b315fe 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -116,7 +116,8 @@ struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk,
{
struct tcp_ao_key *key;
- hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) {
+ hlist_for_each_entry_rcu(key, &ao->head, node,
+ sk_fullsock(sk) && lockdep_sock_is_held(sk)) {
if ((sndid >= 0 && key->sndid != sndid) ||
(rcvid >= 0 && key->rcvid != rcvid))
continue;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d5c9e65d9760..de9f68a9c0cf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -7589,6 +7589,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct sock *sk, struct sk_buff *skb)
{
struct tcp_fastopen_cookie foc = { .len = -1 };
+ u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
const struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
@@ -7599,20 +7600,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct dst_entry *dst;
struct flowi fl;
u8 syncookies;
- u32 isn;
#ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh;
#endif
- isn = __this_cpu_read(tcp_tw_isn);
- if (isn) {
- /* TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- __this_cpu_write(tcp_tw_isn, 0);
- } else {
+ /* If isn is non-zero, this SYN originally matched a TIME_WAIT socket.
+ * TW sockets are converted to open requests without limitations,
+ * we skip the queue limits and syncookie checks in the block below.
+ */
+ if (!isn) {
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c0526cc03980..fdc81150ff6c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2198,6 +2198,7 @@ lookup:
}
}
+ isn = 0;
process:
if (static_branch_unlikely(&ip4_min_ttl)) {
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
@@ -2227,6 +2228,7 @@ process:
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = isn;
skb->dev = NULL;
@@ -2313,7 +2315,6 @@ do_time_wait:
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
- __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f9d8755705f7..6e4bb411dc04 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2626,6 +2626,7 @@ static int tcp_clone_payload(struct sock *sk, struct sk_buff *to,
todo = min_t(int, skb_frag_size(fragfrom),
probe_size - len);
len += todo;
+ skb_shinfo(to)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
if (lastfrag &&
skb_frag_page(fragfrom) == skb_frag_page(lastfrag) &&
skb_frag_off(fragfrom) == skb_frag_off(lastfrag) +
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index a0813d425b71..29651b1a0bc7 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -482,11 +482,11 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
struct sock *sk = gso_skb->sk;
unsigned int sum_truesize = 0;
struct sk_buff *segs, *seg;
- __be16 newlen, msslen;
struct udphdr *uh;
unsigned int mss;
bool copy_dtor;
__sum16 check;
+ __be16 newlen;
int ret = 0;
mss = skb_shinfo(gso_skb)->gso_size;
@@ -555,15 +555,6 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
return segs;
}
- msslen = htons(sizeof(*uh) + mss);
-
- /* GSO partial and frag_list segmentation only requires splitting
- * the frame into an MSS multiple and possibly a remainder, both
- * cases return a GSO skb. So update the mss now.
- */
- if (skb_is_gso(segs))
- mss *= skb_shinfo(segs)->gso_segs;
-
seg = segs;
uh = udp_hdr(seg);
@@ -586,7 +577,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (!seg->next)
break;
- uh->len = msslen;
+ uh->len = newlen;
uh->check = check;
if (seg->ip_summed == CHECKSUM_PARTIAL)
@@ -599,9 +590,12 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
uh = udp_hdr(seg);
}
- /* last packet can be partial gso_size, account for that in checksum */
- newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) +
- seg->data_len);
+ /* Unless skb fits perfectly as GSO_PARTIAL, the trailing
+ * segment may not be full MSS, account for that in the checksum
+ */
+ if (!skb_is_gso(seg))
+ newlen = htons(skb_tail_pointer(seg) -
+ skb_transport_header(seg) + seg->data_len);
check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
uh->len = newlen;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 03cbce842c1a..cf90f933ca1a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -910,16 +910,27 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
{
+ enum skb_drop_reason drop_reason;
struct ioam6_trace_hdr *trace;
struct ioam6_namespace *ns;
+ struct inet6_dev *idev;
struct ioam6_hdr *hdr;
+ drop_reason = SKB_DROP_REASON_IP_INHDR;
+
/* Bad alignment (must be 4n-aligned) */
if (optoff & 3)
goto drop;
+ /* Does the device still have IPv6 configuration? */
+ idev = __in6_dev_get(skb->dev);
+ if (!idev) {
+ drop_reason = SKB_DROP_REASON_IPV6DISABLED;
+ goto drop;
+ }
+
/* Ignore if IOAM is not enabled on ingress */
- if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled))
+ if (!READ_ONCE(idev->cnf.ioam6_enabled))
goto ignore;
/* Truncated Option header */
@@ -955,9 +966,9 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len))
goto drop;
- /* Trace pointer may have changed */
- trace = (struct ioam6_trace_hdr *)(skb_network_header(skb)
- + optoff + sizeof(*hdr));
+ /* Trace and hdr pointers may have changed */
+ hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
+ trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
ioam6_fill_trace_data(skb, ns, trace, true);
@@ -972,7 +983,7 @@ ignore:
return true;
drop:
- kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
+ kfree_skb_reason(skb, drop_reason);
return false;
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index c92f98c6f6ec..b1ccdf0dc646 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -36,11 +36,11 @@
/* FL hash table */
#define FL_MAX_PER_SOCK 32
-#define FL_MAX_SIZE 4096
+#define FL_MAX_SIZE 8192
#define FL_HASH_MASK 255
#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK)
-static atomic_t fl_size = ATOMIC_INIT(0);
+static int fl_size;
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
static void ip6_fl_gc(struct timer_list *unused);
@@ -162,8 +162,9 @@ static void ip6_fl_gc(struct timer_list *unused)
ttd = fl->expires;
if (time_after_eq(now, ttd)) {
*flp = fl->next;
+ fl_size--;
+ fl->fl_net->ipv6.flowlabel_count--;
fl_free(fl);
- atomic_dec(&fl_size);
continue;
}
if (!sched || time_before(ttd, sched))
@@ -172,7 +173,7 @@ static void ip6_fl_gc(struct timer_list *unused)
flp = &fl->next;
}
}
- if (!sched && atomic_read(&fl_size))
+ if (!sched && fl_size)
sched = now + FL_MAX_LINGER;
if (sched) {
mod_timer(&ip6_fl_gc_timer, sched);
@@ -196,7 +197,8 @@ static void __net_exit ip6_fl_purge(struct net *net)
atomic_read(&fl->users) == 0) {
*flp = fl->next;
fl_free(fl);
- atomic_dec(&fl_size);
+ fl_size--;
+ net->ipv6.flowlabel_count--;
continue;
}
flp = &fl->next;
@@ -210,10 +212,10 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
{
struct ip6_flowlabel *lfl;
+ lockdep_assert_held(&ip6_fl_lock);
+
fl->label = label & IPV6_FLOWLABEL_MASK;
- rcu_read_lock();
- spin_lock_bh(&ip6_fl_lock);
if (label == 0) {
for (;;) {
fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK;
@@ -235,8 +237,6 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
lfl = __fl_lookup(net, fl->label);
if (lfl) {
atomic_inc(&lfl->users);
- spin_unlock_bh(&ip6_fl_lock);
- rcu_read_unlock();
return lfl;
}
}
@@ -244,9 +244,8 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
fl->lastuse = jiffies;
fl->next = fl_ht[FL_HASH(fl->label)];
rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
- atomic_inc(&fl_size);
- spin_unlock_bh(&ip6_fl_lock);
- rcu_read_unlock();
+ fl_size++;
+ net->ipv6.flowlabel_count++;
return NULL;
}
@@ -464,10 +463,17 @@ done:
static int mem_check(struct sock *sk)
{
- int room = FL_MAX_SIZE - atomic_read(&fl_size);
+ const int unpriv_total_limit = FL_MAX_SIZE - (FL_MAX_SIZE / 4);
+ const int unpriv_user_limit = unpriv_total_limit / 2;
+ struct net *net = sock_net(sk);
+ int room;
struct ipv6_fl_socklist *sfl;
int count = 0;
+ lockdep_assert_held(&ip6_fl_lock);
+
+ room = FL_MAX_SIZE - fl_size;
+
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
return 0;
@@ -478,7 +484,9 @@ static int mem_check(struct sock *sk)
if (room <= 0 ||
((count >= FL_MAX_PER_SOCK ||
- (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
+ (count > 0 && room < FL_MAX_SIZE / 2) ||
+ room < FL_MAX_SIZE / 4 ||
+ net->ipv6.flowlabel_count >= unpriv_user_limit) &&
!capable(CAP_NET_ADMIN)))
return -ENOBUFS;
@@ -692,11 +700,19 @@ release:
if (!sfl1)
goto done;
+ rcu_read_lock();
+ spin_lock_bh(&ip6_fl_lock);
err = mem_check(sk);
+ if (err == 0)
+ fl1 = fl_intern(net, fl, freq->flr_label);
+ else
+ fl1 = NULL;
+ spin_unlock_bh(&ip6_fl_lock);
+ rcu_read_unlock();
+
if (err != 0)
goto done;
- fl1 = fl_intern(net, fl, freq->flr_label);
if (fl1)
goto recheck;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d585ac3c1113..9d9c3763f2f5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1713,12 +1713,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
static void __ip6t_unregister_table(struct net *net, struct xt_table *table)
{
- struct xt_table_info *private;
- void *loc_cpu_entry;
+ struct xt_table_info *private = table->private;
struct module *table_owner = table->me;
struct ip6t_entry *iter;
-
- private = xt_unregister_table(table);
+ void *loc_cpu_entry;
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
@@ -1727,19 +1725,18 @@ static void __ip6t_unregister_table(struct net *net, struct xt_table *table)
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
+ kfree(table);
}
int ip6t_register_table(struct net *net, const struct xt_table *table,
const struct ip6t_replace *repl,
const struct nf_hook_ops *template_ops)
{
- struct nf_hook_ops *ops;
- unsigned int num_ops;
- int ret, i;
- struct xt_table_info *newinfo;
struct xt_table_info bootstrap = {0};
- void *loc_cpu_entry;
+ struct xt_table_info *newinfo;
struct xt_table *new_table;
+ void *loc_cpu_entry;
+ int ret;
newinfo = xt_alloc_table_info(repl->size);
if (!newinfo)
@@ -1754,7 +1751,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
return ret;
}
- new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
struct ip6t_entry *iter;
@@ -1764,48 +1761,12 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
return PTR_ERR(new_table);
}
- if (!template_ops)
- return 0;
-
- num_ops = hweight32(table->valid_hooks);
- if (num_ops == 0) {
- ret = -EINVAL;
- goto out_free;
- }
-
- ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
- if (!ops) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- for (i = 0; i < num_ops; i++)
- ops[i].priv = new_table;
-
- new_table->ops = ops;
-
- ret = nf_register_net_hooks(net, ops, num_ops);
- if (ret != 0)
- goto out_free;
-
return ret;
-
-out_free:
- __ip6t_unregister_table(net, new_table);
- return ret;
-}
-
-void ip6t_unregister_table_pre_exit(struct net *net, const char *name)
-{
- struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name);
-
- if (table)
- nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
void ip6t_unregister_table_exit(struct net *net, const char *name)
{
- struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name);
+ struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV6, name);
if (table)
__ip6t_unregister_table(net, table);
@@ -1894,7 +1855,6 @@ static void __exit ip6_tables_fini(void)
}
EXPORT_SYMBOL(ip6t_register_table);
-EXPORT_SYMBOL(ip6t_unregister_table_pre_exit);
EXPORT_SYMBOL(ip6t_unregister_table_exit);
EXPORT_SYMBOL(ip6t_do_table);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index e7a3fb9355ee..450dd53846a2 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -168,6 +168,10 @@ static int hbh_mt6_check(const struct xt_mtchk_param *par)
pr_debug("unknown flags %X\n", optsinfo->invflags);
return -EINVAL;
}
+ if (optsinfo->optsnr > IP6T_OPTS_OPTSNR) {
+ pr_debug("too many supported opts specified\n");
+ return -EINVAL;
+ }
if (optsinfo->flags & IP6T_OPTS_NSTRICT) {
pr_debug("Not strict - not implemented");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index e8992693e14a..b074fc477676 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -60,7 +60,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
static void __net_exit ip6table_filter_net_pre_exit(struct net *net)
{
- ip6t_unregister_table_pre_exit(net, "filter");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "filter");
}
static void __net_exit ip6table_filter_net_exit(struct net *net)
@@ -76,32 +76,32 @@ static struct pernet_operations ip6table_filter_net_ops = {
static int __init ip6table_filter_init(void)
{
- int ret = xt_register_template(&packet_filter,
- ip6table_filter_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table);
- if (IS_ERR(filter_ops)) {
- xt_unregister_template(&packet_filter);
+ if (IS_ERR(filter_ops))
return PTR_ERR(filter_ops);
- }
ret = register_pernet_subsys(&ip6table_filter_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_filter, ip6table_filter_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_filter);
- kfree(filter_ops);
- return ret;
+ unregister_pernet_subsys(&ip6table_filter_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(filter_ops);
return ret;
}
static void __exit ip6table_filter_fini(void)
{
- unregister_pernet_subsys(&ip6table_filter_net_ops);
xt_unregister_template(&packet_filter);
+ unregister_pernet_subsys(&ip6table_filter_net_ops);
kfree(filter_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 8dd4cd0c47bd..e6ee036a9b2c 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -89,7 +89,7 @@ static int ip6table_mangle_table_init(struct net *net)
static void __net_exit ip6table_mangle_net_pre_exit(struct net *net)
{
- ip6t_unregister_table_pre_exit(net, "mangle");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "mangle");
}
static void __net_exit ip6table_mangle_net_exit(struct net *net)
@@ -104,32 +104,33 @@ static struct pernet_operations ip6table_mangle_net_ops = {
static int __init ip6table_mangle_init(void)
{
- int ret = xt_register_template(&packet_mangler,
- ip6table_mangle_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
- if (IS_ERR(mangle_ops)) {
- xt_unregister_template(&packet_mangler);
+ if (IS_ERR(mangle_ops))
return PTR_ERR(mangle_ops);
- }
ret = register_pernet_subsys(&ip6table_mangle_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_mangler,
+ ip6table_mangle_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_mangler);
- kfree(mangle_ops);
- return ret;
+ unregister_pernet_subsys(&ip6table_mangle_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(mangle_ops);
return ret;
}
static void __exit ip6table_mangle_fini(void)
{
- unregister_pernet_subsys(&ip6table_mangle_net_ops);
xt_unregister_template(&packet_mangler);
+ unregister_pernet_subsys(&ip6table_mangle_net_ops);
kfree(mangle_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 5be723232df8..c2394e2c94b5 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -121,8 +121,11 @@ static int ip6table_nat_table_init(struct net *net)
}
ret = ip6t_nat_register_lookups(net);
- if (ret < 0)
+ if (ret < 0) {
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat");
+ synchronize_rcu();
ip6t_unregister_table_exit(net, "nat");
+ }
kfree(repl);
return ret;
@@ -131,6 +134,7 @@ static int ip6table_nat_table_init(struct net *net)
static void __net_exit ip6table_nat_net_pre_exit(struct net *net)
{
ip6t_nat_unregister_lookups(net);
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat");
}
static void __net_exit ip6table_nat_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index fc9f6754028f..3b161ee875bc 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -52,7 +52,7 @@ static int ip6table_raw_table_init(struct net *net)
static void __net_exit ip6table_raw_net_pre_exit(struct net *net)
{
- ip6t_unregister_table_pre_exit(net, "raw");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "raw");
}
static void __net_exit ip6table_raw_net_exit(struct net *net)
@@ -75,31 +75,31 @@ static int __init ip6table_raw_init(void)
pr_info("Enabling raw table before defrag\n");
}
- ret = xt_register_template(table, ip6table_raw_table_init);
- if (ret < 0)
- return ret;
-
/* Register hooks */
rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table);
- if (IS_ERR(rawtable_ops)) {
- xt_unregister_template(table);
+ if (IS_ERR(rawtable_ops))
return PTR_ERR(rawtable_ops);
- }
ret = register_pernet_subsys(&ip6table_raw_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(table, ip6table_raw_table_init);
if (ret < 0) {
- kfree(rawtable_ops);
- xt_unregister_template(table);
- return ret;
+ unregister_pernet_subsys(&ip6table_raw_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(rawtable_ops);
return ret;
}
static void __exit ip6table_raw_fini(void)
{
- unregister_pernet_subsys(&ip6table_raw_net_ops);
xt_unregister_template(&packet_raw);
+ unregister_pernet_subsys(&ip6table_raw_net_ops);
kfree(rawtable_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 4df14a9bae78..4bd5d97b8ab6 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -49,7 +49,7 @@ static int ip6table_security_table_init(struct net *net)
static void __net_exit ip6table_security_net_pre_exit(struct net *net)
{
- ip6t_unregister_table_pre_exit(net, "security");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "security");
}
static void __net_exit ip6table_security_net_exit(struct net *net)
@@ -64,32 +64,33 @@ static struct pernet_operations ip6table_security_net_ops = {
static int __init ip6table_security_init(void)
{
- int ret = xt_register_template(&security_table,
- ip6table_security_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table);
- if (IS_ERR(sectbl_ops)) {
- xt_unregister_template(&security_table);
+ if (IS_ERR(sectbl_ops))
return PTR_ERR(sectbl_ops);
- }
ret = register_pernet_subsys(&ip6table_security_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&security_table,
+ ip6table_security_table_init);
if (ret < 0) {
- kfree(sectbl_ops);
- xt_unregister_template(&security_table);
- return ret;
+ unregister_pernet_subsys(&ip6table_security_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(sectbl_ops);
return ret;
}
static void __exit ip6table_security_fini(void)
{
- unregister_pernet_subsys(&ip6table_security_net_ops);
xt_unregister_template(&security_table);
+ unregister_pernet_subsys(&ip6table_security_net_ops);
kfree(sectbl_ops);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e3d355d1fbd6..b106e5fef9cb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6933,7 +6933,7 @@ int __init ip6_route_init(void)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
ret = bpf_iter_register();
if (ret)
- goto out_register_late_subsys;
+ goto out_register_notifier;
#endif
for_each_possible_cpu(cpu) {
@@ -6946,6 +6946,10 @@ int __init ip6_route_init(void)
out:
return ret;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+out_register_notifier:
+ unregister_netdevice_notifier(&ip6_route_dev_notifier);
+#endif
out_register_late_subsys:
rtnl_unregister_all(PF_INET6);
unregister_pernet_subsys(&ip6_route_net_late_ops);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d13d49bfef19..36d75fb50a70 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1839,6 +1839,7 @@ lookup:
}
}
+ isn = 0;
process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
@@ -1868,6 +1869,7 @@ process:
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = isn;
skb->dev = NULL;
@@ -1956,7 +1958,6 @@ do_time_wait:
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
- __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 157fc23ce4e1..1455f67e01dd 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1360,7 +1360,7 @@ static void l2tp_session_unhash(struct l2tp_session *session)
spin_lock_bh(&pn->l2tp_session_idr_lock);
/* Remove from the per-tunnel list */
- list_del_init(&session->list);
+ list_del_rcu(&session->list);
/* Remove from per-net IDR */
if (tunnel->version == L2TP_HDR_VER_3) {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7b77d57c9f96..f9ee9947a94d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2344,8 +2344,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
sta->sta.max_sp = params->max_sp;
}
- ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab,
- params->ext_capab_len);
+ if (params->ext_capab)
+ ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab,
+ params->ext_capab_len);
/*
* cfg80211 validates this (1-2007) and allows setting the AID
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0a0f27836d57..b98ddfa3003e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8164,6 +8164,7 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata,
"No active links for TID %d", tid);
return -EINVAL;
}
+ pos += map_size;
} else {
map = 0;
}
@@ -8182,7 +8183,6 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata,
default:
return -EINVAL;
}
- pos += map_size;
}
return 0;
}
@@ -11232,6 +11232,9 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata,
control = get_unaligned_le16(pos);
link_id = control & IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID;
+ if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS)
+ continue;
+
link = sdata_dereference(sdata->link[link_id], sdata);
if (!link)
continue;
diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
index 2b3632c6008a..77894d997113 100644
--- a/net/mac80211/parse.c
+++ b/net/mac80211/parse.c
@@ -34,6 +34,22 @@
#include "led.h"
#include "wep.h"
+static const u8 empty_non_inheritance[] = {
+ WLAN_EID_EXTENSION, 1, WLAN_EID_EXT_NON_INHERITANCE,
+ /*
+ * cfg80211_is_element_inherited() hardcodes elements that
+ * cannot be inherited, so we just need an empty one to be
+ * calling it at all.
+ */
+};
+
+struct ieee80211_elem_defrag {
+ const struct element *elem;
+ /* container start/len */
+ const u8 *start;
+ size_t len;
+};
+
struct ieee80211_elems_parse {
/* must be first for kfree to work */
struct ieee802_11_elems elems;
@@ -41,11 +57,7 @@ struct ieee80211_elems_parse {
/* The basic Multi-Link element in the original elements */
const struct element *ml_basic_elem;
- /* The reconfiguration Multi-Link element in the original elements */
- const struct element *ml_reconf_elem;
-
- /* The EPCS Multi-Link element in the original elements */
- const struct element *ml_epcs_elem;
+ struct ieee80211_elem_defrag ml_reconf, ml_epcs;
bool multi_link_inner;
bool skip_vendor;
@@ -162,10 +174,14 @@ ieee80211_parse_extension_element(u32 *crc,
}
break;
case IEEE80211_ML_CONTROL_TYPE_RECONF:
- elems_parse->ml_reconf_elem = elem;
+ elems_parse->ml_reconf.elem = elem;
+ elems_parse->ml_reconf.start = params->start;
+ elems_parse->ml_reconf.len = params->len;
break;
case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS:
- elems_parse->ml_epcs_elem = elem;
+ elems_parse->ml_epcs.elem = elem;
+ elems_parse->ml_epcs.start = params->start;
+ elems_parse->ml_epcs.len = params->len;
break;
default:
break;
@@ -916,7 +932,7 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse,
{
struct ieee802_11_elems *elems = &elems_parse->elems;
struct ieee80211_mle_per_sta_profile *prof;
- const struct element *tmp;
+ const struct element *tmp, *ret;
ssize_t ml_len;
const u8 *end;
@@ -986,50 +1002,40 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse,
sub->from_ap = params->from_ap;
sub->link_id = -1;
- return cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
- sub->start, sub->len);
-}
-
-static void
-ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse)
-{
- struct ieee802_11_elems *elems = &elems_parse->elems;
- ssize_t ml_len;
+ ret = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ sub->start, sub->len);
+ if (ret)
+ return ret;
- ml_len = cfg80211_defragment_element(elems_parse->ml_reconf_elem,
- elems->ie_start,
- elems->total_len,
- elems_parse->scratch_pos,
- elems_parse->scratch +
- elems_parse->scratch_len -
- elems_parse->scratch_pos,
- WLAN_EID_FRAGMENT);
- if (ml_len < 0)
- return;
- elems->ml_reconf = (void *)elems_parse->scratch_pos;
- elems->ml_reconf_len = ml_len;
- elems_parse->scratch_pos += ml_len;
+ /*
+ * Since we know we want and found a profile, apply an empty
+ * non-inheritance if the profile didn't have one, so that any
+ * element that shouldn't be inherited by spec isn't.
+ */
+ return (const void *)empty_non_inheritance;
}
-static void
-ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse)
+static const void *
+ieee80211_mle_defrag(struct ieee80211_elems_parse *elems_parse,
+ struct ieee80211_elem_defrag *defrag,
+ size_t *out_len)
{
- struct ieee802_11_elems *elems = &elems_parse->elems;
+ const void *ret;
ssize_t ml_len;
- ml_len = cfg80211_defragment_element(elems_parse->ml_epcs_elem,
- elems->ie_start,
- elems->total_len,
+ ml_len = cfg80211_defragment_element(defrag->elem,
+ defrag->start, defrag->len,
elems_parse->scratch_pos,
elems_parse->scratch +
elems_parse->scratch_len -
elems_parse->scratch_pos,
WLAN_EID_FRAGMENT);
if (ml_len < 0)
- return;
- elems->ml_epcs = (void *)elems_parse->scratch_pos;
- elems->ml_epcs_len = ml_len;
+ return NULL;
+ ret = elems_parse->scratch_pos;
+ *out_len = ml_len;
elems_parse->scratch_pos += ml_len;
+ return ret;
}
struct ieee802_11_elems *
@@ -1042,6 +1048,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
size_t scratch_len = 3 * params->len;
bool multi_link_inner = false;
+ BUILD_BUG_ON(sizeof(empty_non_inheritance) != empty_non_inheritance[1] + 2);
BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0);
/* cannot parse for both a specific link and non-transmitted BSS */
@@ -1089,6 +1096,17 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
sub.start, nontx_len);
+ /*
+ * If it's a non-transmitted BSS, we shouldn't pick
+ * any elements in the outer parsing that shouldn't
+ * be inherited. If the profile has a non-inheritance
+ * element this automatically happens, but if not then
+ * provide an empty one so that the hard-coded elements
+ * in cfg80211_is_element_inherited() are ignored, but
+ * it must be called.
+ */
+ if (params->bss->transmitted_bss && !non_inherit)
+ non_inherit = (const void *)empty_non_inheritance;
} else {
/* must always parse to get elems_parse->ml_basic_elem */
non_inherit = ieee80211_prep_mle_link_parse(elems_parse, params,
@@ -1109,9 +1127,12 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
_ieee802_11_parse_elems_full(&sub, elems_parse, NULL);
}
- ieee80211_mle_defrag_reconf(elems_parse);
-
- ieee80211_mle_defrag_epcs(elems_parse);
+ elems->ml_reconf = ieee80211_mle_defrag(elems_parse,
+ &elems_parse->ml_reconf,
+ &elems->ml_reconf_len);
+ elems->ml_epcs = ieee80211_mle_defrag(elems_parse,
+ &elems_parse->ml_epcs,
+ &elems->ml_epcs_len);
if (elems->tim && !elems->parse_error) {
const struct ieee80211_tim_ie *tim_ie = elems->tim;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d18e962126ce..3fb40449c6c5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4984,6 +4984,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
u8 sa[ETH_ALEN];
} addrs __aligned(2);
struct ieee80211_sta_rx_stats *stats;
+ u32 encoded_rate;
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
* to a common data structure; drivers can implement that per queue
@@ -5091,11 +5092,14 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
/* push the addresses in front */
memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs));
+ /* capture before mesh forward may memset or free skb->cb */
+ encoded_rate = sta_stats_encode_rate(status);
+
res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb);
switch (res) {
case RX_QUEUED:
stats->last_rx = jiffies;
- stats->last_rate = sta_stats_encode_rate(status);
+ stats->last_rate = encoded_rate;
return true;
case RX_CONTINUE:
break;
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 8a16672b94e2..4cc16cbeb328 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -14,7 +14,7 @@
struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
{
- if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
+ if (sk && sk_fullsock(sk) && sk_is_tcp(sk) && sk_is_mptcp(sk))
return mptcp_sk(mptcp_subflow_ctx(sk)->conn);
return NULL;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 3c152bf66cd5..3e770c7407e1 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -364,7 +364,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
spin_lock_bh(&msk->pm.lock);
- if (!mptcp_pm_should_add_signal_addr(msk)) {
+ /* The cancel path (mptcp_pm_del_add_timer()) can race with this
+ * callback. Once cancel updates retrans_times to MAX, suppress further
+ * retransmissions here. If this callback acquires pm.lock first, one
+ * final transmit attempt is still possible.
+ */
+ if (entry->retrans_times < ADD_ADDR_RETRANS_MAX &&
+ !mptcp_pm_should_add_signal_addr(msk)) {
pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id);
mptcp_pm_announce_addr(msk, &entry->addr, false);
mptcp_pm_add_addr_send_ack(msk);
@@ -414,8 +420,12 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
/* Note: entry might have been removed by another thread.
* We hold rcu_read_lock() to ensure it is not freed under us.
*/
- if (stop_timer)
- sk_stop_timer_sync(sk, &entry->add_timer);
+ if (stop_timer) {
+ if (check_id)
+ sk_stop_timer(sk, &entry->add_timer);
+ else
+ sk_stop_timer_sync(sk, &entry->add_timer);
+ }
rcu_read_unlock();
return entry;
@@ -882,6 +892,7 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
struct mptcp_addr_info *addr, bool *echo,
bool *drop_other_suboptions)
{
+ bool skip_add_addr = false;
int ret = false;
u8 add_addr;
u8 family;
@@ -903,24 +914,49 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
}
*echo = mptcp_pm_should_add_signal_echo(msk);
- port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
-
- family = *echo ? msk->pm.remote.family : msk->pm.local.family;
- if (remaining < mptcp_add_addr_len(family, *echo, port))
- goto out_unlock;
-
if (*echo) {
*addr = msk->pm.remote;
add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
+ port = !!msk->pm.remote.port;
+ family = msk->pm.remote.family;
} else {
*addr = msk->pm.local;
add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
+ port = !!msk->pm.local.port;
+ family = msk->pm.local.family;
}
- WRITE_ONCE(msk->pm.addr_signal, add_addr);
+
+ if (remaining < mptcp_add_addr_len(family, *echo, port)) {
+ struct net *net = sock_net((struct sock *)msk);
+
+ if (!*drop_other_suboptions)
+ goto out_unlock;
+
+ if (*echo) {
+ MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP);
+ } else {
+ skip_add_addr = true;
+ MPTCP_INC_STATS(net, MPTCP_MIB_ADDADDRTXDROP);
+ }
+ goto drop_signal_mark;
+ }
+
ret = true;
+drop_signal_mark:
+ WRITE_ONCE(msk->pm.addr_signal, add_addr);
+
out_unlock:
spin_unlock_bh(&msk->pm.lock);
+
+ /* On pure-ACK option-space exhaustion, stop retrying this ADD_ADDR:
+ * clear the signal bit, cancel the matching retransmission timer, and
+ * let the PM state machine progress.
+ */
+ if (skip_add_addr) {
+ mptcp_pm_del_add_timer(msk, addr, true);
+ mptcp_pm_subflow_established(msk);
+ }
return ret;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4546a8b09884..a72a6ad6ee8b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -397,12 +397,26 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb)
return false;
}
- /* old data, keep it simple and drop the whole pkt, sender
- * will retransmit as needed, if needed.
+ /* Completely old data? */
+ if (!after64(MPTCP_SKB_CB(skb)->end_seq, msk->ack_seq)) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
+ mptcp_drop(sk, skb);
+ return false;
+ }
+
+ /* Partial packet: map_seq < ack_seq < end_seq.
+ * Skip the already-acked bytes and enqueue the new data.
*/
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
- mptcp_drop(sk, skb);
- return false;
+ copy_len = MPTCP_SKB_CB(skb)->end_seq - msk->ack_seq;
+ MPTCP_SKB_CB(skb)->offset += msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
+ MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq -
+ MPTCP_SKB_CB(skb)->map_seq;
+ msk->bytes_received += copy_len;
+ WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
+
+ skb_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ return true;
}
static void mptcp_stop_rtx_timer(struct sock *sk)
@@ -3473,6 +3487,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
/* for fallback's sake */
WRITE_ONCE(msk->ack_seq, 0);
+ atomic64_set(&msk->rcv_wnd_sent, 0);
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 1cf608e7357b..87b5796d0135 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -67,6 +67,12 @@ static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
return 0;
}
+static void __mptcp_subflow_set_rcvbuf(struct sock *ssk, int val)
+{
+ WRITE_ONCE(ssk->sk_rcvbuf, val);
+ tcp_set_rcvbuf(ssk, val);
+}
+
static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
{
struct mptcp_subflow_context *subflow;
@@ -100,7 +106,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
case SO_RCVBUF:
case SO_RCVBUFFORCE:
ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
+ __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf);
break;
case SO_MARK:
if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
@@ -1560,7 +1566,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
}
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
- WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
+ __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf);
}
if (sock_flag(sk, SOCK_LINGER)) {
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index c5a26236a0bb..3706b4a85a0f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1613,6 +1613,7 @@ dump_last:
((dump_type == DUMP_ALL) ==
!!(set->type->features & IPSET_DUMP_LAST))) {
write_unlock_bh(&ip_set_ref_lock);
+ set = NULL;
continue;
}
pr_debug("List set: %s\n", set->name);
@@ -1648,13 +1649,13 @@ dump_last:
if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
goto nla_put_failure;
+ if (set->variant->uref)
+ set->variant->uref(set, cb, true);
ret = set->variant->head(set, skb);
if (ret < 0)
goto release_refcount;
if (dump_flags & IPSET_FLAG_LIST_HEADER)
goto next_set;
- if (set->variant->uref)
- set->variant->uref(set, cb, true);
fallthrough;
default:
ret = set->variant->list(set, skb, cb);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index b79e5dd2af03..04e4627ddfc1 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -386,8 +386,9 @@ static void
mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
{
int i;
+ u8 pos = smp_load_acquire(&n->pos);
- for (i = 0; i < n->pos; i++)
+ for (i = 0; i < pos; i++)
if (test_bit(i, n->used))
ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
}
@@ -490,7 +491,7 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
- u8 htable_bits = t->htable_bits;
+ u8 pos, htable_bits = t->htable_bits;
spin_lock_bh(&t->hregion[r].lock);
for (i = ahash_bucket_start(r, htable_bits);
@@ -498,7 +499,8 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
n = __ipset_dereference(hbucket(t, i));
if (!n)
continue;
- for (j = 0, d = 0; j < n->pos; j++) {
+ pos = smp_load_acquire(&n->pos);
+ for (j = 0, d = 0; j < pos; j++) {
if (!test_bit(j, n->used)) {
d++;
continue;
@@ -534,7 +536,7 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
/* Still try to delete expired elements. */
continue;
tmp->size = n->size - AHASH_INIT_SIZE;
- for (j = 0, d = 0; j < n->pos; j++) {
+ for (j = 0, d = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
@@ -623,7 +625,7 @@ mtype_resize(struct ip_set *set, bool retried)
{
struct htype *h = set->data;
struct htable *t, *orig;
- u8 htable_bits;
+ u8 pos, htable_bits;
size_t hsize, dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
@@ -685,7 +687,8 @@ retry:
n = __ipset_dereference(hbucket(orig, i));
if (!n)
continue;
- for (j = 0; j < n->pos; j++) {
+ pos = smp_load_acquire(&n->pos);
+ for (j = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
@@ -809,9 +812,10 @@ mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
{
struct htype *h = set->data;
const struct htable *t;
- u32 i, j, r;
struct hbucket *n;
struct mtype_elem *data;
+ u32 i, j, r;
+ u8 pos;
t = rcu_dereference_bh(h->table);
for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
@@ -820,7 +824,8 @@ mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
n = rcu_dereference_bh(hbucket(t, i));
if (!n)
continue;
- for (j = 0; j < n->pos; j++) {
+ pos = smp_load_acquire(&n->pos);
+ for (j = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, set->dsize);
@@ -848,6 +853,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
bool flag_exist = flags & IPSET_FLAG_EXIST;
bool deleted = false, forceadd = false, reuse = false;
u32 r, key, multi = 0, elements, maxelem;
+ u8 npos = 0;
rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
@@ -889,7 +895,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ext_size(AHASH_INIT_SIZE, set->dsize);
goto copy_elem;
}
- for (i = 0; i < n->pos; i++) {
+ npos = smp_load_acquire(&n->pos);
+ for (i = 0; i < npos; i++) {
if (!test_bit(i, n->used)) {
/* Reuse first deleted entry */
if (j == -1) {
@@ -933,7 +940,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (elements >= maxelem)
goto set_full;
/* Create a new slot */
- if (n->pos >= n->size) {
+ if (npos >= n->size) {
#ifdef IP_SET_HASH_WITH_MULTI
if (h->bucketsize >= AHASH_MAX_TUNED)
goto set_full;
@@ -962,7 +969,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
copy_elem:
- j = n->pos++;
+ j = npos++;
data = ahash_data(n, j, set->dsize);
copy_data:
t->hregion[r].elements++;
@@ -985,6 +992,8 @@ overwrite_extensions:
if (SET_WITH_TIMEOUT(set))
ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
smp_mb__before_atomic();
+ /* Ensure all data writes are visible before updating position */
+ smp_store_release(&n->pos, npos);
set_bit(j, n->used);
if (old != ERR_PTR(-ENOENT)) {
rcu_assign_pointer(hbucket(t, key), n);
@@ -1043,6 +1052,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
int i, j, k, r, ret = -IPSET_ERR_EXIST;
u32 key, multi = 0;
size_t dsize = set->dsize;
+ u8 pos;
/* Userspace add and resize is excluded by the mutex.
* Kernespace add does not trigger resize.
@@ -1058,7 +1068,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
n = rcu_dereference_bh(hbucket(t, key));
if (!n)
goto out;
- for (i = 0, k = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0, k = 0; i < pos; i++) {
if (!test_bit(i, n->used)) {
k++;
continue;
@@ -1072,8 +1083,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ret = 0;
clear_bit(i, n->used);
smp_mb__after_atomic();
- if (i + 1 == n->pos)
- n->pos--;
+ if (i + 1 == pos)
+ smp_store_release(&n->pos, --pos);
t->hregion[r].elements--;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
@@ -1094,11 +1105,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
x->flags = flags;
}
}
- for (; i < n->pos; i++) {
+ for (; i < pos; i++) {
if (!test_bit(i, n->used))
k++;
}
- if (k == n->pos) {
+ if (k == pos) {
t->hregion[r].ext_size -= ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, key), NULL);
kfree_rcu(n, rcu);
@@ -1109,7 +1120,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (!tmp)
goto out;
tmp->size = n->size - AHASH_INIT_SIZE;
- for (j = 0, k = 0; j < n->pos; j++) {
+ for (j = 0, k = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
@@ -1170,6 +1181,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
int ret, i, j = 0;
#endif
u32 key, multi = 0;
+ u8 pos;
pr_debug("test by nets\n");
for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
@@ -1187,7 +1199,8 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
n = rcu_dereference_bh(hbucket(t, key));
if (!n)
continue;
- for (i = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0; i < pos; i++) {
if (!test_bit(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
@@ -1221,6 +1234,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct mtype_elem *data;
int i, ret = 0;
u32 key, multi = 0;
+ u8 pos;
rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
@@ -1243,7 +1257,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ret = 0;
goto out;
}
- for (i = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0; i < pos; i++) {
if (!test_bit(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
@@ -1360,6 +1375,7 @@ mtype_list(const struct ip_set *set,
/* We assume that one hash bucket fills into one page */
void *incomplete;
int i, ret = 0;
+ u8 pos;
atd = nla_nest_start(skb, IPSET_ATTR_ADT);
if (!atd)
@@ -1378,7 +1394,8 @@ mtype_list(const struct ip_set *set,
cb->args[IPSET_CB_ARG0], t, n);
if (!n)
continue;
- for (i = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0; i < pos; i++) {
if (!test_bit(i, n->used))
continue;
e = ahash_data(n, i, set->dsize);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index a22ec1a6f6ec..e26ca2a370e3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -150,7 +150,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++, i++) {
+ for (; ip <= ip_to; i++) {
e.ip = htonl(ip);
if (i > IPSET_MAX_RANGE) {
hash_ipmark4_data_next(&h->next, &e);
@@ -162,6 +162,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
ret = 0;
+
+ if (ip == ip_to)
+ break;
+ ip++;
}
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index e977b5a9c48d..41ca24a22a02 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -186,7 +186,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to;) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++, i++) {
@@ -203,6 +203,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = 0;
}
+ if (ip == ip_to)
+ break;
+ ip++;
}
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 39a01934b153..b9ac2efaa15c 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -182,7 +182,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to;) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++, i++) {
@@ -199,6 +199,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = 0;
}
+ if (ip == ip_to)
+ break;
+ ip++;
}
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 5c6de605a9fb..2d6652d43199 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -274,7 +274,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
p = port;
ip2 = ip2_from;
}
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to;) {
e.ip = htonl(ip);
for (; p <= port_to; p++) {
e.port = htons(p);
@@ -298,6 +298,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip2 = ip2_from;
}
p = port;
+ if (ip == ip_to)
+ break;
+ ip++;
}
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c7c7f6a7a9f6..bd9cae44d214 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -327,18 +327,22 @@ ip_vs_use_count_dec(void)
/* Service hashing:
* Operation Locking order
* ---------------------------------------------------------------------------
- * add table service_mutex, svc_resize_sem(W)
- * del table service_mutex
- * move between tables svc_resize_sem(W), seqcount_t(W), bit lock
- * add/del service service_mutex, bit lock
+ * add first table service_mutex
+ * attach new table service_mutex
+ * add/del service service_mutex, RCU, bit lock
+ * move between tables (rehash) svc_resize_sem(W), seqcount_t(W), bit lock
+ * replace old with attached svc_resize_sem(W), svc_replace_sem(W)
* find service RCU, seqcount_t(R)
* walk services(blocking) service_mutex, svc_resize_sem(R)
* walk services(non-blocking) RCU, seqcount_t(R)
+ * walk services(non-blocking) svc_resize_sem(R), RCU, seqcount_t(R)
+ * walk services(non-blocking) svc_replace_sem(R), RCU, seqcount_t(R)
+ * del table service_mutex after stopped work
*
- * - new tables are linked/unlinked under service_mutex and svc_resize_sem
- * - new table is linked on resizing and all operations can run in parallel
- * in 2 tables until the new table is registered as current one
- * - two contexts can modify buckets: config and table resize, both in
+ * - new table is attached on resizing under service_mutex and all operations
+ * can run in parallel in 2 tables until the new table is registered as current
+ * one
+ * - two contexts can modify buckets: config and table resize (work), both in
* process context
* - only table resizer can move entries, so we do not protect t->seqc[]
* items with t->lock[]
@@ -346,9 +350,13 @@ ip_vs_use_count_dec(void)
* services are moved to new table
* - move operations may disturb readers: find operation will not miss entries
* but walkers may see same entry twice if they are forced to retry chains
- * - walkers using cond_resched_rcu() on !PREEMPT_RCU may need to hold
- * service_mutex to disallow new tables to be installed or to check
+ * or to walk the newly attached second table
+ * - walkers using cond_resched_rcu() on !PREEMPT_RCU may need to check
* svc_table_changes and repeat the RCU read section if new table is installed
+ * - walkers may serialize with the whole resizing process (svc_resize_sem)
+ * to prevent seeing same service twice or just with the svc_table
+ * replace (svc_replace_sem) when we can see entries twice but we
+ * prefer to run concurrently with the rehashing.
*/
/*
@@ -387,9 +395,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
/* increase its refcnt because it is referenced by the svc table */
atomic_inc(&svc->refcnt);
+ /* We know if new table is attached under service_mutex but rely on
+ * RCU to hold the old table to be freed in resizer
+ */
+ rcu_read_lock();
+
+ /* This can be the old or the new table */
+ t = rcu_dereference(ipvs->svc_table);
+
/* New entries go into recent table */
- t = rcu_dereference_protected(ipvs->svc_table, 1);
- t = rcu_dereference_protected(t->new_tbl, 1);
+ t = rcu_dereference(t->new_tbl);
if (svc->fwmark == 0) {
/*
@@ -410,6 +425,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
hlist_bl_add_head_rcu(&svc->s_list, head);
hlist_bl_unlock(head);
+ rcu_read_unlock();
+
return 1;
}
@@ -432,7 +449,13 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
return 0;
}
- t = rcu_dereference_protected(ipvs->svc_table, 1);
+ /* We know if new table is attached under service_mutex but rely on
+ * RCU to hold the old table to be freed in resizer
+ */
+ rcu_read_lock();
+
+ /* This can be the old or the new table */
+ t = rcu_dereference(ipvs->svc_table);
hash_key = READ_ONCE(svc->hash_key);
/* We need to lock the bucket in the right table */
if (ip_vs_rht_same_table(t, hash_key)) {
@@ -443,13 +466,13 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
/* Moved to new table ? */
if (hash_key != hash_key2) {
hlist_bl_unlock(head);
- t = rcu_dereference_protected(t->new_tbl, 1);
+ t = rcu_dereference(t->new_tbl);
head = t->buckets + (hash_key2 & t->mask);
hlist_bl_lock(head);
}
} else {
/* It is already moved to new table */
- t = rcu_dereference_protected(t->new_tbl, 1);
+ t = rcu_dereference(t->new_tbl);
head = t->buckets + (hash_key & t->mask);
hlist_bl_lock(head);
}
@@ -459,6 +482,8 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
svc->flags &= ~IP_VS_SVC_F_HASHED;
atomic_dec(&svc->refcnt);
hlist_bl_unlock(head);
+
+ rcu_read_unlock();
return 1;
}
@@ -666,15 +691,14 @@ static void svc_resize_work_handler(struct work_struct *work)
goto unlock_sem;
more_work = false;
clear_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags);
- if (!READ_ONCE(ipvs->enable) ||
- test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
+ if (!READ_ONCE(ipvs->enable))
goto unlock_m;
t = rcu_dereference_protected(ipvs->svc_table, 1);
/* Do nothing if table is removed */
if (!t)
goto unlock_m;
- /* New table needs to be registered? BUG! */
- if (t != rcu_dereference_protected(t->new_tbl, 1))
+ /* New table already attached? BUG! */
+ if (t != rcu_access_pointer(t->new_tbl))
goto unlock_m;
lfactor = sysctl_svc_lfactor(ipvs);
@@ -691,6 +715,7 @@ static void svc_resize_work_handler(struct work_struct *work)
/* Flip the table_id */
t_new->table_id = t->table_id ^ IP_VS_RHT_TABLE_ID_MASK;
+ /* Attach new table */
rcu_assign_pointer(t->new_tbl, t_new);
/* Allow add/del to new_tbl while moving from old table */
mutex_unlock(&ipvs->service_mutex);
@@ -698,8 +723,8 @@ static void svc_resize_work_handler(struct work_struct *work)
ip_vs_rht_for_each_bucket(t, bucket, head) {
same_bucket:
if (++limit >= 16) {
- if (!READ_ONCE(ipvs->enable) ||
- test_bit(IP_VS_WORK_SVC_NORESIZE,
+ /* Check if work is stopped */
+ if (test_bit(IP_VS_WORK_SVC_NORESIZE,
&ipvs->work_flags))
goto unlock_sem;
if (resched_score >= 100) {
@@ -764,16 +789,12 @@ same_bucket:
goto same_bucket;
}
- /* Tables can be switched only under service_mutex */
- while (!mutex_trylock(&ipvs->service_mutex)) {
- cond_resched();
- if (!READ_ONCE(ipvs->enable) ||
- test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
- goto unlock_sem;
- }
- if (!READ_ONCE(ipvs->enable) ||
- test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
- goto unlock_m;
+ /* Serialize with readers that don't like svc_table changes */
+ down_write(&ipvs->svc_replace_sem);
+
+ /* Check if work is stopped to avoid synchronize_rcu() */
+ if (test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
+ goto unlock_repl;
rcu_assign_pointer(ipvs->svc_table, t_new);
/* Inform readers that new table is installed */
@@ -781,8 +802,8 @@ same_bucket:
atomic_inc(&ipvs->svc_table_changes);
t_free = t;
-unlock_m:
- mutex_unlock(&ipvs->service_mutex);
+unlock_repl:
+ up_write(&ipvs->svc_replace_sem);
unlock_sem:
up_write(&ipvs->svc_resize_sem);
@@ -801,6 +822,11 @@ out:
test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
return;
queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 1);
+ return;
+
+unlock_m:
+ mutex_unlock(&ipvs->service_mutex);
+ goto unlock_sem;
}
static inline void
@@ -1691,6 +1717,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
struct ip_vs_pe *pe = NULL;
int ret_hooks = -1;
int ret = 0;
+ bool grow;
/* increase the module use count */
if (!ip_vs_use_count_inc())
@@ -1732,16 +1759,25 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
}
#endif
- t = rcu_dereference_protected(ipvs->svc_table, 1);
+ /* The old table can be freed, protect it with RCU */
+ rcu_read_lock();
+ t = rcu_dereference(ipvs->svc_table);
if (!t) {
int lfactor = sysctl_svc_lfactor(ipvs);
int new_size = ip_vs_svc_desired_size(ipvs, NULL, lfactor);
+ rcu_read_unlock();
t_new = ip_vs_svc_table_alloc(ipvs, new_size, lfactor);
if (!t_new) {
ret = -ENOMEM;
goto out_err;
}
+ grow = false;
+ } else {
+ /* Even the currently attached new table may need to grow */
+ t = rcu_dereference(t->new_tbl);
+ grow = ip_vs_get_num_services(ipvs) + 1 > t->u_thresh;
+ rcu_read_unlock();
}
if (!rcu_dereference_protected(ipvs->conn_tab, 1)) {
@@ -1800,6 +1836,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
goto out_err;
if (t_new) {
+ /* Add table for first time */
clear_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags);
rcu_assign_pointer(ipvs->svc_table, t_new);
t_new = NULL;
@@ -1831,8 +1868,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
ip_vs_svc_hash(svc);
/* Schedule resize work */
- if (t && ip_vs_get_num_services(ipvs) > t->u_thresh &&
- !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags))
+ if (grow && !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags))
queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work,
1);
@@ -2054,7 +2090,6 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
return -EEXIST;
ipvs = svc->ipvs;
ip_vs_unlink_service(svc, false);
- t = rcu_dereference_protected(ipvs->svc_table, 1);
/* Drop the table if no more services */
ns = ip_vs_get_num_services(ipvs);
@@ -2062,6 +2097,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
/* Stop the resizer and drop the tables */
set_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags);
cancel_delayed_work_sync(&ipvs->svc_resize_work);
+ t = rcu_dereference_protected(ipvs->svc_table, 1);
if (t) {
rcu_assign_pointer(ipvs->svc_table, NULL);
/* Inform readers that table is removed */
@@ -2075,11 +2111,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
t = p;
}
}
- } else if (ns <= t->l_thresh &&
- !test_and_set_bit(IP_VS_WORK_SVC_RESIZE,
- &ipvs->work_flags)) {
- queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work,
- 1);
+ } else {
+ bool shrink;
+
+ rcu_read_lock();
+ t = rcu_dereference(ipvs->svc_table);
+ /* Even the currently attached new table may need to shrink */
+ t = rcu_dereference(t->new_tbl);
+ shrink = ns <= t->l_thresh;
+ rcu_read_unlock();
+ if (shrink && !test_and_set_bit(IP_VS_WORK_SVC_RESIZE,
+ &ipvs->work_flags))
+ queue_delayed_work(system_unbound_wq,
+ &ipvs->svc_resize_work, 1);
}
return 0;
}
@@ -2184,17 +2228,21 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
struct ip_vs_service *svc;
struct hlist_bl_node *e;
struct ip_vs_dest *dest;
- int old_gen, new_gen;
+ int old_gen;
if (event != NETDEV_DOWN || !ipvs)
return NOTIFY_DONE;
IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
+ /* Allow concurrent rehashing on resize but to avoid loop
+ * serialize with installing the new table.
+ */
+ down_read(&ipvs->svc_replace_sem);
+
old_gen = atomic_read(&ipvs->svc_table_changes);
rcu_read_lock();
-repeat:
smp_rmb(); /* ipvs->svc_table and svc_table_changes */
ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) {
hlist_bl_for_each_entry_rcu(svc, e, head, s_list) {
@@ -2207,17 +2255,17 @@ repeat:
}
resched_score++;
if (resched_score >= 100) {
- resched_score = 0;
cond_resched_rcu();
- new_gen = atomic_read(&ipvs->svc_table_changes);
- /* New table installed ? */
- if (old_gen != new_gen) {
- old_gen = new_gen;
- goto repeat;
- }
+ /* Flushed? So no more dev refs */
+ if (atomic_read(&ipvs->svc_table_changes) != old_gen)
+ goto done;
+ resched_score = 0;
}
}
+
+done:
rcu_read_unlock();
+ up_read(&ipvs->svc_replace_sem);
return NOTIFY_DONE;
}
@@ -2244,6 +2292,10 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
struct ip_vs_service *svc;
struct hlist_bl_node *e;
+ /* svc_table can not be replaced (svc_replace_sem) or
+ * removed (service_mutex)
+ */
+ down_read(&ipvs->svc_replace_sem);
rcu_read_lock();
ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) {
@@ -2259,6 +2311,7 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
}
rcu_read_unlock();
+ up_read(&ipvs->svc_replace_sem);
ip_vs_zero_stats(&ipvs->tot_stats->s);
return 0;
@@ -3062,6 +3115,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
u32 sum;
int i;
+ /* Info for conns */
rcu_read_lock();
t = rcu_dereference(ipvs->conn_tab);
@@ -3123,6 +3177,12 @@ repeat_conn:
}
after_conns:
+ rcu_read_unlock();
+
+ /* Info for services */
+ down_read(&ipvs->svc_replace_sem);
+ rcu_read_lock();
+
t = rcu_dereference(ipvs->svc_table);
count = ip_vs_get_num_services(ipvs);
@@ -3133,9 +3193,7 @@ after_conns:
if (!count)
goto after_svc;
old_gen = atomic_read(&ipvs->svc_table_changes);
- loops = 0;
-repeat_svc:
smp_rmb(); /* ipvs->svc_table and svc_table_changes */
memset(counts, 0, sizeof(counts));
ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, pt) {
@@ -3157,15 +3215,10 @@ repeat_svc:
if (resched_score >= 100) {
resched_score = 0;
cond_resched_rcu();
- new_gen = atomic_read(&ipvs->svc_table_changes);
- /* New table installed ? */
- if (old_gen != new_gen) {
- /* Too many changes? */
- if (++loops >= 5)
- goto after_svc;
- old_gen = new_gen;
- goto repeat_svc;
- }
+ /* Flushed? */
+ if (atomic_read(&ipvs->svc_table_changes) !=
+ old_gen)
+ goto after_svc;
}
counts[count]++;
}
@@ -3184,6 +3237,9 @@ repeat_svc:
}
after_svc:
+ rcu_read_unlock();
+ up_read(&ipvs->svc_replace_sem);
+
seq_printf(seq, "Stats thread slots:\t%d (max %lu)\n",
ipvs->est_kt_count, ipvs->est_max_threads);
seq_printf(seq, "Stats chain max len:\t%d\n", ipvs->est_chain_max);
@@ -3191,7 +3247,6 @@ after_svc:
ipvs->est_chain_max * IPVS_EST_CHAIN_FACTOR *
IPVS_EST_NTICKS);
- rcu_read_unlock();
return 0;
}
@@ -3503,7 +3558,7 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs,
int ret = 0;
lockdep_assert_held(&ipvs->svc_resize_sem);
- /* All service modifications are disabled, go ahead */
+ /* All svc_table modifications are disabled, go ahead */
ip_vs_rht_walk_buckets(ipvs->svc_table, head) {
hlist_bl_for_each_entry(svc, e, head, s_list) {
/* Only expose IPv4 entries to old interface */
@@ -3687,7 +3742,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
pr_err("length: %u != %zu\n", *len, size);
return -EINVAL;
}
- /* Protect against table resizer moving the entries.
+ /* Prevent modifications to the list with services.
* Try reverse locking, so that we do not hold the mutex
* while waiting for semaphore.
*/
@@ -4029,6 +4084,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
int start = cb->args[0];
int idx = 0;
+ /* Make sure we do not see same service twice during resize */
down_read(&ipvs->svc_resize_sem);
rcu_read_lock();
ip_vs_rht_walk_buckets_safe_rcu(ipvs->svc_table, head) {
@@ -5072,6 +5128,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
/* Initialize service_mutex, svc_table per netns */
__mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key);
init_rwsem(&ipvs->svc_resize_sem);
+ init_rwsem(&ipvs->svc_replace_sem);
INIT_DELAYED_WORK(&ipvs->svc_resize_work, svc_resize_work_handler);
atomic_set(&ipvs->svc_table_changes, 0);
RCU_INIT_POINTER(ipvs->svc_table, NULL);
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 4f39bf7c843f..75e53fde6b29 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -72,6 +72,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
rcu_assign_pointer(exp->helper, helper);
+ rcu_assign_pointer(exp->assign_helper, NULL);
write_pnet(&exp->net, net);
#ifdef CONFIG_NF_CONNTRACK_ZONES
exp->zone = ct->zone;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b08189226320..8ba5b22a1eef 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1811,14 +1811,17 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
if (exp) {
+ struct nf_conntrack_helper *assign_helper;
+
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
ct->master = exp->master;
- if (exp->helper) {
+ assign_helper = rcu_dereference(exp->assign_helper);
+ if (assign_helper) {
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help)
- rcu_assign_pointer(help->helper, exp->helper);
+ rcu_assign_pointer(help->helper, assign_helper);
}
#ifdef CONFIG_NF_CONNTRACK_MARK
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 24d0576d84b7..8e943efbdf0a 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -344,6 +344,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
helper = rcu_dereference(help->helper);
rcu_assign_pointer(exp->helper, helper);
+ rcu_assign_pointer(exp->assign_helper, NULL);
write_pnet(&exp->net, net);
#ifdef CONFIG_NF_CONNTRACK_ZONES
exp->zone = ct->zone;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 3f5c50455b71..b2fe6554b9cf 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245);
+ rcu_assign_pointer(exp->assign_helper, &nf_conntrack_helper_h245);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3 : NULL,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931);
exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
nathook = rcu_dereference(nfct_h323_nat_hook);
@@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_UDP, NULL, &port);
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_ras);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect RAS ");
@@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index a715304a53d8..17e971bd4c74 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -321,8 +321,8 @@ __printf(3, 4)
void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
const char *fmt, ...)
{
+ const char *helper_name = "(null)";
const struct nf_conn_help *help;
- const struct nf_conntrack_helper *helper;
struct va_format vaf;
va_list args;
@@ -331,14 +331,17 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
vaf.fmt = fmt;
vaf.va = &args;
- /* Called from the helper function, this call never fails */
help = nfct_help(ct);
+ if (help) {
+ const struct nf_conntrack_helper *helper;
- /* rcu_read_lock()ed by nf_hook_thresh */
- helper = rcu_dereference(help->helper);
+ helper = rcu_dereference(help->helper);
+ if (helper)
+ helper_name = helper->name;
+ }
nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
- "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);
+ "helper %s dropping packet: %pV ", helper_name, &vaf);
va_end(args);
}
@@ -400,6 +403,11 @@ static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
this = rcu_dereference_protected(exp->helper,
lockdep_is_held(&nf_conntrack_expect_lock));
+ if (this == me)
+ return true;
+
+ this = rcu_dereference_protected(exp->assign_helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
return this == me;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index eda5fe4a75c8..befa7e83ee49 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2634,6 +2634,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
static struct nf_conntrack_expect *
ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
+ const struct nf_conntrack_helper *assign_helper,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask);
@@ -2860,6 +2861,7 @@ static int
ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
u32 portid, u32 report)
{
+ struct nf_conntrack_helper *assign_helper = NULL;
struct nlattr *cda[CTA_EXPECT_MAX+1];
struct nf_conntrack_tuple tuple, mask;
struct nf_conntrack_expect *exp;
@@ -2870,13 +2872,26 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
if (err < 0)
return err;
+ if (!cda[CTA_EXPECT_TUPLE] || !cda[CTA_EXPECT_MASK])
+ return -EINVAL;
+
err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda,
ct, &tuple, &mask);
if (err < 0)
return err;
+ if (cda[CTA_EXPECT_HELP_NAME]) {
+ const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+ assign_helper = __nf_conntrack_helper_find(helpname,
+ nf_ct_l3num(ct),
+ tuple.dst.protonum);
+ if (!assign_helper)
+ return -EOPNOTSUPP;
+ }
+
exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
- &tuple, &mask);
+ assign_helper, &tuple, &mask);
if (IS_ERR(exp))
return PTR_ERR(exp);
@@ -3515,6 +3530,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
static struct nf_conntrack_expect *
ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
+ const struct nf_conntrack_helper *assign_helper,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask)
{
@@ -3568,6 +3584,7 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
exp->zone = ct->zone;
#endif
rcu_assign_pointer(exp->helper, helper);
+ rcu_assign_pointer(exp->assign_helper, assign_helper);
exp->tuple = *tuple;
exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask->src.u.all;
@@ -3623,7 +3640,7 @@ ctnetlink_create_expect(struct net *net,
ct = nf_ct_tuplehash_to_ctrack(h);
rcu_read_lock();
- exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask);
+ exp = ctnetlink_alloc_expect(cda, ct, NULL, &tuple, &mask);
if (IS_ERR(exp)) {
err = PTR_ERR(exp);
goto err_rcu;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1eb55907d470..e69941f1a101 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1366,6 +1366,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
goto store_cseq;
}
+ helper = rcu_dereference(nfct_help(ct)->helper);
+ if (!helper)
+ return NF_DROP;
+
exp = nf_ct_expect_alloc(ct);
if (!exp) {
nf_ct_helper_log(skb, ct, "cannot alloc expectation");
@@ -1376,14 +1380,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
if (sip_direct_signalling)
saddr = &ct->tuplehash[!dir].tuple.src.u3;
- helper = rcu_dereference(nfct_help(ct)->helper);
- if (!helper)
- return NF_DROP;
-
nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
saddr, &daddr, proto, NULL, &port);
exp->timeout.expires = sip_timeout * HZ;
- rcu_assign_pointer(exp->helper, helper);
+ rcu_assign_pointer(exp->assign_helper, helper);
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
hooks = rcu_dereference(nf_nat_sip_hooks);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index a6c81c04b3a5..57b450024a99 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -61,6 +61,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
struct nf_hook_state *state = &entry->state;
/* Release those devices we held, or Alexey will kill me. */
+ dev_put(entry->skb_dev);
dev_put(state->in);
dev_put(state->out);
if (state->sk)
@@ -102,6 +103,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
return false;
+ dev_hold(entry->skb_dev);
dev_hold(state->in);
dev_hold(state->out);
@@ -202,11 +204,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
*entry = (struct nf_queue_entry) {
.skb = skb,
+ .skb_dev = skb->dev,
.state = *state,
.hook_index = index,
.size = sizeof(*entry) + route_key_size,
};
-
__nf_queue_entry_init_physdevs(entry);
if (!nf_queue_entry_get_refs(entry)) {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 58304fd1f70f..984a0eb9e149 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1212,6 +1212,8 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
if (physinif == ifindex || physoutif == ifindex)
return 1;
#endif
+ if (entry->skb_dev && entry->skb_dev->ifindex == ifindex)
+ return 1;
if (entry->state.in)
if (entry->state.in->ifindex == ifindex)
return 1;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 60ee8d932fcb..fa2cc556331c 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1334,6 +1334,8 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
if (nf_ct_expect_related(exp, 0) != 0)
regs->verdict.code = NF_DROP;
+
+ nf_ct_expect_put(exp);
}
static const struct nla_policy nft_ct_expect_policy[NFTA_CT_EXPECT_MAX + 1] = {
diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c
index 03ffb1159fc1..d14ca157910b 100644
--- a/net/netfilter/nft_inner.c
+++ b/net/netfilter/nft_inner.c
@@ -163,7 +163,6 @@ static int nft_inner_parse_l2l3(const struct nft_inner *priv,
return -1;
if (fragoff == 0) {
- thoff = nhoff + sizeof(_ip6h);
ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
ctx->inner_thoff = thoff;
ctx->l4proto = l4proto;
@@ -247,8 +246,8 @@ static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt,
local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) {
- local_bh_enable();
local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
+ local_bh_enable();
return false;
}
*tun_ctx = *this_cpu_tun_ctx;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2c67c2e6b132..4e6708c23922 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -55,6 +55,9 @@ static struct list_head xt_templates[NFPROTO_NUMPROTO];
struct xt_pernet {
struct list_head tables[NFPROTO_NUMPROTO];
+
+ /* stash area used during netns exit */
+ struct list_head dead_tables[NFPROTO_NUMPROTO];
};
struct compat_delta {
@@ -1472,11 +1475,9 @@ struct xt_counters *xt_counters_alloc(unsigned int counters)
}
EXPORT_SYMBOL(xt_counters_alloc);
-struct xt_table_info *
-xt_replace_table(struct xt_table *table,
- unsigned int num_counters,
- struct xt_table_info *newinfo,
- int *error)
+static struct xt_table_info *
+do_replace_table(struct xt_table *table, unsigned int num_counters,
+ struct xt_table_info *newinfo, int *error)
{
struct xt_table_info *private;
unsigned int cpu;
@@ -1531,30 +1532,54 @@ xt_replace_table(struct xt_table *table,
}
}
- audit_log_nfcfg(table->name, table->af, private->number,
- !private->number ? AUDIT_XT_OP_REGISTER :
- AUDIT_XT_OP_REPLACE,
- GFP_KERNEL);
+ return private;
+}
+
+struct xt_table_info *
+xt_replace_table(struct xt_table *table, unsigned int num_counters,
+ struct xt_table_info *newinfo,
+ int *error)
+{
+ struct xt_table_info *private;
+
+ private = do_replace_table(table, num_counters, newinfo, error);
+ if (private)
+ audit_log_nfcfg(table->name, table->af, private->number,
+ AUDIT_XT_OP_REPLACE,
+ GFP_KERNEL);
+
return private;
}
EXPORT_SYMBOL_GPL(xt_replace_table);
struct xt_table *xt_register_table(struct net *net,
const struct xt_table *input_table,
+ const struct nf_hook_ops *template_ops,
struct xt_table_info *bootstrap,
struct xt_table_info *newinfo)
{
struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct xt_table *t, *table = NULL;
+ struct nf_hook_ops *ops = NULL;
struct xt_table_info *private;
- struct xt_table *t, *table;
- int ret;
+ unsigned int num_ops;
+ int ret = -EINVAL;
+
+ num_ops = hweight32(input_table->valid_hooks);
+ if (num_ops == 0)
+ goto out;
+
+ ret = -ENOMEM;
+ if (template_ops) {
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ goto out;
+ }
/* Don't add one object to multiple lists. */
table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
- if (!table) {
- ret = -ENOMEM;
+ if (!table)
goto out;
- }
mutex_lock(&xt[table->af].mutex);
/* Don't autoload: we'd eat our tail... */
@@ -1568,7 +1593,7 @@ struct xt_table *xt_register_table(struct net *net,
/* Simplifies replace_table code. */
table->private = bootstrap;
- if (!xt_replace_table(table, 0, newinfo, &ret))
+ if (!do_replace_table(table, 0, newinfo, &ret))
goto unlock;
private = table->private;
@@ -1577,34 +1602,122 @@ struct xt_table *xt_register_table(struct net *net,
/* save number of initial entries */
private->initial_entries = private->number;
+ if (ops) {
+ int i;
+
+ for (i = 0; i < num_ops; i++)
+ ops[i].priv = table;
+
+ ret = nf_register_net_hooks(net, ops, num_ops);
+ if (ret != 0) {
+ mutex_unlock(&xt[table->af].mutex);
+ /* nf_register_net_hooks() might have published a
+ * base chain before internal error unwind.
+ */
+ synchronize_rcu();
+ goto out;
+ }
+
+ table->ops = ops;
+ }
+
+ audit_log_nfcfg(table->name, table->af, private->number,
+ AUDIT_XT_OP_REGISTER, GFP_KERNEL);
+
list_add(&table->list, &xt_net->tables[table->af]);
mutex_unlock(&xt[table->af].mutex);
return table;
unlock:
mutex_unlock(&xt[table->af].mutex);
- kfree(table);
out:
+ kfree(table);
+ kfree(ops);
return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(xt_register_table);
-void *xt_unregister_table(struct xt_table *table)
+/**
+ * xt_unregister_table_pre_exit - pre-shutdown unregister of a table
+ * @net: network namespace
+ * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6)
+ * @name: name of the table to unregister
+ *
+ * Unregisters the specified netfilter table from the given network namespace
+ * and also unregisters the hooks from netfilter core: no new packets will be
+ * processed.
+ *
+ * This must be called prior to xt_unregister_table_exit() from the pernet
+ * .pre_exit callback. After this call, the table is no longer visible to
+ * the get/setsockopt path. In case of rmmod, module exit path must have
+ * called xt_unregister_template() prior to unregistering pernet ops to
+ * prevent re-instantiation of the table.
+ *
+ * See also: xt_unregister_table_exit()
+ */
+void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name)
{
- struct xt_table_info *private;
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct xt_table *t;
- mutex_lock(&xt[table->af].mutex);
- private = table->private;
- list_del(&table->list);
- mutex_unlock(&xt[table->af].mutex);
- audit_log_nfcfg(table->name, table->af, private->number,
- AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
- kfree(table->ops);
- kfree(table);
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt_net->tables[af], list) {
+ if (strcmp(t->name, name) == 0) {
+ list_move(&t->list, &xt_net->dead_tables[af]);
+ mutex_unlock(&xt[af].mutex);
- return private;
+ if (t->ops) /* nat table registers with nat core, t->ops is NULL. */
+ nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks));
+ return;
+ }
+ }
+ mutex_unlock(&xt[af].mutex);
+}
+EXPORT_SYMBOL(xt_unregister_table_pre_exit);
+
+/**
+ * xt_unregister_table_exit - remove a table during namespace teardown
+ * @net: the network namespace from which to unregister the table
+ * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6)
+ * @name: name of the table to unregister
+ *
+ * Completes the unregister process for a table. This must be called from
+ * the pernet ops .exit callback. This is the second stage after
+ * xt_unregister_table_pre_exit().
+ *
+ * pair with xt_unregister_table_pre_exit() during namespace shutdown.
+ *
+ * Return: the unregistered table or NULL if the table was never
+ * instantiated. The caller needs to kfree() the table after it
+ * has removed the family specific matches/targets.
+ */
+struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name)
+{
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct xt_table *table;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(table, &xt_net->dead_tables[af], list) {
+ struct nf_hook_ops *ops = NULL;
+
+ if (strcmp(table->name, name) != 0)
+ continue;
+
+ list_del(&table->list);
+
+ audit_log_nfcfg(table->name, table->af, table->private->number,
+ AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
+ swap(table->ops, ops);
+ mutex_unlock(&xt[af].mutex);
+
+ kfree(ops);
+ return table;
+ }
+ mutex_unlock(&xt[af].mutex);
+
+ return NULL;
}
-EXPORT_SYMBOL_GPL(xt_unregister_table);
+EXPORT_SYMBOL_GPL(xt_unregister_table_exit);
#endif
#ifdef CONFIG_PROC_FS
@@ -2051,8 +2164,10 @@ static int __net_init xt_net_init(struct net *net)
struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
int i;
- for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
INIT_LIST_HEAD(&xt_net->tables[i]);
+ INIT_LIST_HEAD(&xt_net->dead_tables[i]);
+ }
return 0;
}
@@ -2061,8 +2176,10 @@ static void __net_exit xt_net_exit(struct net *net)
struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
int i;
- for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
WARN_ON_ONCE(!list_empty(&xt_net->tables[i]));
+ WARN_ON_ONCE(!list_empty(&xt_net->dead_tables[i]));
+ }
}
static struct pernet_operations xt_net_ops = {
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d251d894afd4..0da39eaed255 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1972,8 +1972,10 @@ int genlmsg_multicast_allns(const struct genl_family *family,
struct sk_buff *skb, u32 portid,
unsigned int group)
{
- if (WARN_ON_ONCE(group >= family->n_mcgrps))
+ if (WARN_ON_ONCE(group >= family->n_mcgrps)) {
+ kfree_skb(skb);
return -EINVAL;
+ }
group = family->mcgrp_offset + group;
return genlmsg_mcast(skb, portid, group);
@@ -1986,8 +1988,10 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb,
struct net *net = genl_info_net(info);
struct sock *sk = net->genl_sock;
- if (WARN_ON_ONCE(group >= family->n_mcgrps))
+ if (WARN_ON_ONCE(group >= family->n_mcgrps)) {
+ kfree_skb(skb);
return;
+ }
group = family->mcgrp_offset + group;
nlmsg_notify(sk, skb, info->snd_portid, group,
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index c42642075685..e7e8490a53d8 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -83,6 +83,14 @@ struct vport *ovs_netdev_link(struct vport *vport, bool tunnel)
}
rtnl_lock();
+ /* Do not link devices that are not registered to avoid a potential
+ * race with the NETDEV_UNREGISTER notification in dp_device_event().
+ */
+ if (vport->dev->reg_state != NETREG_REGISTERED) {
+ err = -ENODEV;
+ goto error_put_unlock;
+ }
+
err = netdev_master_upper_dev_link(vport->dev,
get_dpdev(vport->dp),
NULL, NULL, NULL);
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 4dbf0914df7d..706927139393 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -671,8 +671,23 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
/* Look for an existing pipe handle */
sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
- if (sknode)
- return sk_receive_skb(sknode, skb, 1);
+ if (sknode) {
+ int rc;
+
+ /* pep_do_rcv() runs from two contexts: from softirq via
+ * phonet_rcv() -> __sk_receive_skb() with BH disabled,
+ * and from process context via
+ * release_sock() -> __release_sock(), which drops
+ * the listener slock with spin_unlock_bh() before draining
+ * the backlog. The child pipe slock is taken below via
+ * bh_lock_sock_nested(), which does not itself disable BH, so
+ * disable BH here to keep both acquire contexts consistent.
+ */
+ local_bh_disable();
+ rc = sk_receive_skb(sknode, skb, 1);
+ local_bh_enable();
+ return rc;
+ }
switch (hdr->message_id) {
case PNS_PEP_CONNECT_REQ:
diff --git a/net/rds/message.c b/net/rds/message.c
index 25fedcb3cd00..7feb0eb6537d 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -448,6 +448,7 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *
for (i = 0; i < rm->data.op_nents; i++)
put_page(sg_page(&rm->data.op_sg[i]));
+ rm->data.op_nents = 0;
mmp = &rm->data.op_mmp_znotifier->z_mmp;
mm_unaccount_pinned_pages(mmp);
ret = -EFAULT;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 654e23d13e3d..5830b31a1f37 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -198,8 +198,13 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
write_lock_bh(&sock->sk->sk_callback_lock);
- /* done under the callback_lock to serialize with write_space */
+ /* done under the callback_lock to serialize with write_space.
+ * Set t_sock inside rds_tcp_tc_list_lock so readers walking
+ * rds_tcp_tc_list under the same lock cannot observe an
+ * entry whose t_sock is NULL.
+ */
spin_lock(&rds_tcp_tc_list_lock);
+ tc->t_sock = sock;
list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
#if IS_ENABLED(CONFIG_IPV6)
rds6_tcp_tc_count++;
@@ -211,8 +216,6 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
/* accepted sockets need our listen data ready undone */
if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready)
sock->sk->sk_data_ready = sock->sk->sk_user_data;
-
- tc->t_sock = sock;
if (!tc->t_rtn)
tc->t_rtn = net_generic(sock_net(sock->sk), rds_tcp_netid);
tc->t_cpath = cp;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 27c2aa2dd023..98f2165159d7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -213,8 +213,6 @@ struct rxrpc_skb_priv {
struct {
u16 offset; /* Offset of data */
u16 len; /* Length of data */
- u8 flags;
-#define RXRPC_RX_VERIFIED 0x01
};
struct {
rxrpc_seq_t first_ack; /* First packet in acks table */
@@ -309,15 +307,16 @@ struct rxrpc_security {
struct sk_buff *challenge);
/* verify a response */
- int (*verify_response)(struct rxrpc_connection *,
- struct sk_buff *);
+ int (*verify_response)(struct rxrpc_connection *conn,
+ struct sk_buff *response_skb,
+ void *response, unsigned int len);
/* clear connection security */
void (*clear)(struct rxrpc_connection *);
/* Default ticket -> key decoder */
int (*default_decode_ticket)(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int ticket_offset, unsigned int ticket_len,
+ void *ticket, unsigned int ticket_len,
struct key **_key);
};
@@ -774,6 +773,11 @@ struct rxrpc_call {
struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */
struct sk_buff_head rx_queue; /* Queue of packets for this call to receive */
struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */
+ void *rx_dec_buffer; /* Decryption buffer */
+ unsigned short rx_dec_bsize; /* rx_dec_buffer size */
+ unsigned short rx_dec_offset; /* Decrypted packet data offset */
+ unsigned short rx_dec_len; /* Decrypted packet data len */
+ rxrpc_seq_t rx_dec_seq; /* Packet in decryption buffer */
rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */
rxrpc_seq_t rx_consumed; /* Highest packet consumed */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index fdd683261226..fec59d9338b9 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -332,25 +332,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK;
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- sp->hdr.securityIndex != 0 &&
- skb_cloned(skb)) {
- /* Unshare the packet so that it can be
- * modified by in-place decryption.
- */
- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
-
- if (nskb) {
- rxrpc_new_skb(nskb, rxrpc_skb_new_unshared);
- rxrpc_input_call_packet(call, nskb);
- rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx);
- } else {
- /* OOM - Drop the packet. */
- rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem);
- }
- } else {
- rxrpc_input_call_packet(call, skb);
- }
+ rxrpc_input_call_packet(call, skb);
rxrpc_free_skb(skb, rxrpc_skb_put_call_rx);
did_receive = true;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f035f486c139..fcb9d38bb521 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -152,6 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
spin_lock_init(&call->notify_lock);
refcount_set(&call->ref, 1);
call->debug_id = debug_id;
+ call->rx_pkt_offset = USHRT_MAX;
call->tx_total_len = -1;
call->tx_jumbo_max = 1;
call->next_rx_timo = 20 * HZ;
@@ -553,6 +554,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call)
rxrpc_purge_queue(&call->recvmsg_queue);
rxrpc_purge_queue(&call->rx_queue);
rxrpc_purge_queue(&call->rx_oos_queue);
+ kfree(call->rx_dec_buffer);
}
/*
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index a2130d25aaa9..c96ca615b787 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -243,27 +243,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
static int rxrpc_verify_response(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
+ unsigned int len = skb->len - sizeof(struct rxrpc_wire_header);
+ void *buffer;
int ret;
- if (skb_cloned(skb)) {
- /* Copy the packet if shared so that we can do in-place
- * decryption.
- */
- struct sk_buff *nskb = skb_copy(skb, GFP_NOFS);
+ buffer = kmalloc(len, GFP_NOFS);
+ if (!buffer)
+ return -ENOMEM;
- if (nskb) {
- rxrpc_new_skb(nskb, rxrpc_skb_new_unshared);
- ret = conn->security->verify_response(conn, nskb);
- rxrpc_free_skb(nskb, rxrpc_skb_put_response_copy);
- } else {
- /* OOM - Drop the packet. */
- rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem);
- ret = -ENOMEM;
- }
- } else {
- ret = conn->security->verify_response(conn, skb);
- }
+ ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), buffer, len);
+ if (ret < 0)
+ goto out;
+
+ ret = conn->security->verify_response(conn, skb, buffer, len);
+out:
+ kfree(buffer);
return ret;
}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 0a260df45d25..0b39046bdc61 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -32,9 +32,6 @@ static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- sp->flags |= RXRPC_RX_VERIFIED;
return 0;
}
@@ -57,9 +54,10 @@ static int none_sendmsg_respond_to_challenge(struct sk_buff *challenge,
}
static int none_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+ struct sk_buff *response_skb,
+ void *response, unsigned int len)
{
- return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ return rxrpc_abort_conn(conn, response_skb, RX_PROTOCOL_ERROR, -EPROTO,
rxrpc_eproto_rxnull_response);
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index e1f7513a46db..c940600117a4 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -147,15 +147,52 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
}
/*
- * Decrypt and verify a DATA packet.
+ * Decrypt and verify a DATA packet. The content of the packet is pulled out
+ * into a flat buffer rather than decrypting in place in the skbuff. This also
+ * has the advantage of aligning the buffer correctly for the crypto routines.
+ *
+ * We keep track of the sequence number of the packet currently decrypted into
+ * the buffer in ->rx_dec_seq. If MSG_PEEK is used and steps onto a new
+ * packet, subsequent recvmsg() calls will have to go back and re-decrypt the
+ * current packet.
*/
static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ int ret;
- if (sp->flags & RXRPC_RX_VERIFIED)
- return 0;
- return call->security->verify_packet(call, skb);
+ if (sp->len > call->rx_dec_bsize) {
+ /* Make sure we can hold a 1412-byte jumbo subpacket and make
+ * sure that the buffer size is aligned to a crypto blocksize.
+ */
+ size_t size = clamp(round_up(sp->len, 32), 2048, 65535);
+ void *buffer = krealloc(call->rx_dec_buffer, size, GFP_NOFS);
+
+ if (!buffer)
+ return -ENOMEM;
+ call->rx_dec_buffer = buffer;
+ call->rx_dec_bsize = size;
+ }
+
+ ret = -EFAULT;
+ if (skb_copy_bits(skb, sp->offset, call->rx_dec_buffer, sp->len) < 0)
+ goto err;
+
+ call->rx_dec_offset = 0;
+ call->rx_dec_len = sp->len;
+ call->rx_dec_seq = sp->hdr.seq;
+ ret = call->security->verify_packet(call, skb);
+ if (ret < 0)
+ goto err;
+ return 0;
+
+err:
+ kfree(call->rx_dec_buffer);
+ call->rx_dec_buffer = NULL;
+ call->rx_dec_bsize = 0;
+ call->rx_dec_offset = 0;
+ call->rx_dec_len = 0;
+ return ret;
}
/*
@@ -283,16 +320,21 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
if (msg)
sock_recv_timestamp(msg, sock->sk, skb);
- if (rx_pkt_offset == 0) {
+ if (call->rx_dec_seq != sp->hdr.seq ||
+ !call->rx_dec_buffer) {
ret2 = rxrpc_verify_data(call, skb);
trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq,
- sp->offset, sp->len, ret2);
+ call->rx_dec_offset,
+ call->rx_dec_len, ret2);
if (ret2 < 0) {
ret = ret2;
goto out;
}
- rx_pkt_offset = sp->offset;
- rx_pkt_len = sp->len;
+ }
+
+ if (rx_pkt_offset == USHRT_MAX) {
+ rx_pkt_offset = call->rx_dec_offset;
+ rx_pkt_len = call->rx_dec_len;
} else {
trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
rx_pkt_offset, rx_pkt_len, 0);
@@ -304,10 +346,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
if (copy > remain)
copy = remain;
if (copy > 0) {
- ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter,
- copy);
- if (ret2 < 0) {
- ret = ret2;
+ ret2 = copy_to_iter(call->rx_dec_buffer + rx_pkt_offset,
+ copy, iter);
+ if (ret2 != copy) {
+ ret = -EFAULT;
goto out;
}
@@ -328,7 +370,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
/* The whole packet has been transferred. */
if (sp->hdr.flags & RXRPC_LAST_PACKET)
ret = 1;
- rx_pkt_offset = 0;
+ rx_pkt_offset = USHRT_MAX;
rx_pkt_len = 0;
skb = skb_peek_next(skb, &call->recvmsg_queue);
diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c
index 0d5e654da918..a1ee102abae1 100644
--- a/net/rxrpc/rxgk.c
+++ b/net/rxrpc/rxgk.c
@@ -473,15 +473,20 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxgk_header *hdr;
struct krb5_buffer metadata;
- unsigned int offset = sp->offset, len = sp->len;
+ unsigned int len = call->rx_dec_len;
size_t data_offset = 0, data_len = len;
+ void *data = call->rx_dec_buffer, *p = data;
u32 ac = 0;
int ret = -ENOMEM;
_enter("");
- crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE,
- &data_offset, &data_len);
+ if (crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE,
+ &data_offset, &data_len) < 0) {
+ ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
+ rxgk_abort_1_short_header);
+ goto put_gk;
+ }
hdr = kzalloc_obj(*hdr, GFP_NOFS);
if (!hdr)
@@ -496,16 +501,15 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call,
metadata.len = sizeof(*hdr);
metadata.data = hdr;
- ret = rxgk_verify_mic_skb(gk->krb5, gk->rx_Kc, &metadata,
- skb, &offset, &len, &ac);
+ ret = rxgk_verify_mic(gk->krb5, gk->rx_Kc, &metadata, &p, &len, &ac);
kfree(hdr);
if (ret < 0) {
if (ret != -ENOMEM)
rxrpc_abort_eproto(call, skb, ac,
rxgk_abort_1_verify_mic_eproto);
} else {
- sp->offset = offset;
- sp->len = len;
+ call->rx_dec_offset = p - data;
+ call->rx_dec_len = len;
}
put_gk:
@@ -522,49 +526,53 @@ static int rxgk_verify_packet_encrypted(struct rxrpc_call *call,
struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxgk_header hdr;
- unsigned int offset = sp->offset, len = sp->len;
+ struct rxgk_header *hdr;
+ unsigned int offset = 0, len = call->rx_dec_len;
+ void *data = call->rx_dec_buffer, *p = data;
int ret;
u32 ac = 0;
_enter("");
- ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac);
+ if (crypto_krb5_check_data_len(gk->krb5, KRB5_ENCRYPT_MODE,
+ len, sizeof(*hdr)) < 0) {
+ ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
+ rxgk_abort_2_short_header);
+ goto error;
+ }
+
+ ret = rxgk_decrypt(gk->krb5, gk->rx_enc, &p, &len, &ac);
if (ret < 0) {
if (ret != -ENOMEM)
rxrpc_abort_eproto(call, skb, ac, rxgk_abort_2_decrypt_eproto);
goto error;
}
+ offset = p - data;
- if (len < sizeof(hdr)) {
+ if (len < sizeof(*hdr)) {
ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
rxgk_abort_2_short_header);
goto error;
}
/* Extract the header from the skb */
- ret = skb_copy_bits(skb, offset, &hdr, sizeof(hdr));
- if (ret < 0) {
- ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
- rxgk_abort_2_short_encdata);
- goto error;
- }
- offset += sizeof(hdr);
- len -= sizeof(hdr);
-
- if (ntohl(hdr.epoch) != call->conn->proto.epoch ||
- ntohl(hdr.cid) != call->cid ||
- ntohl(hdr.call_number) != call->call_id ||
- ntohl(hdr.seq) != sp->hdr.seq ||
- ntohl(hdr.sec_index) != call->security_ix ||
- ntohl(hdr.data_len) > len) {
+ hdr = data + offset;
+ offset += sizeof(*hdr);
+ len -= sizeof(*hdr);
+
+ if (ntohl(hdr->epoch) != call->conn->proto.epoch ||
+ ntohl(hdr->cid) != call->cid ||
+ ntohl(hdr->call_number) != call->call_id ||
+ ntohl(hdr->seq) != sp->hdr.seq ||
+ ntohl(hdr->sec_index) != call->security_ix ||
+ ntohl(hdr->data_len) > len) {
ret = rxrpc_abort_eproto(call, skb, RXGK_SEALEDINCON,
rxgk_abort_2_short_data);
goto error;
}
- sp->offset = offset;
- sp->len = ntohl(hdr.data_len);
+ call->rx_dec_offset = offset;
+ call->rx_dec_len = ntohl(hdr->data_len);
ret = 0;
error:
rxgk_put(gk);
@@ -1076,11 +1084,12 @@ static int rxgk_sendmsg_respond_to_challenge(struct sk_buff *challenge,
* unsigned int call_numbers<>;
* };
*/
-static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn,
- const struct krb5_enctype *krb5,
- struct sk_buff *skb,
- __be32 *p, __be32 *end)
+static int rxgk_verify_authenticator(struct rxrpc_connection *conn,
+ const struct krb5_enctype *krb5,
+ struct sk_buff *skb,
+ void *auth, unsigned int auth_len)
{
+ __be32 *p = auth, *end = auth + auth_len;
u32 app_len, call_count, level, epoch, cid, i;
_enter("");
@@ -1144,37 +1153,6 @@ static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn,
}
/*
- * Extract the authenticator and verify it.
- */
-static int rxgk_verify_authenticator(struct rxrpc_connection *conn,
- const struct krb5_enctype *krb5,
- struct sk_buff *skb,
- unsigned int auth_offset, unsigned int auth_len)
-{
- void *auth;
- __be32 *p;
- int ret;
-
- auth = kmalloc(auth_len, GFP_NOFS);
- if (!auth)
- return -ENOMEM;
-
- ret = skb_copy_bits(skb, auth_offset, auth, auth_len);
- if (ret < 0) {
- ret = rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
- rxgk_abort_resp_short_auth);
- goto error;
- }
-
- p = auth;
- ret = rxgk_do_verify_authenticator(conn, krb5, skb, p,
- p + auth_len / sizeof(*p));
-error:
- kfree(auth);
- return ret;
-}
-
-/*
* Verify a response.
*
* struct RXGK_Response {
@@ -1184,49 +1162,45 @@ error:
* };
*/
static int rxgk_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ void *buffer, unsigned int len)
{
const struct krb5_enctype *krb5;
struct rxrpc_key_token *token;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxgk_response rhdr;
+ struct rxgk_response *rhdr;
struct rxgk_context *gk;
struct key *key = NULL;
- unsigned int offset = sizeof(struct rxrpc_wire_header);
- unsigned int len = skb->len - sizeof(struct rxrpc_wire_header);
- unsigned int token_offset, token_len;
- unsigned int auth_offset, auth_len;
+ unsigned int resp_token_len, auth_len;
+ void *resp_token, *auth;
__be32 xauth_len;
int ret, ec;
_enter("{%d}", conn->debug_id);
/* Parse the RXGK_Response object */
- if (sizeof(rhdr) + sizeof(__be32) > len)
+ if (len < sizeof(*rhdr) + sizeof(__be32))
goto short_packet;
-
- if (skb_copy_bits(skb, offset, &rhdr, sizeof(rhdr)) < 0)
- goto short_packet;
- offset += sizeof(rhdr);
- len -= sizeof(rhdr);
-
- token_offset = offset;
- token_len = ntohl(rhdr.token_len);
- if (token_len > len ||
- xdr_round_up(token_len) + sizeof(__be32) > len)
+ rhdr = buffer;
+ buffer += sizeof(*rhdr);
+ len -= sizeof(*rhdr);
+
+ resp_token = buffer;
+ resp_token_len = ntohl(rhdr->token_len);
+ if (resp_token_len > len ||
+ xdr_round_up(resp_token_len) + sizeof(__be32) > len)
goto short_packet;
- trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, token_len);
+ trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, resp_token_len);
- offset += xdr_round_up(token_len);
- len -= xdr_round_up(token_len);
+ buffer += xdr_round_up(resp_token_len);
+ len -= xdr_round_up(resp_token_len);
- if (skb_copy_bits(skb, offset, &xauth_len, sizeof(xauth_len)) < 0)
- goto short_packet;
- offset += sizeof(xauth_len);
+ xauth_len = *(__be32 *)buffer;
+ buffer += sizeof(xauth_len);
len -= sizeof(xauth_len);
- auth_offset = offset;
+ auth = buffer;
auth_len = ntohl(xauth_len);
if (auth_len > len)
goto short_packet;
@@ -1241,7 +1215,7 @@ static int rxgk_verify_response(struct rxrpc_connection *conn,
* to the app to deal with - which might mean a round trip to
* userspace.
*/
- ret = rxgk_extract_token(conn, skb, token_offset, token_len, &key);
+ ret = rxgk_extract_token(conn, skb, resp_token, resp_token_len, &key);
if (ret < 0)
goto out;
@@ -1255,7 +1229,7 @@ static int rxgk_verify_response(struct rxrpc_connection *conn,
*/
token = key->payload.data[0];
conn->security_level = token->rxgk->level;
- conn->rxgk.start_time = __be64_to_cpu(rhdr.start_time);
+ conn->rxgk.start_time = __be64_to_cpu(rhdr->start_time);
gk = rxgk_generate_transport_key(conn, token->rxgk, sp->hdr.cksum, GFP_NOFS);
if (IS_ERR(gk)) {
@@ -1265,18 +1239,18 @@ static int rxgk_verify_response(struct rxrpc_connection *conn,
krb5 = gk->krb5;
- trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum, token_len);
+ trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum,
+ resp_token_len);
/* Decrypt, parse and verify the authenticator. */
- ret = rxgk_decrypt_skb(krb5, gk->resp_enc, skb,
- &auth_offset, &auth_len, &ec);
+ ret = rxgk_decrypt(krb5, gk->resp_enc, &auth, &auth_len, &ec);
if (ret < 0) {
rxrpc_abort_conn(conn, skb, RXGK_SEALEDINCON, ret,
rxgk_abort_resp_auth_dec);
goto out_gk;
}
- ret = rxgk_verify_authenticator(conn, krb5, skb, auth_offset, auth_len);
+ ret = rxgk_verify_authenticator(conn, krb5, skb, auth, auth_len);
if (ret < 0)
goto out_gk;
diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c
index 0ef2a29eb695..200a30064fae 100644
--- a/net/rxrpc/rxgk_app.c
+++ b/net/rxrpc/rxgk_app.c
@@ -40,7 +40,7 @@
* };
*/
int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int ticket_offset, unsigned int ticket_len,
+ void *buffer, unsigned int ticket_len,
struct key **_key)
{
struct rxrpc_key_token *token;
@@ -49,7 +49,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
size_t pre_ticket_len, payload_len;
unsigned int klen, enctype;
void *payload, *ticket;
- __be32 *t, *p, *q, tmp[2];
+ __be32 *t, *p, *q, *tmp;
int ret;
_enter("");
@@ -59,10 +59,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
rxgk_abort_resp_short_yfs_tkt);
/* Get the session key length */
- ret = skb_copy_bits(skb, ticket_offset, tmp, sizeof(tmp));
- if (ret < 0)
- return rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO,
- rxgk_abort_resp_short_yfs_klen);
+ tmp = buffer;
enctype = ntohl(tmp[0]);
klen = ntohl(tmp[1]);
@@ -84,12 +81,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
* it.
*/
ticket = payload + pre_ticket_len;
- ret = skb_copy_bits(skb, ticket_offset, ticket, ticket_len);
- if (ret < 0) {
- ret = rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO,
- rxgk_abort_resp_short_yfs_tkt);
- goto error;
- }
+ memcpy(ticket, buffer, ticket_len);
/* Fill out the form header. */
p = payload;
@@ -131,7 +123,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
goto error;
}
- /* Ticket read in with skb_copy_bits above */
+ /* Ticket appended above. */
q += xdr_round_up(ticket_len) / 4;
if (WARN_ON((unsigned long)q - (unsigned long)payload != payload_len)) {
ret = -EIO;
@@ -182,14 +174,15 @@ error:
* [tools.ietf.org/html/draft-wilkinson-afs3-rxgk-afs-08 sec 6.1]
*/
int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int token_offset, unsigned int token_len,
+ void *token, unsigned int token_len,
struct key **_key)
{
const struct krb5_enctype *krb5;
const struct krb5_buffer *server_secret;
struct crypto_aead *token_enc = NULL;
struct key *server_key;
- unsigned int ticket_offset, ticket_len;
+ unsigned int ticket_len;
+ void *ticket;
u32 kvno, enctype;
int ret, ec = 0;
@@ -197,24 +190,23 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
__be32 kvno;
__be32 enctype;
__be32 token_len;
- } container;
+ } *container;
- if (token_len < sizeof(container))
+ if (token_len < sizeof(*container))
goto short_packet;
/* Decode the RXGK_TokenContainer object. This tells us which server
* key we should be using. We can then fetch the key, get the secret
* and set up the crypto to extract the token.
*/
- if (skb_copy_bits(skb, token_offset, &container, sizeof(container)) < 0)
- goto short_packet;
+ container = token;
+ token += sizeof(*container);
- kvno = ntohl(container.kvno);
- enctype = ntohl(container.enctype);
- ticket_len = ntohl(container.token_len);
- ticket_offset = token_offset + sizeof(container);
+ kvno = ntohl(container->kvno);
+ enctype = ntohl(container->enctype);
+ ticket_len = ntohl(container->token_len);
- if (ticket_len > xdr_round_down(token_len - sizeof(container)))
+ if (ticket_len > xdr_round_down(token_len - sizeof(*container)))
goto short_packet;
_debug("KVNO %u", kvno);
@@ -237,8 +229,8 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
* gain access to K0, from which we can derive the transport key and
* thence decode the authenticator.
*/
- ret = rxgk_decrypt_skb(krb5, token_enc, skb,
- &ticket_offset, &ticket_len, &ec);
+ ticket = token;
+ ret = rxgk_decrypt(krb5, token_enc, &ticket, &ticket_len, &ec);
crypto_free_aead(token_enc);
token_enc = NULL;
if (ret < 0) {
@@ -248,7 +240,7 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
return ret;
}
- ret = conn->security->default_decode_ticket(conn, skb, ticket_offset,
+ ret = conn->security->default_decode_ticket(conn, skb, ticket,
ticket_len, _key);
if (ret < 0)
goto cant_get_token;
diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h
index 1e257d7ab8ec..3deed5863f5a 100644
--- a/net/rxrpc/rxgk_common.h
+++ b/net/rxrpc/rxgk_common.h
@@ -41,10 +41,10 @@ struct rxgk_context {
* rxgk_app.c
*/
int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int ticket_offset, unsigned int ticket_len,
+ void *ticket, unsigned int ticket_len,
struct key **_key);
int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int token_offset, unsigned int token_len,
+ void *token, unsigned int token_len,
struct key **_key);
/*
@@ -62,31 +62,30 @@ int rxgk_set_up_token_cipher(const struct krb5_buffer *server_key,
gfp_t gfp);
/*
- * Apply decryption and checksumming functions to part of an skbuff. The
- * offset and length are updated to reflect the actual content of the encrypted
+ * Apply decryption and checksumming functions a flat data buffer. The data
+ * point and length are updated to reflect the actual content of the encrypted
* region.
*/
-static inline
-int rxgk_decrypt_skb(const struct krb5_enctype *krb5,
- struct crypto_aead *aead,
- struct sk_buff *skb,
- unsigned int *_offset, unsigned int *_len,
- int *_error_code)
+static inline int rxgk_decrypt(const struct krb5_enctype *krb5,
+ struct crypto_aead *aead,
+ void **_data, unsigned int *_len,
+ int *_error_code)
{
- struct scatterlist sg[16];
+ struct scatterlist sg[1];
size_t offset = 0, len = *_len;
- int nr_sg, ret;
+ int ret;
- sg_init_table(sg, ARRAY_SIZE(sg));
- nr_sg = skb_to_sgvec(skb, sg, *_offset, len);
- if (unlikely(nr_sg < 0))
- return nr_sg;
+ sg_init_one(sg, *_data, len);
- ret = crypto_krb5_decrypt(krb5, aead, sg, nr_sg,
- &offset, &len);
+ ret = crypto_krb5_decrypt(krb5, aead, sg, 1, &offset, &len);
switch (ret) {
case 0:
- *_offset += offset;
+ if (offset & 3) {
+ *_error_code = RXGK_INCONSISTENCY;
+ ret = -EPROTO;
+ break;
+ }
+ *_data += offset;
*_len = len;
break;
case -EBADMSG: /* Checksum mismatch. */
@@ -106,31 +105,26 @@ int rxgk_decrypt_skb(const struct krb5_enctype *krb5,
}
/*
- * Check the MIC on a region of an skbuff. The offset and length are updated
- * to reflect the actual content of the secure region.
+ * Check the MIC on a flat buffer. The data pointer and length are updated to
+ * reflect the actual content of the secure region.
*/
static inline
-int rxgk_verify_mic_skb(const struct krb5_enctype *krb5,
- struct crypto_shash *shash,
- const struct krb5_buffer *metadata,
- struct sk_buff *skb,
- unsigned int *_offset, unsigned int *_len,
- u32 *_error_code)
+int rxgk_verify_mic(const struct krb5_enctype *krb5,
+ struct crypto_shash *shash,
+ const struct krb5_buffer *metadata,
+ void **_data, unsigned int *_len,
+ u32 *_error_code)
{
- struct scatterlist sg[16];
+ struct scatterlist sg[1];
size_t offset = 0, len = *_len;
- int nr_sg, ret;
+ int ret;
- sg_init_table(sg, ARRAY_SIZE(sg));
- nr_sg = skb_to_sgvec(skb, sg, *_offset, len);
- if (unlikely(nr_sg < 0))
- return nr_sg;
+ sg_init_one(sg, *_data, len);
- ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, nr_sg,
- &offset, &len);
+ ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, 1, &offset, &len);
switch (ret) {
case 0:
- *_offset += offset;
+ *_data += offset;
*_len = len;
break;
case -EBADMSG: /* Checksum mismatch */
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index cba7935977f0..6fbd883401ac 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -430,27 +430,25 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
rxrpc_seq_t seq,
struct skcipher_request *req)
{
- struct rxkad_level1_hdr sechdr;
+ struct rxkad_level1_hdr *sechdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
- struct scatterlist sg[16];
- u32 data_size, buf;
+ struct scatterlist sg[1];
+ void *data = call->rx_dec_buffer;
+ u32 len = sp->len, data_size, buf;
u16 check;
int ret;
_enter("");
- if (sp->len < 8)
+ if (len < 8)
return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
rxkad_abort_1_short_header);
/* Decrypt the skbuff in-place. TODO: We really want to decrypt
* directly into the target buffer.
*/
- sg_init_table(sg, ARRAY_SIZE(sg));
- ret = skb_to_sgvec(skb, sg, sp->offset, 8);
- if (unlikely(ret < 0))
- return ret;
+ sg_init_one(sg, data, len);
/* start the decryption afresh */
memset(&iv, 0, sizeof(iv));
@@ -464,13 +462,11 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
return ret;
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
- return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
- rxkad_abort_1_short_encdata);
- sp->offset += sizeof(sechdr);
- sp->len -= sizeof(sechdr);
+ sechdr = data;
+ call->rx_dec_offset = sizeof(*sechdr);
+ len -= sizeof(*sechdr);
- buf = ntohl(sechdr.data_size);
+ buf = ntohl(sechdr->data_size);
data_size = buf & 0xffff;
check = buf >> 16;
@@ -479,10 +475,10 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
if (check != 0)
return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
rxkad_abort_1_short_check);
- if (data_size > sp->len)
+ if (data_size > len)
return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
rxkad_abort_1_short_data);
- sp->len = data_size;
+ call->rx_dec_len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
@@ -496,43 +492,28 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
struct skcipher_request *req)
{
const struct rxrpc_key_token *token;
- struct rxkad_level2_hdr sechdr;
+ struct rxkad_level2_hdr *sechdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
- struct scatterlist _sg[4], *sg;
- u32 data_size, buf;
+ struct scatterlist sg[1];
+ void *data = call->rx_dec_buffer;
+ u32 len = sp->len, data_size, buf;
u16 check;
- int nsg, ret;
+ int ret;
- _enter(",{%d}", sp->len);
+ _enter(",{%d}", len);
- if (sp->len < 8)
+ if (len < 8)
return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
rxkad_abort_2_short_header);
/* Don't let the crypto algo see a misaligned length. */
- sp->len = round_down(sp->len, 8);
+ len = round_down(len, 8);
- /* Decrypt the skbuff in-place. TODO: We really want to decrypt
- * directly into the target buffer.
+ /* Decrypt in place in the call's decryption buffer. TODO: We really
+ * want to decrypt directly into the target buffer.
*/
- sg = _sg;
- nsg = skb_shinfo(skb)->nr_frags + 1;
- if (nsg <= 4) {
- nsg = 4;
- } else {
- sg = kmalloc_objs(*sg, nsg, GFP_NOIO);
- if (!sg)
- return -ENOMEM;
- }
-
- sg_init_table(sg, nsg);
- ret = skb_to_sgvec(skb, sg, sp->offset, sp->len);
- if (unlikely(ret < 0)) {
- if (sg != _sg)
- kfree(sg);
- return ret;
- }
+ sg_init_one(sg, data, len);
/* decrypt from the session key */
token = call->conn->key->payload.data[0];
@@ -540,11 +521,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x);
+ skcipher_request_set_crypt(req, sg, sg, len, iv.x);
ret = crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
- if (sg != _sg)
- kfree(sg);
if (ret < 0) {
if (ret == -ENOMEM)
return ret;
@@ -553,13 +532,11 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
}
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
- return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
- rxkad_abort_2_short_len);
- sp->offset += sizeof(sechdr);
- sp->len -= sizeof(sechdr);
+ sechdr = data;
+ call->rx_dec_offset = sizeof(*sechdr);
+ len -= sizeof(*sechdr);
- buf = ntohl(sechdr.data_size);
+ buf = ntohl(sechdr->data_size);
data_size = buf & 0xffff;
check = buf >> 16;
@@ -569,17 +546,18 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
rxkad_abort_2_short_check);
- if (data_size > sp->len)
+ if (data_size > len)
return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
rxkad_abort_2_short_data);
- sp->len = data_size;
+ call->rx_dec_len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
}
/*
- * Verify the security on a received packet and the subpackets therein.
+ * Verify the security on a received (sub)packet. If the packet needs
+ * modifying (e.g. decrypting), it must be copied.
*/
static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
@@ -985,7 +963,6 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
*_expiry = 0;
ASSERT(server_key->payload.data[0] != NULL);
- ASSERTCMP((unsigned long) ticket & 7UL, ==, 0);
memcpy(&iv, &server_key->payload.data[2], sizeof(iv));
@@ -1134,14 +1111,15 @@ unlock:
* verify a response
*/
static int rxkad_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ void *buffer, unsigned int len)
{
struct rxkad_response *response;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
struct key *server_key;
time64_t expiry;
- void *ticket = NULL;
+ void *ticket;
u32 version, kvno, ticket_len, level;
__be32 csum;
int ret, i;
@@ -1164,13 +1142,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
}
}
- ret = -ENOMEM;
- response = kzalloc_obj(struct rxkad_response, GFP_NOFS);
- if (!response)
- goto error;
-
- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- response, sizeof(*response)) < 0) {
+ response = buffer;
+ if (len < sizeof(*response)) {
ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
rxkad_abort_resp_short);
goto error;
@@ -1182,6 +1155,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len);
+ buffer += sizeof(*response);
+ len -= sizeof(*response);
+
if (version != RXKAD_VERSION) {
ret = rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
rxkad_abort_resp_version);
@@ -1201,13 +1177,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
}
/* extract the kerberos ticket and decrypt and decode it */
- ret = -ENOMEM;
- ticket = kmalloc(ticket_len, GFP_NOFS);
- if (!ticket)
- goto error;
-
- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
- ticket, ticket_len) < 0) {
+ ticket = buffer;
+ if (ticket_len > len) {
ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
rxkad_abort_resp_short_tkt);
goto error;
@@ -1287,8 +1258,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
error:
- kfree(ticket);
- kfree(response);
key_put(server_key);
_leave(" = %d", ret);
return ret;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 8c9a0400c862..0f953bd46b58 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -243,6 +243,20 @@ static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
return q->dequeue(sch);
}
+static void cbs_reset(struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ /* Nothing to do if we couldn't create the underlying qdisc */
+ if (!q->qdisc)
+ return;
+
+ qdisc_reset(q->qdisc);
+ qdisc_watchdog_cancel(&q->watchdog);
+ q->credits = 0;
+ q->last = 0;
+}
+
static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
[TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
};
@@ -540,7 +554,7 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
.dequeue = cbs_dequeue,
.peek = qdisc_peek_dequeued,
.init = cbs_init,
- .reset = qdisc_reset_queue,
+ .reset = cbs_reset,
.destroy = cbs_destroy,
.change = cbs_change,
.dump = cbs_dump,
diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c
index 241e6a46bd00..a22489c14458 100644
--- a/net/sched/sch_dualpi2.c
+++ b/net/sched/sch_dualpi2.c
@@ -938,6 +938,8 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt,
int err;
sch->flags |= TCQ_F_DEQUEUE_DROPS;
+ hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS_PINNED_SOFT);
q->l_queue = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
TC_H_MAKE(sch->handle, 1), extack);
@@ -950,8 +952,6 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt,
q->sch = sch;
dualpi2_reset_default(sch);
- hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED_SOFT);
if (opt && nla_len(opt)) {
err = dualpi2_change(sch, opt, extack);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 58d0d9747f0b..1d2568bb6bc2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1986,6 +1986,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_unlock;
iov_iter_revert(&msg->msg_iter, err);
+
+ /* sctp_sendmsg_to_asoc() may have released the socket
+ * lock (sctp_wait_for_sndbuf), during which other
+ * associations on ep->asocs could have been peeled
+ * off or freed. @asoc itself is revalidated by the
+ * base.dead and base.sk checks in sctp_wait_for_sndbuf,
+ * so re-derive the cached cursor from it.
+ */
+ tmp = list_next_entry(asoc, asocs);
}
goto out_unlock;
diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c
index 94bc9c7382ea..dea9270f3e57 100644
--- a/net/shaper/shaper.c
+++ b/net/shaper/shaper.c
@@ -21,6 +21,8 @@
#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK
+static_assert(NET_SHAPER_ID_UNSPEC == NET_SHAPER_MAX_HANDLE_ID + 1);
+
struct net_shaper_hierarchy {
struct xarray shapers;
};
@@ -90,6 +92,12 @@ static int net_shaper_handle_size(void)
nla_total_size(sizeof(u32)));
}
+static int net_shaper_group_reply_size(void)
+{
+ return nla_total_size(sizeof(u32)) + /* NET_SHAPER_A_IFINDEX */
+ net_shaper_handle_size(); /* NET_SHAPER_A_HANDLE */
+}
+
static int net_shaper_fill_binding(struct sk_buff *msg,
const struct net_shaper_binding *binding,
u32 type)
@@ -130,35 +138,58 @@ handle_nest_cancel:
return -EMSGSIZE;
}
+static void net_shaper_copy(struct net_shaper *dst,
+ const struct net_shaper *src)
+{
+ WRITE_ONCE(dst->parent.scope, READ_ONCE(src->parent.scope));
+ WRITE_ONCE(dst->parent.id, READ_ONCE(src->parent.id));
+ WRITE_ONCE(dst->handle.scope, READ_ONCE(src->handle.scope));
+ WRITE_ONCE(dst->handle.id, READ_ONCE(src->handle.id));
+
+ WRITE_ONCE(dst->metric, READ_ONCE(src->metric));
+ WRITE_ONCE(dst->bw_min, READ_ONCE(src->bw_min));
+ WRITE_ONCE(dst->bw_max, READ_ONCE(src->bw_max));
+ WRITE_ONCE(dst->burst, READ_ONCE(src->burst));
+ WRITE_ONCE(dst->priority, READ_ONCE(src->priority));
+ WRITE_ONCE(dst->weight, READ_ONCE(src->weight));
+
+ /* private fields are only used on the write path under the lock */
+ data_race(dst->leaves = src->leaves);
+}
+
static int
net_shaper_fill_one(struct sk_buff *msg,
const struct net_shaper_binding *binding,
const struct net_shaper *shaper,
const struct genl_info *info)
{
+ struct net_shaper cur;
void *hdr;
hdr = genlmsg_iput(msg, info);
if (!hdr)
return -EMSGSIZE;
+ /* Make a copy to avoid data races */
+ net_shaper_copy(&cur, shaper);
+
if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) ||
- net_shaper_fill_handle(msg, &shaper->parent,
+ net_shaper_fill_handle(msg, &cur.parent,
NET_SHAPER_A_PARENT) ||
- net_shaper_fill_handle(msg, &shaper->handle,
+ net_shaper_fill_handle(msg, &cur.handle,
NET_SHAPER_A_HANDLE) ||
- ((shaper->bw_min || shaper->bw_max || shaper->burst) &&
- nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) ||
- (shaper->bw_min &&
- nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) ||
- (shaper->bw_max &&
- nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) ||
- (shaper->burst &&
- nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) ||
- (shaper->priority &&
- nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) ||
- (shaper->weight &&
- nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight)))
+ ((cur.bw_min || cur.bw_max || cur.burst) &&
+ nla_put_u32(msg, NET_SHAPER_A_METRIC, cur.metric)) ||
+ (cur.bw_min &&
+ nla_put_uint(msg, NET_SHAPER_A_BW_MIN, cur.bw_min)) ||
+ (cur.bw_max &&
+ nla_put_uint(msg, NET_SHAPER_A_BW_MAX, cur.bw_max)) ||
+ (cur.burst &&
+ nla_put_uint(msg, NET_SHAPER_A_BURST, cur.burst)) ||
+ (cur.priority &&
+ nla_put_u32(msg, NET_SHAPER_A_PRIORITY, cur.priority)) ||
+ (cur.weight &&
+ nla_put_u32(msg, NET_SHAPER_A_WEIGHT, cur.weight)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -275,25 +306,24 @@ static void net_shaper_default_parent(const struct net_shaper_handle *handle,
parent->id = 0;
}
-/*
- * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as
- * it's cleared by xa_store().
- */
-#define NET_SHAPER_NOT_VALID XA_MARK_1
-
static struct net_shaper *
net_shaper_lookup(struct net_shaper_binding *binding,
const struct net_shaper_handle *handle)
{
u32 index = net_shaper_handle_to_index(handle);
struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper *cur;
hierarchy = net_shaper_hierarchy_rcu(binding);
- if (!hierarchy || xa_get_mark(&hierarchy->shapers, index,
- NET_SHAPER_NOT_VALID))
+ if (!hierarchy)
+ return NULL;
+
+ cur = xa_load(&hierarchy->shapers, index);
+ /* Check valid before reading fields */
+ if (!cur || !smp_load_acquire(&cur->valid))
return NULL;
- return xa_load(&hierarchy->shapers, index);
+ return cur;
}
/* Allocate on demand the per device shaper's hierarchy container.
@@ -348,7 +378,7 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding,
handle->id == NET_SHAPER_ID_UNSPEC) {
u32 min, max;
- handle->id = NET_SHAPER_ID_MASK - 1;
+ handle->id = NET_SHAPER_MAX_HANDLE_ID;
max = net_shaper_handle_to_index(handle);
handle->id = 0;
min = net_shaper_handle_to_index(handle);
@@ -370,13 +400,10 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding,
goto free_id;
}
- /* Mark 'tentative' shaper inside the hierarchy container.
- * xa_set_mark is a no-op if the previous store fails.
+ /* Insert as 'tentative' (no VALID mark). The mark will be set by
+ * net_shaper_commit() once the driver-side configuration succeeds.
*/
- xa_lock(&hierarchy->shapers);
- prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL);
- __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID);
- xa_unlock(&hierarchy->shapers);
+ prev = xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL);
if (xa_err(prev)) {
NL_SET_ERR_MSG(extack, "Can't insert shaper into device store");
kfree_rcu(cur, rcu);
@@ -410,12 +437,10 @@ static void net_shaper_commit(struct net_shaper_binding *binding,
if (WARN_ON_ONCE(!cur))
continue;
- /* Successful update: drop the tentative mark
- * and update the hierarchy container.
- */
- __xa_clear_mark(&hierarchy->shapers, index,
- NET_SHAPER_NOT_VALID);
- *cur = shapers[i];
+ /* Successful update: update the hierarchy container... */
+ net_shaper_copy(cur, &shapers[i]);
+ /* ... publish to lockless readers. */
+ smp_store_release(&cur->valid, true);
}
xa_unlock(&hierarchy->shapers);
}
@@ -431,10 +456,11 @@ static void net_shaper_rollback(struct net_shaper_binding *binding)
return;
xa_lock(&hierarchy->shapers);
- xa_for_each_marked(&hierarchy->shapers, index, cur,
- NET_SHAPER_NOT_VALID) {
+ xa_for_each(&hierarchy->shapers, index, cur) {
+ if (cur->valid)
+ continue;
__xa_erase(&hierarchy->shapers, index);
- kfree(cur);
+ kfree_rcu(cur, rcu);
}
xa_unlock(&hierarchy->shapers);
}
@@ -465,10 +491,21 @@ static int net_shaper_parse_handle(const struct nlattr *attr,
* shaper (any other value).
*/
id_attr = tb[NET_SHAPER_A_HANDLE_ID];
- if (id_attr)
+ if (id_attr) {
id = nla_get_u32(id_attr);
- else if (handle->scope == NET_SHAPER_SCOPE_NODE)
+ } else if (handle->scope == NET_SHAPER_SCOPE_NODE) {
id = NET_SHAPER_ID_UNSPEC;
+ } else if (handle->scope == NET_SHAPER_SCOPE_QUEUE) {
+ NL_SET_ERR_ATTR_MISS(info->extack, attr,
+ NET_SHAPER_A_HANDLE_ID);
+ return -EINVAL;
+ }
+
+ if (id && handle->scope == NET_SHAPER_SCOPE_NETDEV) {
+ NL_SET_ERR_MSG_ATTR(info->extack, id_attr,
+ "Netdev scope is a singleton, must use ID 0");
+ return -EINVAL;
+ }
handle->id = id;
return 0;
@@ -836,7 +873,12 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb,
goto out_unlock;
for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index,
- U32_MAX, XA_PRESENT)); ctx->start_index++) {
+ U32_MAX, XA_PRESENT));
+ ctx->start_index++) {
+ /* Check valid before reading fields */
+ if (!smp_load_acquire(&shaper->valid))
+ continue;
+
ret = net_shaper_fill_one(skb, binding, shaper, info);
if (ret)
break;
@@ -932,6 +974,46 @@ static int net_shaper_handle_cmp(const struct net_shaper_handle *a,
return memcmp(a, b, sizeof(*a));
}
+static int net_shaper_parse_leaves(struct net_shaper_binding *binding,
+ struct genl_info *info,
+ const struct net_shaper *node,
+ struct net_shaper *leaves,
+ int leaves_count)
+{
+ struct nlattr *attr;
+ int i, j, ret, rem;
+
+ i = 0;
+ nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES,
+ genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ if (WARN_ON_ONCE(i >= leaves_count))
+ return -EINVAL;
+
+ ret = net_shaper_parse_leaf(binding, attr, info,
+ node, &leaves[i]);
+ if (ret)
+ return ret;
+
+ /* Reject duplicates */
+ for (j = 0; j < i; j++) {
+ if (net_shaper_handle_cmp(&leaves[i].handle,
+ &leaves[j].handle))
+ continue;
+
+ NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr,
+ "Duplicate leaf shaper %d:%d",
+ leaves[i].handle.scope,
+ leaves[i].handle.id);
+ return -EINVAL;
+ }
+
+ i++;
+ }
+
+ return 0;
+}
+
static int net_shaper_parent_from_leaves(int leaves_count,
const struct net_shaper *leaves,
struct net_shaper *node,
@@ -964,15 +1046,22 @@ static int __net_shaper_group(struct net_shaper_binding *binding,
int i, ret;
if (node->handle.scope == NET_SHAPER_SCOPE_NODE) {
+ struct net_shaper *cur = NULL;
+
new_node = node->handle.id == NET_SHAPER_ID_UNSPEC;
- if (!new_node && !net_shaper_lookup(binding, &node->handle)) {
- /* The related attribute is not available when
- * reaching here from the delete() op.
- */
- NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists",
- node->handle.scope, node->handle.id);
- return -ENOENT;
+ if (!new_node) {
+ cur = net_shaper_lookup(binding, &node->handle);
+ if (!cur) {
+ /* The related attribute is not available
+ * when reaching here from the delete() op.
+ */
+ NL_SET_ERR_MSG_FMT(extack,
+ "Node shaper %d:%d does not exist",
+ node->handle.scope,
+ node->handle.id);
+ return -ENOENT;
+ }
}
/* When unspecified, the node parent scope is inherited from
@@ -986,6 +1075,15 @@ static int __net_shaper_group(struct net_shaper_binding *binding,
return ret;
}
+ if (cur && net_shaper_handle_cmp(&cur->parent,
+ &node->parent)) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "Cannot reparent node shaper %d:%d",
+ node->handle.scope,
+ node->handle.id);
+ return -EOPNOTSUPP;
+ }
+
} else {
net_shaper_default_parent(&node->handle, &node->parent);
}
@@ -1162,7 +1260,7 @@ static int net_shaper_group_send_reply(struct net_shaper_binding *binding,
free_msg:
/* Should never happen as msg is pre-allocated with enough space. */
WARN_ONCE(true, "calculated message payload length (%d)",
- net_shaper_handle_size());
+ net_shaper_group_reply_size());
nlmsg_free(msg);
return -EMSGSIZE;
}
@@ -1172,10 +1270,9 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info)
struct net_shaper **old_nodes, *leaves, node = {};
struct net_shaper_hierarchy *hierarchy;
struct net_shaper_binding *binding;
- int i, ret, rem, leaves_count;
+ int i, ret, leaves_count;
int old_nodes_count = 0;
struct sk_buff *msg;
- struct nlattr *attr;
if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES))
return -EINVAL;
@@ -1203,26 +1300,19 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info)
if (ret)
goto free_leaves;
- i = 0;
- nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES,
- genlmsg_data(info->genlhdr),
- genlmsg_len(info->genlhdr), rem) {
- if (WARN_ON_ONCE(i >= leaves_count))
- goto free_leaves;
-
- ret = net_shaper_parse_leaf(binding, attr, info,
- &node, &leaves[i]);
- if (ret)
- goto free_leaves;
- i++;
- }
+ ret = net_shaper_parse_leaves(binding, info, &node,
+ leaves, leaves_count);
+ if (ret)
+ goto free_leaves;
/* Prepare the msg reply in advance, to avoid device operation
* rollback on allocation failure.
*/
- msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL);
- if (!msg)
+ msg = genlmsg_new(net_shaper_group_reply_size(), GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
goto free_leaves;
+ }
hierarchy = net_shaper_hierarchy_setup(binding);
if (!hierarchy) {
diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c
index 9b29be3ef19a..76eff85ec66d 100644
--- a/net/shaper/shaper_nl_gen.c
+++ b/net/shaper/shaper_nl_gen.c
@@ -11,10 +11,15 @@
#include <uapi/linux/net_shaper.h>
+/* Integer value ranges */
+static const struct netlink_range_validation net_shaper_a_handle_id_range = {
+ .max = NET_SHAPER_MAX_HANDLE_ID,
+};
+
/* Common nested types */
const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1] = {
[NET_SHAPER_A_HANDLE_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3),
- [NET_SHAPER_A_HANDLE_ID] = { .type = NLA_U32, },
+ [NET_SHAPER_A_HANDLE_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &net_shaper_a_handle_id_range),
};
const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1] = {
diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h
index 42c46c52c775..2406652a9014 100644
--- a/net/shaper/shaper_nl_gen.h
+++ b/net/shaper/shaper_nl_gen.h
@@ -12,6 +12,8 @@
#include <uapi/linux/net_shaper.h>
+#define NET_SHAPER_MAX_HANDLE_ID 67108862
+
/* Common nested types */
extern const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1];
extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1];
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 185dbed7de5d..dffbd529762d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1400,7 +1400,8 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc,
int i;
for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
- if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
+ if (ini->ism_dev[i] &&
+ ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
ini->ism_selected = i;
return 0;
}
@@ -3054,18 +3055,17 @@ static int __smc_setsockopt(struct socket *sock, int level, int optname,
smc = smc_sk(sk);
+ /* pre-fetch user data outside the lock */
+ if (optname == SMC_LIMIT_HS) {
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
+ return -EFAULT;
+ }
+
lock_sock(sk);
switch (optname) {
case SMC_LIMIT_HS:
- if (optlen < sizeof(int)) {
- rc = -EINVAL;
- break;
- }
- if (copy_from_sockptr(&val, optval, sizeof(int))) {
- rc = -EFAULT;
- break;
- }
-
smc->limit_smc_hs = !!val;
rc = 0;
break;
diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h
index a9a6e3c1113a..53da84f57fd6 100644
--- a/net/smc/smc_tracepoint.h
+++ b/net/smc/smc_tracepoint.h
@@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(smc_msg_event,
__field(const void *, smc)
__field(u64, net_cookie)
__field(size_t, len)
- __string(name, smc->conn.lnk->ibname)
+ __string(name, smc->conn.lnk ? smc->conn.lnk->ibname : "")
),
TP_fast_assign(
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 7081c1214e6c..b5474ce534fb 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -403,7 +403,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
INIT_LIST_HEAD(&cd->readers);
spin_lock_init(&cd->queue_lock);
init_waitqueue_head(&cd->queue_wait);
- cd->next_seqno = 0;
+ cd->next_seqno = 1;
spin_lock(&cache_list_lock);
cd->nextcheck = 0;
cd->entries = 0;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 2590e855f6a5..964ebc268ee4 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -789,23 +789,33 @@ static int tls_push_record(struct sock *sk, int flags,
i = msg_pl->sg.end;
sk_msg_iter_var_prev(i);
+ /* msg_pl->sg.data is a ring; data[MAX+1] is reserved for the wrap
+ * link (frags won't use it). 'i' is now the last filled entry:
+ *
+ * i end start
+ * v v v [ rsv ]
+ * [ d ][ d ][ ][ ]...[ ][ d ][ d ][ d ][chain]
+ * ^ END v
+ * `-----------------------------------------'
+ *
+ * Note that SGL does not allow chain-after-chain, so for TLS 1.3,
+ * we must make sure we don't create the wrap entry and then chain
+ * link to content_type immediately at index 0.
+ */
+ if (i < msg_pl->sg.start)
+ sg_chain(msg_pl->sg.data, ARRAY_SIZE(msg_pl->sg.data),
+ msg_pl->sg.data);
+
rec->content_type = record_type;
if (prot->version == TLS_1_3_VERSION) {
/* Add content type to end of message. No padding added */
sg_set_buf(&rec->sg_content_type, &rec->content_type, 1);
sg_mark_end(&rec->sg_content_type);
- sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1,
- &rec->sg_content_type);
+ sg_chain(msg_pl->sg.data, i + 2, &rec->sg_content_type);
} else {
sg_mark_end(sk_msg_elem(msg_pl, i));
}
- if (msg_pl->sg.end < msg_pl->sg.start) {
- sg_chain(&msg_pl->sg.data[msg_pl->sg.start],
- MAX_SKB_FRAGS - msg_pl->sg.start + 1,
- msg_pl->sg.data);
- }
-
i = msg_pl->sg.start;
sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]);
@@ -1356,9 +1366,14 @@ unlock:
mutex_unlock(&tls_ctx->tx_lock);
}
+/* When has_copied is true the caller has already moved bytes to
+ * userspace. Report sk_err but leave it set so the next read
+ * surfaces it instead of a spurious EOF, otherwise sk_err is
+ * consumed via sock_error().
+ */
static int
tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
- bool released)
+ bool released, bool has_copied)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1376,8 +1391,11 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
if (!sk_psock_queue_empty(psock))
return 0;
- if (sk->sk_err)
+ if (sk->sk_err) {
+ if (has_copied)
+ return -READ_ONCE(sk->sk_err);
return sock_error(sk);
+ }
if (ret < 0)
return ret;
@@ -1413,7 +1431,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
}
if (unlikely(!tls_strp_msg_load(&ctx->strp, released)))
- return tls_rx_rec_wait(sk, psock, nonblock, false);
+ return tls_rx_rec_wait(sk, psock, nonblock, false, has_copied);
return 1;
}
@@ -2100,7 +2118,7 @@ int tls_sw_recvmsg(struct sock *sk,
int to_decrypt, chunk;
err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT,
- released);
+ released, !!(decrypted + copied));
if (err <= 0) {
if (psock) {
chunk = sk_msg_recvmsg(sk, psock, msg, len,
@@ -2287,7 +2305,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct tls_decrypt_arg darg;
err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK,
- true);
+ true, false);
if (err <= 0)
goto splice_read_end;
@@ -2373,7 +2391,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
} else {
struct tls_decrypt_arg darg;
- err = tls_rx_rec_wait(sk, NULL, true, released);
+ err = tls_rx_rec_wait(sk, NULL, true, released, !!copied);
if (err <= 0)
goto read_sock_end;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1cbf36ea043b..dc71ed79be4a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2711,8 +2711,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
* Sleep until more data has arrived. But check for races..
*/
static long unix_stream_data_wait(struct sock *sk, long timeo,
- struct sk_buff *last, unsigned int last_len,
- bool freezable)
+ struct sk_buff *last, bool freezable)
{
unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
struct sk_buff *tail;
@@ -2725,7 +2724,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail != last ||
- (tail && tail->len != last_len) ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -2921,7 +2919,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
int flags = state->flags;
bool check_creds = false;
struct scm_cookie scm;
- unsigned int last_len;
struct unix_sock *u;
int copied = 0;
int err = 0;
@@ -2967,7 +2964,6 @@ redo:
goto unlock;
}
last = skb = skb_peek(&sk->sk_receive_queue);
- last_len = last ? last->len : 0;
again:
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
@@ -3001,8 +2997,7 @@ again:
mutex_unlock(&u->iolock);
- timeo = unix_stream_data_wait(sk, timeo, last,
- last_len, freezable);
+ timeo = unix_stream_data_wait(sk, timeo, last, freezable);
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
@@ -3019,7 +3014,6 @@ unlock:
while (skip >= unix_skb_len(skb)) {
skip -= unix_skb_len(skb);
last = skb;
- last_len = skb->len;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (!skb)
goto again;
@@ -3094,7 +3088,6 @@ unlock:
skip = 0;
last = skb;
- last_len = skb->len;
unix_state_lock(sk);
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (skb)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 9b8014516f4f..df3b418e0392 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -70,34 +70,6 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
return true;
}
-static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
- struct sk_buff *skb,
- struct msghdr *msg,
- size_t pkt_len,
- bool zerocopy)
-{
- struct ubuf_info *uarg;
-
- if (msg->msg_ubuf) {
- uarg = msg->msg_ubuf;
- net_zcopy_get(uarg);
- } else {
- struct ubuf_info_msgzc *uarg_zc;
-
- uarg = msg_zerocopy_realloc(sk_vsock(vsk),
- pkt_len, NULL, false);
- if (!uarg)
- return -1;
-
- uarg_zc = uarg_to_msgzc(uarg);
- uarg_zc->zerocopy = zerocopy ? 1 : 0;
- }
-
- skb_zcopy_init(skb, uarg);
-
- return 0;
-}
-
static int virtio_transport_fill_skb(struct sk_buff *skb,
struct virtio_vsock_pkt_info *info,
size_t len,
@@ -136,27 +108,6 @@ static void virtio_transport_init_hdr(struct sk_buff *skb,
hdr->fwd_cnt = cpu_to_le32(0);
}
-static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb,
- void *dst,
- size_t len)
-{
- struct iov_iter iov_iter = { 0 };
- struct kvec kvec;
- size_t to_copy;
-
- kvec.iov_base = dst;
- kvec.iov_len = len;
-
- iov_iter.iter_type = ITER_KVEC;
- iov_iter.kvec = &kvec;
- iov_iter.nr_segs = 1;
-
- to_copy = min_t(size_t, len, skb->len);
-
- skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
- &iov_iter, to_copy);
-}
-
/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
@@ -166,12 +117,12 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
struct sk_buff *skb;
size_t payload_len;
- /* A packet could be split to fit the RX buffer, so we can retrieve
- * the payload length from the header and the buffer pointer taking
- * care of the offset in the original packet.
+ /* A packet could be split to fit the RX buffer, so we use
+ * the payload length from the header, which has been updated
+ * by the sender to reflect the fragment size.
*/
pkt_hdr = virtio_vsock_hdr(pkt);
- payload_len = pkt->len;
+ payload_len = le32_to_cpu(pkt_hdr->len);
skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
GFP_ATOMIC);
@@ -214,12 +165,18 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));
if (payload_len) {
- if (skb_is_nonlinear(pkt)) {
- void *data = skb_put(skb, payload_len);
-
- virtio_transport_copy_nonlinear_skb(pkt, data, payload_len);
- } else {
- skb_put_data(skb, pkt->data, payload_len);
+ struct iov_iter iov_iter;
+ struct kvec kvec;
+ void *data = skb_put(skb, payload_len);
+
+ kvec.iov_base = data;
+ kvec.iov_len = payload_len;
+ iov_iter_kvec(&iov_iter, ITER_DEST, &kvec, 1, payload_len);
+
+ if (skb_copy_datagram_iter(pkt, VIRTIO_VSOCK_SKB_CB(pkt)->offset,
+ &iov_iter, payload_len)) {
+ kfree_skb(skb);
+ return NULL;
}
}
@@ -332,8 +289,10 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
u32 src_cid, src_port, dst_cid, dst_port;
const struct virtio_transport *t_ops;
struct virtio_vsock_sock *vvs;
+ struct ubuf_info *uarg = NULL;
u32 pkt_len = info->pkt_len;
bool can_zcopy = false;
+ bool have_uref = false;
u32 rest_len;
int ret;
@@ -375,6 +334,25 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
if (can_zcopy)
max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
(MAX_SKB_FRAGS * PAGE_SIZE));
+
+ if (info->msg->msg_flags & MSG_ZEROCOPY &&
+ info->op == VIRTIO_VSOCK_OP_RW) {
+ uarg = info->msg->msg_ubuf;
+
+ if (!uarg) {
+ uarg = msg_zerocopy_realloc(sk_vsock(vsk),
+ pkt_len, NULL, false);
+ if (!uarg) {
+ virtio_transport_put_credit(vvs, pkt_len);
+ return -ENOMEM;
+ }
+
+ if (!can_zcopy)
+ uarg_to_msgzc(uarg)->zerocopy = 0;
+
+ have_uref = true;
+ }
+ }
}
rest_len = pkt_len;
@@ -393,27 +371,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
break;
}
- /* We process buffer part by part, allocating skb on
- * each iteration. If this is last skb for this buffer
- * and MSG_ZEROCOPY mode is in use - we must allocate
- * completion for the current syscall.
- *
- * Pass pkt_len because msg iter is already consumed
- * by virtio_transport_fill_skb(), so iter->count
- * can not be used for RLIMIT_MEMLOCK pinned-pages
- * accounting done by msg_zerocopy_realloc().
- */
- if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY &&
- skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) {
- if (virtio_transport_init_zcopy_skb(vsk, skb,
- info->msg,
- pkt_len,
- can_zcopy)) {
- kfree_skb(skb);
- ret = -ENOMEM;
- break;
- }
- }
+ skb_zcopy_set(skb, uarg, NULL);
virtio_transport_inc_tx_pkt(vvs, skb);
@@ -437,6 +395,18 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
virtio_transport_put_credit(vvs, rest_len);
+ /* msg_zerocopy_realloc() initializes the ubuf_info refcnt to 1.
+ * skb_zcopy_set() increases it for each skb, so we can drop that
+ * initial reference to keep it balanced.
+ */
+ if (have_uref) {
+ if (rest_len == pkt_len)
+ /* No data sent, abort the notification. */
+ net_zcopy_put_abort(uarg, true);
+ else
+ net_zcopy_put(uarg);
+ }
+
/* Return number of bytes, if any data has been sent. */
if (rest_len != pkt_len)
ret = pkt_len - rest_len;
@@ -449,7 +419,14 @@ static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
{
u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0);
- if (skb_overhead + vvs->buf_used + len > vvs->buf_alloc)
+ /* Allow at most buf_alloc * 2 total budget (payload + overhead),
+ * similar to how SO_RCVBUF is doubled to reserve space for sk_buff
+ * metadata. Check payload against buf_alloc to be sure the other
+ * peer is respecting the credit, and sk_buff overhead to bound
+ * queue growth.
+ */
+ if ((u64)vvs->buf_used + len > vvs->buf_alloc ||
+ skb_overhead > vvs->buf_alloc)
return false;
vvs->rx_bytes += len;
@@ -1365,7 +1342,7 @@ destroy:
return err;
}
-static void
+static bool
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
struct sk_buff *skb)
{
@@ -1380,10 +1357,8 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
spin_lock_bh(&vvs->rx_lock);
can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
- if (!can_enqueue) {
- free_pkt = true;
+ if (!can_enqueue)
goto out;
- }
if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count++;
@@ -1423,6 +1398,8 @@ out:
spin_unlock_bh(&vvs->rx_lock);
if (free_pkt)
kfree_skb(skb);
+
+ return can_enqueue;
}
static int
@@ -1435,7 +1412,17 @@ virtio_transport_recv_connected(struct sock *sk,
switch (le16_to_cpu(hdr->op)) {
case VIRTIO_VSOCK_OP_RW:
- virtio_transport_recv_enqueue(vsk, skb);
+ if (!virtio_transport_recv_enqueue(vsk, skb)) {
+ /* There is no more space to queue the packet, so let's
+ * close the connection; otherwise, we'll lose data.
+ */
+ (void)virtio_transport_reset(vsk, skb);
+ virtio_transport_do_close(vsk, true);
+ sk->sk_err = ENOBUFS;
+ sk_error_report(sk);
+ vsock_remove_sock(vsk);
+ break;
+ }
vsock_data_ready(sk);
return err;
case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 4296ca1183f1..d2579380f51e 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1164,7 +1164,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
/* Close and cleanup the connection. */
vmci_transport_send_reset(pending, pkt);
skerr = EPROTO;
- err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
+ err = -EINVAL;
goto destroy;
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 328af43ef832..358cbc9e43d8 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -2462,6 +2462,9 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen,
memcpy(merged_ie + copied_len, next_sub->data,
next_sub->datalen);
copied_len += next_sub->datalen;
+
+ mbssid_elem = next_mbssid;
+ sub_elem = next_sub;
}
return copied_len;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 22d9d9bae8f5..63d145b524c9 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -789,6 +789,8 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq);
if (!chandef.chan)
return -EINVAL;
+ if (!cfg80211_chandef_valid(&chandef))
+ return -EINVAL;
return cfg80211_set_monitor_channel(rdev, dev, &chandef);
case NL80211_IFTYPE_MESH_POINT:
freq = cfg80211_wext_freq(wextfreq);
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index afa457506274..3bff346308d0 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -184,6 +184,10 @@ static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
}
xs = (struct xdp_sock *)sock->sk;
+ if (!READ_ONCE(xs->rx)) {
+ sockfd_put(sock);
+ return -ENOBUFS;
+ }
map_entry = &m->xsk_map[i];
node = xsk_map_node_alloc(m, map_entry);