summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/atm/lec.c26
-rw-r--r--net/batman-adv/bat_v_elp.c10
-rw-r--r--net/batman-adv/hard-interface.c8
-rw-r--r--net/batman-adv/hard-interface.h1
-rw-r--r--net/bridge/br_device.c2
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_private.h10
-rw-r--r--net/bridge/br_vlan_options.c26
-rw-r--r--net/can/bcm.c1
-rw-r--r--net/ceph/auth.c6
-rw-r--r--net/ceph/messenger_v2.c31
-rw-r--r--net/ceph/mon_client.c6
-rw-r--r--net/core/dev.c24
-rw-r--r--net/core/dev.h35
-rw-r--r--net/core/devmem.c6
-rw-r--r--net/core/filter.c13
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/page_pool_user.c4
-rw-r--r--net/core/secure_seq.c80
-rw-r--r--net/core/skmsg.c14
-rw-r--r--net/ipv4/Kconfig2
-rw-r--r--net/ipv4/af_inet.c6
-rw-r--r--net/ipv4/inet_hashtables.c8
-rw-r--r--net/ipv4/ip_tunnel_core.c15
-rw-r--r--net/ipv4/nexthop.c14
-rw-r--r--net/ipv4/syncookies.c11
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_ao.c3
-rw-r--r--net/ipv4/tcp_bpf.c2
-rw-r--r--net/ipv4/tcp_diag.c2
-rw-r--r--net/ipv4/tcp_input.c38
-rw-r--r--net/ipv4/tcp_ipv4.c40
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/udp.c27
-rw-r--r--net/ipv4/udp_bpf.c2
-rw-r--r--net/ipv6/af_inet6.c8
-rw-r--r--net/ipv6/inet6_hashtables.c3
-rw-r--r--net/ipv6/route.c11
-rw-r--r--net/ipv6/syncookies.c11
-rw-r--r--net/ipv6/tcp_ipv6.c40
-rw-r--r--net/mac80211/eht.c1
-rw-r--r--net/mctp/route.c11
-rw-r--r--net/mptcp/pm.c55
-rw-r--r--net/mptcp/pm_kernel.c9
-rw-r--r--net/ncsi/ncsi-aen.c3
-rw-r--r--net/ncsi/ncsi-rsp.c16
-rw-r--r--net/netfilter/nf_tables_api.c49
-rw-r--r--net/netfilter/nfnetlink_cthelper.c8
-rw-r--r--net/netfilter/nfnetlink_queue.c4
-rw-r--r--net/netfilter/nft_chain_filter.c2
-rw-r--r--net/netfilter/nft_set_hash.c1
-rw-r--r--net/netfilter/nft_set_pipapo.c65
-rw-r--r--net/netfilter/nft_set_pipapo.h2
-rw-r--r--net/netfilter/nft_set_rbtree.c8
-rw-r--r--net/netfilter/xt_IDLETIMER.c6
-rw-r--r--net/netfilter/xt_dccp.c4
-rw-r--r--net/netfilter/xt_tcpudp.c6
-rw-r--r--net/nfc/digital_core.c8
-rw-r--r--net/nfc/nci/core.c30
-rw-r--r--net/nfc/nci/data.c12
-rw-r--r--net/nfc/rawsock.c11
-rw-r--r--net/rds/tcp.c14
-rw-r--r--net/rxrpc/af_rxrpc.c8
-rw-r--r--net/sched/act_ct.c6
-rw-r--r--net/sched/act_gate.c265
-rw-r--r--net/sched/act_ife.c93
-rw-r--r--net/sched/cls_api.c7
-rw-r--r--net/sched/sch_cake.c53
-rw-r--r--net/sched/sch_ets.c12
-rw-r--r--net/sched/sch_fq.c1
-rw-r--r--net/sched/sch_teql.c1
-rw-r--r--net/shaper/shaper.c11
-rw-r--r--net/sunrpc/xprtrdma/verbs.c7
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/unix/af_unix.c8
-rw-r--r--net/xdp/xsk.c26
78 files changed, 860 insertions, 532 deletions
diff --git a/net/atm/lec.c b/net/atm/lec.c
index a107375c0629..fb93c6e1c329 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1260,24 +1260,28 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry)
struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
struct net_device *dev = (struct net_device *)vcc->proto_data;
- vcc->pop = vpriv->old_pop;
- if (vpriv->xoff)
- netif_wake_queue(dev);
- kfree(vpriv);
- vcc->user_back = NULL;
- vcc->push = entry->old_push;
- vcc_release_async(vcc, -EPIPE);
+ if (vpriv) {
+ vcc->pop = vpriv->old_pop;
+ if (vpriv->xoff)
+ netif_wake_queue(dev);
+ kfree(vpriv);
+ vcc->user_back = NULL;
+ vcc->push = entry->old_push;
+ vcc_release_async(vcc, -EPIPE);
+ }
entry->vcc = NULL;
}
if (entry->recv_vcc) {
struct atm_vcc *vcc = entry->recv_vcc;
struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
- kfree(vpriv);
- vcc->user_back = NULL;
+ if (vpriv) {
+ kfree(vpriv);
+ vcc->user_back = NULL;
- entry->recv_vcc->push = entry->old_recv_push;
- vcc_release_async(entry->recv_vcc, -EPIPE);
+ entry->recv_vcc->push = entry->old_recv_push;
+ vcc_release_async(entry->recv_vcc, -EPIPE);
+ }
entry->recv_vcc = NULL;
}
}
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 2ce4e5bf9292..fdc2abe96d77 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -111,7 +111,15 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
/* unsupported WiFi driver version */
goto default_throughput;
- real_netdev = batadv_get_real_netdev(hard_iface->net_dev);
+ /* only use rtnl_trylock because the elp worker will be cancelled while
+ * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise
+ * wait forever when the elp work_item was started and it is then also
+ * trying to rtnl_lock
+ */
+ if (!rtnl_trylock())
+ return false;
+ real_netdev = __batadv_get_real_netdev(hard_iface->net_dev);
+ rtnl_unlock();
if (!real_netdev)
goto default_throughput;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 7b7640f3ffe2..d6732c34aeaf 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -204,7 +204,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev)
}
/**
- * batadv_get_real_netdevice() - check if the given netdev struct is a virtual
+ * __batadv_get_real_netdev() - check if the given netdev struct is a virtual
* interface on top of another 'real' interface
* @netdev: the device to check
*
@@ -214,7 +214,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev)
* Return: the 'real' net device or the original net device and NULL in case
* of an error.
*/
-static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
+struct net_device *__batadv_get_real_netdev(struct net_device *netdev)
{
struct batadv_hard_iface *hard_iface = NULL;
struct net_device *real_netdev = NULL;
@@ -267,7 +267,7 @@ struct net_device *batadv_get_real_netdev(struct net_device *net_device)
struct net_device *real_netdev;
rtnl_lock();
- real_netdev = batadv_get_real_netdevice(net_device);
+ real_netdev = __batadv_get_real_netdev(net_device);
rtnl_unlock();
return real_netdev;
@@ -336,7 +336,7 @@ static u32 batadv_wifi_flags_evaluate(struct net_device *net_device)
if (batadv_is_cfg80211_netdev(net_device))
wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT;
- real_netdev = batadv_get_real_netdevice(net_device);
+ real_netdev = __batadv_get_real_netdev(net_device);
if (!real_netdev)
return wifi_flags;
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 9db8a310961e..9ba8fb2bdceb 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -67,6 +67,7 @@ enum batadv_hard_if_bcast {
extern struct notifier_block batadv_hard_if_notifier;
+struct net_device *__batadv_get_real_netdev(struct net_device *net_device);
struct net_device *batadv_get_real_netdev(struct net_device *net_device);
bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface);
bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ee01122f466f..f7502e62dd35 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -74,7 +74,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
- } else if (IS_ENABLED(CONFIG_IPV6) &&
+ } else if (ipv6_mod_enabled() &&
skb->protocol == htons(ETH_P_IPV6) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) +
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 1405f1061a54..2cbae0f9ae1f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -170,7 +170,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
(skb->protocol == htons(ETH_P_ARP) ||
skb->protocol == htons(ETH_P_RARP))) {
br_do_proxy_suppress_arp(skb, br, vid, p);
- } else if (IS_ENABLED(CONFIG_IPV6) &&
+ } else if (ipv6_mod_enabled() &&
skb->protocol == htons(ETH_P_IPV6) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) +
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b9b2981c4841..9b55d38ea9ed 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1345,6 +1345,16 @@ br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
}
static inline bool
+br_multicast_port_ctx_options_equal(const struct net_bridge_mcast_port *pmctx1,
+ const struct net_bridge_mcast_port *pmctx2)
+{
+ return br_multicast_ngroups_get(pmctx1) ==
+ br_multicast_ngroups_get(pmctx2) &&
+ br_multicast_ngroups_get_max(pmctx1) ==
+ br_multicast_ngroups_get_max(pmctx2);
+}
+
+static inline bool
br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx)
{
bool vlan_snooping_enabled;
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index 8fa89b04ee94..5514e1fc8d1f 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -43,9 +43,29 @@ bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
u8 range_mc_rtr = br_vlan_multicast_router(range_end);
u8 curr_mc_rtr = br_vlan_multicast_router(v_curr);
- return v_curr->state == range_end->state &&
- __vlan_tun_can_enter_range(v_curr, range_end) &&
- curr_mc_rtr == range_mc_rtr;
+ if (v_curr->state != range_end->state)
+ return false;
+
+ if (!__vlan_tun_can_enter_range(v_curr, range_end))
+ return false;
+
+ if (curr_mc_rtr != range_mc_rtr)
+ return false;
+
+ /* Check user-visible priv_flags that affect output */
+ if ((v_curr->priv_flags ^ range_end->priv_flags) &
+ (BR_VLFLAG_NEIGH_SUPPRESS_ENABLED | BR_VLFLAG_MCAST_ENABLED))
+ return false;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (!br_vlan_is_master(v_curr) &&
+ !br_multicast_port_ctx_vlan_disabled(&v_curr->port_mcast_ctx) &&
+ !br_multicast_port_ctx_options_equal(&v_curr->port_mcast_ctx,
+ &range_end->port_mcast_ctx))
+ return false;
+#endif
+
+ return true;
}
bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
diff --git a/net/can/bcm.c b/net/can/bcm.c
index b7324e9c955b..fd9fa072881e 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1176,6 +1176,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (!op)
return -ENOMEM;
+ spin_lock_init(&op->bcm_tx_lock);
op->can_id = msg_head->can_id;
op->nframes = msg_head->nframes;
op->cfsiz = CFSIZ(msg_head->flags);
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 343c841784ce..901b93530b21 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -205,9 +205,9 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
s32 result;
u64 global_id;
void *payload, *payload_end;
- int payload_len;
+ u32 payload_len;
char *result_msg;
- int result_msg_len;
+ u32 result_msg_len;
int ret = -EINVAL;
mutex_lock(&ac->mutex);
@@ -217,10 +217,12 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
result = ceph_decode_32(&p);
global_id = ceph_decode_64(&p);
payload_len = ceph_decode_32(&p);
+ ceph_decode_need(&p, end, payload_len, bad);
payload = p;
p += payload_len;
ceph_decode_need(&p, end, sizeof(u32), bad);
result_msg_len = ceph_decode_32(&p);
+ ceph_decode_need(&p, end, result_msg_len, bad);
result_msg = p;
p += result_msg_len;
if (p != end)
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 5ec3272cd2dd..50f65820f623 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -392,7 +392,7 @@ static int head_onwire_len(int ctrl_len, bool secure)
int head_len;
int rem_len;
- BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);
+ BUG_ON(ctrl_len < 1 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);
if (secure) {
head_len = CEPH_PREAMBLE_SECURE_LEN;
@@ -401,9 +401,7 @@ static int head_onwire_len(int ctrl_len, bool secure)
head_len += padded_len(rem_len) + CEPH_GCM_TAG_LEN;
}
} else {
- head_len = CEPH_PREAMBLE_PLAIN_LEN;
- if (ctrl_len)
- head_len += ctrl_len + CEPH_CRC_LEN;
+ head_len = CEPH_PREAMBLE_PLAIN_LEN + ctrl_len + CEPH_CRC_LEN;
}
return head_len;
}
@@ -528,11 +526,16 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc)
desc->fd_aligns[i] = ceph_decode_16(&p);
}
- if (desc->fd_lens[0] < 0 ||
+ /*
+ * This would fire for FRAME_TAG_WAIT (it has one empty
+ * segment), but we should never get it as client.
+ */
+ if (desc->fd_lens[0] < 1 ||
desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
pr_err("bad control segment length %d\n", desc->fd_lens[0]);
return -EINVAL;
}
+
if (desc->fd_lens[1] < 0 ||
desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
pr_err("bad front segment length %d\n", desc->fd_lens[1]);
@@ -549,10 +552,6 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc)
return -EINVAL;
}
- /*
- * This would fire for FRAME_TAG_WAIT (it has one empty
- * segment), but we should never get it as client.
- */
if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
pr_err("last segment empty, segment count %d\n",
desc->fd_seg_cnt);
@@ -2833,12 +2832,15 @@ static int process_message_header(struct ceph_connection *con,
void *p, void *end)
{
struct ceph_frame_desc *desc = &con->v2.in_desc;
- struct ceph_msg_header2 *hdr2 = p;
+ struct ceph_msg_header2 *hdr2;
struct ceph_msg_header hdr;
int skip;
int ret;
u64 seq;
+ ceph_decode_need(&p, end, sizeof(*hdr2), bad);
+ hdr2 = p;
+
/* verify seq# */
seq = le64_to_cpu(hdr2->seq);
if ((s64)seq - (s64)con->in_seq < 1) {
@@ -2869,6 +2871,10 @@ static int process_message_header(struct ceph_connection *con,
WARN_ON(!con->in_msg);
WARN_ON(con->in_msg->con != con);
return 1;
+
+bad:
+ pr_err("failed to decode message header\n");
+ return -EINVAL;
}
static int process_message(struct ceph_connection *con)
@@ -2898,6 +2904,11 @@ static int __handle_control(struct ceph_connection *con, void *p)
if (con->v2.in_desc.fd_tag != FRAME_TAG_MESSAGE)
return process_control(con, p, end);
+ if (con->state != CEPH_CON_S_OPEN) {
+ con->error_msg = "protocol error, unexpected message";
+ return -EINVAL;
+ }
+
ret = process_message_header(con, p, end);
if (ret < 0)
return ret;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 5136b3766c44..d5080530ce0c 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -72,8 +72,8 @@ static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2)
struct ceph_monmap *monmap = NULL;
struct ceph_fsid fsid;
u32 struct_len;
- int blob_len;
- int num_mon;
+ u32 blob_len;
+ u32 num_mon;
u8 struct_v;
u32 epoch;
int ret;
@@ -112,7 +112,7 @@ static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2)
}
ceph_decode_32_safe(p, end, num_mon, e_inval);
- dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch,
+ dout("%s fsid %pU epoch %u num_mon %u\n", __func__, &fsid, epoch,
num_mon);
if (num_mon > CEPH_MAX_MON)
goto e_inval;
diff --git a/net/core/dev.c b/net/core/dev.c
index c1a9f7fdcffa..14a83f2035b9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3987,7 +3987,7 @@ static struct sk_buff *validate_xmit_unreadable_skb(struct sk_buff *skb,
if (shinfo->nr_frags > 0) {
niov = netmem_to_net_iov(skb_frag_netmem(&shinfo->frags[0]));
if (net_is_devmem_iov(niov) &&
- net_devmem_iov_binding(niov)->dev != dev)
+ READ_ONCE(net_devmem_iov_binding(niov)->dev) != dev)
goto out_free;
}
@@ -4818,10 +4818,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
- /* Other cpus might concurrently change txq->xmit_lock_owner
- * to -1 or to their cpu id, but not to our id.
- */
- if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
+ if (!netif_tx_owned(txq, cpu)) {
bool is_list = false;
if (dev_xmit_recursion())
@@ -7794,11 +7791,12 @@ static int napi_thread_wait(struct napi_struct *napi)
return -1;
}
-static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
+static void napi_threaded_poll_loop(struct napi_struct *napi,
+ unsigned long *busy_poll_last_qs)
{
+ unsigned long last_qs = busy_poll_last_qs ? *busy_poll_last_qs : jiffies;
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct softnet_data *sd;
- unsigned long last_qs = jiffies;
for (;;) {
bool repoll = false;
@@ -7827,12 +7825,12 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
/* When busy poll is enabled, the old packets are not flushed in
* napi_complete_done. So flush them here.
*/
- if (busy_poll)
+ if (busy_poll_last_qs)
gro_flush_normal(&napi->gro, HZ >= 1000);
local_bh_enable();
/* Call cond_resched here to avoid watchdog warnings. */
- if (repoll || busy_poll) {
+ if (repoll || busy_poll_last_qs) {
rcu_softirq_qs_periodic(last_qs);
cond_resched();
}
@@ -7840,11 +7838,15 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
if (!repoll)
break;
}
+
+ if (busy_poll_last_qs)
+ *busy_poll_last_qs = last_qs;
}
static int napi_threaded_poll(void *data)
{
struct napi_struct *napi = data;
+ unsigned long last_qs = jiffies;
bool want_busy_poll;
bool in_busy_poll;
unsigned long val;
@@ -7862,7 +7864,7 @@ static int napi_threaded_poll(void *data)
assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state,
want_busy_poll);
- napi_threaded_poll_loop(napi, want_busy_poll);
+ napi_threaded_poll_loop(napi, want_busy_poll ? &last_qs : NULL);
}
return 0;
@@ -13175,7 +13177,7 @@ static void run_backlog_napi(unsigned int cpu)
{
struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
- napi_threaded_poll_loop(&sd->backlog, false);
+ napi_threaded_poll_loop(&sd->backlog, NULL);
}
static void backlog_napi_setup(unsigned int cpu)
diff --git a/net/core/dev.h b/net/core/dev.h
index 98793a738f43..781619e76b3e 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -366,41 +366,6 @@ static inline void napi_assert_will_not_race(const struct napi_struct *napi)
void kick_defer_list_purge(unsigned int cpu);
-#define XMIT_RECURSION_LIMIT 8
-
-#ifndef CONFIG_PREEMPT_RT
-static inline bool dev_xmit_recursion(void)
-{
- return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
- XMIT_RECURSION_LIMIT);
-}
-
-static inline void dev_xmit_recursion_inc(void)
-{
- __this_cpu_inc(softnet_data.xmit.recursion);
-}
-
-static inline void dev_xmit_recursion_dec(void)
-{
- __this_cpu_dec(softnet_data.xmit.recursion);
-}
-#else
-static inline bool dev_xmit_recursion(void)
-{
- return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT);
-}
-
-static inline void dev_xmit_recursion_inc(void)
-{
- current->net_xmit.recursion++;
-}
-
-static inline void dev_xmit_recursion_dec(void)
-{
- current->net_xmit.recursion--;
-}
-#endif
-
int dev_set_hwtstamp_phylib(struct net_device *dev,
struct kernel_hwtstamp_config *cfg,
struct netlink_ext_ack *extack);
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 8c9aad776bf4..69d79aee07ef 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -396,7 +396,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk,
* net_device.
*/
dst_dev = dst_dev_rcu(dst);
- if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) {
+ if (unlikely(!dst_dev) ||
+ unlikely(dst_dev != READ_ONCE(binding->dev))) {
err = -ENODEV;
goto out_unlock;
}
@@ -513,7 +514,8 @@ static void mp_dmabuf_devmem_uninstall(void *mp_priv,
xa_erase(&binding->bound_rxqs, xa_idx);
if (xa_empty(&binding->bound_rxqs)) {
mutex_lock(&binding->lock);
- binding->dev = NULL;
+ ASSERT_EXCLUSIVE_WRITER(binding->dev);
+ WRITE_ONCE(binding->dev, NULL);
mutex_unlock(&binding->lock);
}
break;
diff --git a/net/core/filter.c b/net/core/filter.c
index 0d5d5a17acb2..78b548158fb0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2228,6 +2228,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
return -ENOMEM;
}
+ if (unlikely(!ipv6_mod_enabled()))
+ goto out_drop;
+
rcu_read_lock();
if (!nh) {
dst = skb_dst(skb);
@@ -2335,6 +2338,10 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
} else if (nh->nh_family == AF_INET6) {
+ if (unlikely(!ipv6_mod_enabled())) {
+ rcu_read_unlock();
+ goto out_drop;
+ }
neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
is_v6gw = true;
} else if (nh->nh_family == AF_INET) {
@@ -4150,12 +4157,14 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
struct xdp_rxq_info *rxq = xdp->rxq;
- unsigned int tailroom;
+ int tailroom;
if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
return -EOPNOTSUPP;
- tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+ tailroom = rxq->frag_size - skb_frag_size(frag) -
+ skb_frag_off(frag) % rxq->frag_size;
+ WARN_ON_ONCE(tailroom < 0);
if (unlikely(offset > tailroom))
return -EINVAL;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a95cfe77f7f0..c56a4e7bf790 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -820,7 +820,8 @@ int pneigh_create(struct neigh_table *tbl, struct net *net,
update:
WRITE_ONCE(n->flags, flags);
n->permanent = permanent;
- WRITE_ONCE(n->protocol, protocol);
+ if (protocol)
+ WRITE_ONCE(n->protocol, protocol);
out:
mutex_unlock(&tbl->phash_lock);
return err;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a8558a52884f..cd74beffd209 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -132,7 +132,7 @@ static int netif_local_xmit_active(struct net_device *dev)
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
- if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id())
+ if (netif_tx_owned(txq, smp_processor_id()))
return 1;
}
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index c82a95beceff..ee5060d8eec0 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -245,7 +245,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
goto err_cancel;
if (pool->user.detach_time &&
nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME,
- pool->user.detach_time))
+ ktime_divns(pool->user.detach_time, NSEC_PER_SEC)))
goto err_cancel;
if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL))
@@ -337,7 +337,7 @@ err_unlock:
void page_pool_detached(struct page_pool *pool)
{
mutex_lock(&page_pools_lock);
- pool->user.detach_time = ktime_get_boottime_seconds();
+ pool->user.detach_time = ktime_get_boottime();
netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
mutex_unlock(&page_pools_lock);
}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 9a3965680451..6a6f2cda5aae 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -20,7 +20,6 @@
#include <net/tcp.h>
static siphash_aligned_key_t net_secret;
-static siphash_aligned_key_t ts_secret;
#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
@@ -28,11 +27,6 @@ static __always_inline void net_secret_init(void)
{
net_get_random_once(&net_secret, sizeof(net_secret));
}
-
-static __always_inline void ts_secret_init(void)
-{
- net_get_random_once(&ts_secret, sizeof(ts_secret));
-}
#endif
#ifdef CONFIG_INET
@@ -53,28 +47,9 @@ static u32 seq_scale(u32 seq)
#endif
#if IS_ENABLED(CONFIG_IPV6)
-u32 secure_tcpv6_ts_off(const struct net *net,
- const __be32 *saddr, const __be32 *daddr)
-{
- const struct {
- struct in6_addr saddr;
- struct in6_addr daddr;
- } __aligned(SIPHASH_ALIGNMENT) combined = {
- .saddr = *(struct in6_addr *)saddr,
- .daddr = *(struct in6_addr *)daddr,
- };
-
- if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
- return 0;
-
- ts_secret_init();
- return siphash(&combined, offsetofend(typeof(combined), daddr),
- &ts_secret);
-}
-EXPORT_IPV6_MOD(secure_tcpv6_ts_off);
-
-u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport)
+union tcp_seq_and_ts_off
+secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
+ const __be32 *daddr, __be16 sport, __be16 dport)
{
const struct {
struct in6_addr saddr;
@@ -87,14 +62,20 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
.sport = sport,
.dport = dport
};
- u32 hash;
+ union tcp_seq_and_ts_off st;
net_secret_init();
- hash = siphash(&combined, offsetofend(typeof(combined), dport),
- &net_secret);
- return seq_scale(hash);
+
+ st.hash64 = siphash(&combined, offsetofend(typeof(combined), dport),
+ &net_secret);
+
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+ st.ts_off = 0;
+
+ st.seq = seq_scale(st.seq);
+ return st;
}
-EXPORT_SYMBOL(secure_tcpv6_seq);
+EXPORT_SYMBOL(secure_tcpv6_seq_and_ts_off);
u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
@@ -118,33 +99,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
-{
- if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
- return 0;
-
- ts_secret_init();
- return siphash_2u32((__force u32)saddr, (__force u32)daddr,
- &ts_secret);
-}
-
/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
* but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
* it would be easy enough to have the former function use siphash_4u32, passing
* the arguments as separate u32.
*/
-u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport)
+union tcp_seq_and_ts_off
+secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport)
{
- u32 hash;
+ u32 ports = (__force u32)sport << 16 | (__force u32)dport;
+ union tcp_seq_and_ts_off st;
net_secret_init();
- hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u32)sport << 16 | (__force u32)dport,
- &net_secret);
- return seq_scale(hash);
+
+ st.hash64 = siphash_3u32((__force u32)saddr, (__force u32)daddr,
+ ports, &net_secret);
+
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+ st.ts_off = 0;
+
+ st.seq = seq_scale(st.seq);
+ return st;
}
-EXPORT_SYMBOL_GPL(secure_tcp_seq);
+EXPORT_SYMBOL_GPL(secure_tcp_seq_and_ts_off);
u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2e26174c9919..3261793abe83 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1205,8 +1205,8 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
return;
psock->saved_data_ready = sk->sk_data_ready;
- sk->sk_data_ready = sk_psock_strp_data_ready;
- sk->sk_write_space = sk_psock_write_space;
+ WRITE_ONCE(sk->sk_data_ready, sk_psock_strp_data_ready);
+ WRITE_ONCE(sk->sk_write_space, sk_psock_write_space);
}
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
@@ -1216,8 +1216,8 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
if (!psock->saved_data_ready)
return;
- sk->sk_data_ready = psock->saved_data_ready;
- psock->saved_data_ready = NULL;
+ WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready);
+ WRITE_ONCE(psock->saved_data_ready, NULL);
strp_stop(&psock->strp);
}
@@ -1296,8 +1296,8 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
return;
psock->saved_data_ready = sk->sk_data_ready;
- sk->sk_data_ready = sk_psock_verdict_data_ready;
- sk->sk_write_space = sk_psock_write_space;
+ WRITE_ONCE(sk->sk_data_ready, sk_psock_verdict_data_ready);
+ WRITE_ONCE(sk->sk_write_space, sk_psock_write_space);
}
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
@@ -1308,6 +1308,6 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
if (!psock->saved_data_ready)
return;
- sk->sk_data_ready = psock->saved_data_ready;
+ WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready);
psock->saved_data_ready = NULL;
}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index b71c22475c51..df922f9f5289 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -748,6 +748,7 @@ config TCP_SIGPOOL
config TCP_AO
bool "TCP: Authentication Option (RFC5925)"
select CRYPTO
+ select CRYPTO_LIB_UTILS
select TCP_SIGPOOL
depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64)
help
@@ -761,6 +762,7 @@ config TCP_AO
config TCP_MD5SIG
bool "TCP: MD5 Signature Option support (RFC2385)"
select CRYPTO_LIB_MD5
+ select CRYPTO_LIB_UTILS
help
RFC2385 specifies a method of giving MD5 protection to TCP sessions.
Its main (only?) use is to protect BGP sessions between core routers
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8036e76aa1e4..c7731e300a44 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -124,6 +124,12 @@
#include <trace/events/sock.h>
+/* Keep the definition of IPv6 disable here for now, to avoid annoying linker
+ * issues in case IPv6=m
+ */
+int disable_ipv6_mod;
+EXPORT_SYMBOL(disable_ipv6_mod);
+
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
*/
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fca980772c81..9bfccc283fa6 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -200,7 +200,7 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2,
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
struct inet_bind2_bucket *tb2, unsigned short port)
{
- inet_sk(sk)->inet_num = port;
+ WRITE_ONCE(inet_sk(sk)->inet_num, port);
inet_csk(sk)->icsk_bind_hash = tb;
inet_csk(sk)->icsk_bind2_hash = tb2;
sk_add_bind_node(sk, &tb2->owners);
@@ -224,7 +224,7 @@ static void __inet_put_port(struct sock *sk)
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
inet_csk(sk)->icsk_bind_hash = NULL;
- inet_sk(sk)->inet_num = 0;
+ WRITE_ONCE(inet_sk(sk)->inet_num, 0);
sk->sk_userlocks &= ~SOCK_CONNECT_BIND;
spin_lock(&head2->lock);
@@ -352,7 +352,7 @@ static inline int compute_score(struct sock *sk, const struct net *net,
{
int score = -1;
- if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
+ if (net_eq(sock_net(sk), net) && READ_ONCE(sk->sk_num) == hnum &&
!ipv6_only_sock(sk)) {
if (sk->sk_rcv_saddr != daddr)
return -1;
@@ -1206,7 +1206,7 @@ error:
sk->sk_hash = 0;
inet_sk(sk)->inet_sport = 0;
- inet_sk(sk)->inet_num = 0;
+ WRITE_ONCE(inet_sk(sk)->inet_num, 0);
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 2e61ac137128..5683c328990f 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -58,6 +58,19 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
struct iphdr *iph;
int err;
+ if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) {
+ if (dev) {
+ net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
+ dev->name);
+ DEV_STATS_INC(dev, tx_errors);
+ }
+ ip_rt_put(rt);
+ kfree_skb(skb);
+ return;
+ }
+
+ dev_xmit_recursion_inc();
+
skb_scrub_packet(skb, xnet);
skb_clear_hash_if_not_l4(skb);
@@ -88,6 +101,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
pkt_len = 0;
iptunnel_xmit_stats(dev, pkt_len);
}
+
+ dev_xmit_recursion_dec();
}
EXPORT_SYMBOL_GPL(iptunnel_xmit);
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 1aa2b05ee8de..c942f1282236 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -2002,7 +2002,8 @@ static void nh_hthr_group_rebalance(struct nh_group *nhg)
}
static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
- struct nl_info *nlinfo)
+ struct nl_info *nlinfo,
+ struct list_head *deferred_free)
{
struct nh_grp_entry *nhges, *new_nhges;
struct nexthop *nhp = nhge->nh_parent;
@@ -2062,8 +2063,8 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
rcu_assign_pointer(nhp->nh_grp, newg);
list_del(&nhge->nh_list);
- free_percpu(nhge->stats);
nexthop_put(nhge->nh);
+ list_add(&nhge->nh_list, deferred_free);
/* Removal of a NH from a resilient group is notified through
* bucket notifications.
@@ -2083,6 +2084,7 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
struct nl_info *nlinfo)
{
struct nh_grp_entry *nhge, *tmp;
+ LIST_HEAD(deferred_free);
/* If there is nothing to do, let's avoid the costly call to
* synchronize_net()
@@ -2091,10 +2093,16 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
return;
list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
- remove_nh_grp_entry(net, nhge, nlinfo);
+ remove_nh_grp_entry(net, nhge, nlinfo, &deferred_free);
/* make sure all see the newly published array before releasing rtnl */
synchronize_net();
+
+ /* Now safe to free percpu stats - all RCU readers have finished */
+ list_for_each_entry_safe(nhge, tmp, &deferred_free, nh_list) {
+ list_del(&nhge->nh_list);
+ free_percpu(nhge->stats);
+ }
}
static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 061751aabc8e..fc3affd9c801 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -378,9 +378,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcp_ts_off(net,
- ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr);
+ union tcp_seq_and_ts_off st;
+
+ st = secure_tcp_seq_and_ts_off(net,
+ ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
+ tsoff = st.ts_off;
tcp_opt.rcv_tsecr -= tsoff;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 643763bc2142..5654cc9c8a0b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -486,7 +486,8 @@ static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
proc_fib_multipath_hash_rand_seed),
};
- WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new);
+ WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.user_seed, new.user_seed);
+ WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed, new.mp_seed);
}
static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write,
@@ -500,7 +501,7 @@ static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write
int ret;
mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
- user_seed = mphs->user_seed;
+ user_seed = READ_ONCE(mphs->user_seed);
tmp = *table;
tmp.data = &user_seed;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f84d9a45cc9d..202a4e57a218 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -244,6 +244,7 @@
#define pr_fmt(fmt) "TCP: " fmt
#include <crypto/md5.h>
+#include <crypto/utils.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -1446,7 +1447,7 @@ out_err:
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
- sk->sk_write_space(sk);
+ READ_ONCE(sk->sk_write_space)(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
if (binding)
@@ -4181,7 +4182,7 @@ ao_parse:
break;
case TCP_NOTSENT_LOWAT:
WRITE_ONCE(tp->notsent_lowat, val);
- sk->sk_write_space(sk);
+ READ_ONCE(sk->sk_write_space)(sk);
break;
case TCP_INQ:
if (val > 1 || val < 0)
@@ -4970,7 +4971,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
else
tp->af_specific->calc_md5_hash(newhash, key, NULL, skb);
- if (memcmp(hash_location, newhash, 16) != 0) {
+ if (crypto_memneq(hash_location, newhash, 16)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
trace_tcp_hash_md5_mismatch(sk, skb);
return SKB_DROP_REASON_TCP_MD5FAILURE;
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 4980caddb0fc..a97cdf3e6af4 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "TCP: " fmt
#include <crypto/hash.h>
+#include <crypto/utils.h>
#include <linux/inetdevice.h>
#include <linux/tcp.h>
@@ -922,7 +923,7 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb,
/* XXX: make it per-AF callback? */
tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key,
(phash - (u8 *)th), sne);
- if (memcmp(phash, hash_buf, maclen)) {
+ if (crypto_memneq(phash, hash_buf, maclen)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
atomic64_inc(&info->counters.pkt_bad);
atomic64_inc(&key->pkt_bad);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index c449a044895e..813d2e498c93 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -725,7 +725,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash);
tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
} else {
- sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_write_space, psock->saved_write_space);
/* Pairs with lockless read in sk_clone_lock() */
sock_replace_proto(sk, psock->sk_proto);
}
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index d83efd91f461..7935702e394b 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -509,7 +509,7 @@ next_chunk:
if (r->sdiag_family != AF_UNSPEC &&
sk->sk_family != r->sdiag_family)
goto next_normal;
- if (r->id.idiag_sport != htons(sk->sk_num) &&
+ if (r->id.idiag_sport != htons(READ_ONCE(sk->sk_num)) &&
r->id.idiag_sport)
goto next_normal;
if (r->id.idiag_dport != sk->sk_dport &&
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 41c57efd125c..cba89733d121 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5374,25 +5374,11 @@ static void tcp_ofo_queue(struct sock *sk)
static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb);
static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);
-/* Check if this incoming skb can be added to socket receive queues
- * while satisfying sk->sk_rcvbuf limit.
- *
- * In theory we should use skb->truesize, but this can cause problems
- * when applications use too small SO_RCVBUF values.
- * When LRO / hw gro is used, the socket might have a high tp->scaling_ratio,
- * allowing RWIN to be close to available space.
- * Whenever the receive queue gets full, we can receive a small packet
- * filling RWIN, but with a high skb->truesize, because most NIC use 4K page
- * plus sk_buff metadata even when receiving less than 1500 bytes of payload.
- *
- * Note that we use skb->len to decide to accept or drop this packet,
- * but sk->sk_rmem_alloc is the sum of all skb->truesize.
- */
static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb)
{
unsigned int rmem = atomic_read(&sk->sk_rmem_alloc);
- return rmem + skb->len <= sk->sk_rcvbuf;
+ return rmem <= sk->sk_rcvbuf;
}
static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb,
@@ -5425,7 +5411,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM);
return;
}
@@ -5635,7 +5621,7 @@ err:
void tcp_data_ready(struct sock *sk)
{
if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE))
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
}
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
@@ -5691,7 +5677,7 @@ queue_and_out:
inet_csk(sk)->icsk_ack.pending |=
(ICSK_ACK_NOMEM | ICSK_ACK_NOW);
inet_csk_schedule_ack(sk);
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
if (skb_queue_len(&sk->sk_receive_queue) && skb->len) {
reason = SKB_DROP_REASON_PROTO_MEM;
@@ -6114,7 +6100,9 @@ static void tcp_new_space(struct sock *sk)
tp->snd_cwnd_stamp = tcp_jiffies32;
}
- INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
+ INDIRECT_CALL_1(READ_ONCE(sk->sk_write_space),
+ sk_stream_write_space,
+ sk);
}
/* Caller made space either from:
@@ -6325,7 +6313,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
BUG();
WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
}
}
}
@@ -7658,6 +7646,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct sock *fastopen_sk = NULL;
+ union tcp_seq_and_ts_off st;
struct request_sock *req;
bool want_cookie = false;
struct dst_entry *dst;
@@ -7727,9 +7716,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (!dst)
goto drop_and_free;
+ if (tmp_opt.tstamp_ok || (!want_cookie && !isn))
+ st = af_ops->init_seq_and_ts_off(net, skb);
+
if (tmp_opt.tstamp_ok) {
tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
- tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
+ tcp_rsk(req)->ts_off = st.ts_off;
}
if (!want_cookie && !isn) {
int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
@@ -7751,7 +7743,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_release;
}
- isn = af_ops->init_seq(skb);
+ isn = st.seq;
}
tcp_ecn_create_request(req, skb, sk, dst);
@@ -7792,7 +7784,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
sock_put(fastopen_sk);
goto drop_and_free;
}
- sk->sk_data_ready(sk);
+ READ_ONCE(sk->sk_data_ready)(sk);
bh_unlock_sock(fastopen_sk);
sock_put(fastopen_sk);
} else {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d53d39be291a..c7b2463c2e25 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,6 +88,7 @@
#include <linux/skbuff_ref.h>
#include <crypto/md5.h>
+#include <crypto/utils.h>
#include <trace/events/tcp.h>
@@ -104,17 +105,14 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = {
static DEFINE_MUTEX(tcp_exit_batch_mutex);
-static u32 tcp_v4_init_seq(const struct sk_buff *skb)
+static union tcp_seq_and_ts_off
+tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
{
- return secure_tcp_seq(ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source);
-}
-
-static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
-{
- return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
+ return secure_tcp_seq_and_ts_off(net,
+ ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -326,15 +324,16 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len
rt = NULL;
if (likely(!tp->repair)) {
+ union tcp_seq_and_ts_off st;
+
+ st = secure_tcp_seq_and_ts_off(net,
+ inet->inet_saddr,
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port);
if (!tp->write_seq)
- WRITE_ONCE(tp->write_seq,
- secure_tcp_seq(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- usin->sin_port));
- WRITE_ONCE(tp->tsoffset,
- secure_tcp_ts_off(net, inet->inet_saddr,
- inet->inet_daddr));
+ WRITE_ONCE(tp->write_seq, st.seq);
+ WRITE_ONCE(tp->tsoffset, st.ts_off);
}
atomic_set(&inet->inet_id, get_random_u16());
@@ -839,7 +838,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
goto out;
tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
- if (memcmp(md5_hash_location, newhash, 16) != 0)
+ if (crypto_memneq(md5_hash_location, newhash, 16))
goto out;
}
@@ -1676,8 +1675,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.cookie_init_seq = cookie_v4_init_sequence,
#endif
.route_req = tcp_v4_route_req,
- .init_seq = tcp_v4_init_seq,
- .init_ts_off = tcp_v4_init_ts_off,
+ .init_seq_and_ts_off = tcp_v4_init_seq_and_ts_off,
.send_synack = tcp_v4_send_synack,
};
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d9c5a43bd281..dafb63b923d0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -1004,7 +1004,7 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
reason = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
if (state == TCP_SYN_RECV && child->sk_state != state)
- parent->sk_data_ready(parent);
+ READ_ONCE(parent->sk_data_ready)(parent);
} else {
/* Alas, it is possible again, because we do lookup
* in main socket hash table and lock on listening
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6c6b68a66dcd..b60fad393e18 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1787,7 +1787,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
* using prepare_to_wait_exclusive().
*/
while (nb) {
- INDIRECT_CALL_1(sk->sk_data_ready,
+ INDIRECT_CALL_1(READ_ONCE(sk->sk_data_ready),
sock_def_readable, sk);
nb--;
}
@@ -2287,7 +2287,6 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
udp_sk(sk)->udp_port_hash);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
nhslot2 = udp_hashslot2(udptable, newhash);
- udp_sk(sk)->udp_portaddr_hash = newhash;
if (hslot2 != nhslot2 ||
rcu_access_pointer(sk->sk_reuseport_cb)) {
@@ -2321,19 +2320,25 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
if (udp_hashed4(sk)) {
spin_lock_bh(&hslot->lock);
- udp_rehash4(udptable, sk, newhash4);
- if (hslot2 != nhslot2) {
- spin_lock(&hslot2->lock);
- udp_hash4_dec(hslot2);
- spin_unlock(&hslot2->lock);
-
- spin_lock(&nhslot2->lock);
- udp_hash4_inc(nhslot2);
- spin_unlock(&nhslot2->lock);
+ if (inet_rcv_saddr_any(sk)) {
+ udp_unhash4(udptable, sk);
+ } else {
+ udp_rehash4(udptable, sk, newhash4);
+ if (hslot2 != nhslot2) {
+ spin_lock(&hslot2->lock);
+ udp_hash4_dec(hslot2);
+ spin_unlock(&hslot2->lock);
+
+ spin_lock(&nhslot2->lock);
+ udp_hash4_inc(nhslot2);
+ spin_unlock(&nhslot2->lock);
+ }
}
spin_unlock_bh(&hslot->lock);
}
+
+ udp_sk(sk)->udp_portaddr_hash = newhash;
}
}
EXPORT_IPV6_MOD(udp_lib_rehash);
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 91233e37cd97..779a3a03762f 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -158,7 +158,7 @@ int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
if (restore) {
- sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_write_space, psock->saved_write_space);
sock_replace_proto(sk, psock->sk_proto);
return 0;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 23cc9b4cb2f1..4cbd45b68088 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -86,8 +86,6 @@ struct ipv6_params ipv6_defaults = {
.autoconf = 1,
};
-static int disable_ipv6_mod;
-
module_param_named(disable, disable_ipv6_mod, int, 0444);
MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
@@ -97,12 +95,6 @@ MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
-bool ipv6_mod_enabled(void)
-{
- return disable_ipv6_mod == 0;
-}
-EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
-
static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
const int offset = sk->sk_prot->ipv6_pinfo_offset;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 5e1da088d8e1..182d38e6d6d8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -95,7 +95,8 @@ static inline int compute_score(struct sock *sk, const struct net *net,
{
int score = -1;
- if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
+ if (net_eq(sock_net(sk), net) &&
+ READ_ONCE(inet_sk(sk)->inet_num) == hnum &&
sk->sk_family == PF_INET6) {
if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 85df25c36409..08cd86f49bf9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1063,7 +1063,8 @@ static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
*/
if (netif_is_l3_slave(dev) &&
!rt6_need_strict(&res->f6i->fib6_dst.addr))
- dev = l3mdev_master_dev_rcu(dev);
+ dev = l3mdev_master_dev_rcu(dev) ? :
+ dev_net(dev)->loopback_dev;
else if (!netif_is_l3_master(dev))
dev = dev_net(dev)->loopback_dev;
/* last case is netif_is_l3_master(dev) is true in which
@@ -3582,7 +3583,6 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
- int addr_type;
int err;
fib6_nh->fib_nh_family = AF_INET6;
@@ -3624,11 +3624,10 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
fib6_nh->fib_nh_weight = 1;
- /* We cannot add true routes via loopback here,
- * they would result in kernel looping; promote them to reject routes
+ /* Reset the nexthop device to the loopback device in case of reject
+ * routes.
*/
- addr_type = ipv6_addr_type(&cfg->fc_dst);
- if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
+ if (cfg->fc_flags & RTF_REJECT) {
/* hold loopback dev/idev if we haven't done so. */
if (dev != net->loopback_dev) {
if (dev) {
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7e007f013ec8..4f6f0d751d6c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -151,9 +151,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcpv6_ts_off(net,
- ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32);
+ union tcp_seq_and_ts_off st;
+
+ st = secure_tcpv6_seq_and_ts_off(net,
+ ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
+ tsoff = st.ts_off;
tcp_opt.rcv_tsecr -= tsoff;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e46a0efae012..bb09d5ccf599 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -68,6 +68,7 @@
#include <linux/seq_file.h>
#include <crypto/md5.h>
+#include <crypto/utils.h>
#include <trace/events/tcp.h>
@@ -104,18 +105,14 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
}
}
-static u32 tcp_v6_init_seq(const struct sk_buff *skb)
+static union tcp_seq_and_ts_off
+tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
{
- return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source);
-}
-
-static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
-{
- return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32);
+ return secure_tcpv6_seq_and_ts_off(net,
+ ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
}
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
@@ -319,14 +316,16 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
sk_set_txhash(sk);
if (likely(!tp->repair)) {
+ union tcp_seq_and_ts_off st;
+
+ st = secure_tcpv6_seq_and_ts_off(net,
+ np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport);
if (!tp->write_seq)
- WRITE_ONCE(tp->write_seq,
- secure_tcpv6_seq(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport));
- tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32);
+ WRITE_ONCE(tp->write_seq, st.seq);
+ tp->tsoffset = st.ts_off;
}
if (tcp_fastopen_defer_connect(sk, &err))
@@ -816,8 +815,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.cookie_init_seq = cookie_v6_init_sequence,
#endif
.route_req = tcp_v6_route_req,
- .init_seq = tcp_v6_init_seq,
- .init_ts_off = tcp_v6_init_ts_off,
+ .init_seq_and_ts_off = tcp_v6_init_seq_and_ts_off,
.send_synack = tcp_v6_send_synack,
};
@@ -1048,7 +1046,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
key.type = TCP_KEY_MD5;
tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
- if (memcmp(md5_hash_location, newhash, 16) != 0)
+ if (crypto_memneq(md5_hash_location, newhash, 16))
goto out;
}
#endif
diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c
index 75096b2195d2..078e1e23d8d1 100644
--- a/net/mac80211/eht.c
+++ b/net/mac80211/eht.c
@@ -154,6 +154,7 @@ void ieee80211_rx_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata,
u8 *ptr = mgmt->u.action.u.eml_omn.variable;
struct ieee80211_eml_params eml_params = {
.link_id = status->link_id,
+ .control = control,
};
struct sta_info *sta;
int opt_len = 0;
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 0381377ab760..59ad60b88563 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -359,6 +359,7 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
{
struct mctp_sk_key *key;
struct mctp_flow *flow;
+ unsigned long flags;
flow = skb_ext_find(skb, SKB_EXT_MCTP);
if (!flow)
@@ -366,12 +367,14 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
key = flow->key;
- if (key->dev) {
+ spin_lock_irqsave(&key->lock, flags);
+
+ if (!key->dev)
+ mctp_dev_set_key(dev, key);
+ else
WARN_ON(key->dev != dev);
- return;
- }
- mctp_dev_set_key(dev, key);
+ spin_unlock_irqrestore(&key->lock, flags);
}
#else
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 7298836469b3..57a456690406 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -212,9 +212,24 @@ void mptcp_pm_send_ack(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
}
-void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
+static bool subflow_in_rm_list(const struct mptcp_subflow_context *subflow,
+ const struct mptcp_rm_list *rm_list)
+{
+ u8 i, id = subflow_get_local_id(subflow);
+
+ for (i = 0; i < rm_list->nr; i++) {
+ if (rm_list->ids[i] == id)
+ return true;
+ }
+
+ return false;
+}
+
+static void
+mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rm_list)
{
- struct mptcp_subflow_context *subflow, *alt = NULL;
+ struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL;
msk_owned_by_me(msk);
lockdep_assert_held(&msk->pm.lock);
@@ -224,19 +239,35 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
return;
mptcp_for_each_subflow(msk, subflow) {
- if (__mptcp_subflow_active(subflow)) {
- if (!subflow->stale) {
- mptcp_pm_send_ack(msk, subflow, false, false);
- return;
- }
+ if (!__mptcp_subflow_active(subflow))
+ continue;
- if (!alt)
- alt = subflow;
+ if (unlikely(subflow->stale)) {
+ if (!stale)
+ stale = subflow;
+ } else if (unlikely(rm_list &&
+ subflow_in_rm_list(subflow, rm_list))) {
+ if (!same_id)
+ same_id = subflow;
+ } else {
+ goto send_ack;
}
}
- if (alt)
- mptcp_pm_send_ack(msk, alt, false, false);
+ if (same_id)
+ subflow = same_id;
+ else if (stale)
+ subflow = stale;
+ else
+ return;
+
+send_ack:
+ mptcp_pm_send_ack(msk, subflow, false, false);
+}
+
+void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
+{
+ mptcp_pm_addr_send_ack_avoid_list(msk, NULL);
}
int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -470,7 +501,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
msk->pm.rm_list_tx = *rm_list;
rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
WRITE_ONCE(msk->pm.addr_signal, rm_addr);
- mptcp_pm_addr_send_ack(msk);
+ mptcp_pm_addr_send_ack_avoid_list(msk, rm_list);
return 0;
}
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index b5316a6c7d1b..b2b9df43960e 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -418,6 +418,15 @@ subflow:
}
exit:
+ /* If an endpoint has both the signal and subflow flags, but it is not
+ * possible to create subflows -- the 'while' loop body above never
+ * executed -- then still mark the endp as used, which is somehow the
+ * case. This avoids issues later when removing the endpoint and calling
+ * __mark_subflow_endp_available(), which expects the increment here.
+ */
+ if (signal_and_subflow && local.addr.id != msk->mpc_endpoint_id)
+ msk->pm.local_addr_used++;
+
mptcp_pm_nl_check_work_pending(msk);
}
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 62fb1031763d..040a31557201 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -224,7 +224,8 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
if (!nah) {
netdev_warn(ndp->ndev.dev, "Invalid AEN (0x%x) received\n",
h->type);
- return -ENOENT;
+ ret = -ENOENT;
+ goto out;
}
ret = ncsi_validate_aen_pkt(h, nah->payload);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 271ec6c3929e..fbd84bc8026a 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -1176,8 +1176,10 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
/* Find the NCSI device */
nd = ncsi_find_dev(orig_dev);
ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
- if (!ndp)
- return -ENODEV;
+ if (!ndp) {
+ ret = -ENODEV;
+ goto err_free_skb;
+ }
/* Check if it is AEN packet */
hdr = (struct ncsi_pkt_hdr *)skb_network_header(skb);
@@ -1199,7 +1201,8 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
if (!nrh) {
netdev_err(nd->dev, "Received unrecognized packet (0x%x)\n",
hdr->type);
- return -ENOENT;
+ ret = -ENOENT;
+ goto err_free_skb;
}
/* Associate with the request */
@@ -1207,7 +1210,8 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
nr = &ndp->requests[hdr->id];
if (!nr->used) {
spin_unlock_irqrestore(&ndp->lock, flags);
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_free_skb;
}
nr->rsp = skb;
@@ -1261,4 +1265,8 @@ out_netlink:
out:
ncsi_free_request(nr);
return ret;
+
+err_free_skb:
+ kfree_skb(skb);
+ return ret;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fd7f7e4e2a43..dacec5f8a11c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -829,10 +829,14 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
nft_set_elem_change_active(ctx->net, set, ext);
nft_setelem_data_deactivate(ctx->net, set, catchall->elem);
- break;
}
}
+/* Use NFT_ITER_UPDATE iterator even if this may be called from the preparation
+ * phase, the set clone might already exist from a previous command, or it might
+ * be a set that is going away and does not require a clone. The netns and
+ * netlink release paths also need to work on the live set.
+ */
static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nft_set_iter iter = {
@@ -5868,7 +5872,6 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx,
nft_clear(ctx->net, ext);
nft_setelem_data_activate(ctx->net, set, catchall->elem);
- break;
}
}
@@ -7170,6 +7173,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
+ bool set_full = false;
u64 expiration;
u64 timeout;
int err, i;
@@ -7461,10 +7465,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_elem_free;
+ if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+ unsigned int max = nft_set_maxsize(set), nelems;
+
+ nelems = atomic_inc_return(&set->nelems);
+ if (nelems > max)
+ set_full = true;
+ }
+
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL) {
err = -ENOMEM;
- goto err_elem_free;
+ goto err_set_size;
}
ext->genmask = nft_genmask_cur(ctx->net);
@@ -7516,7 +7528,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
ue->priv = elem_priv;
nft_trans_commit_list_add_elem(ctx->net, trans);
- goto err_elem_free;
+ goto err_set_size;
}
}
}
@@ -7534,23 +7546,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_element_clash;
}
- if (!(flags & NFT_SET_ELEM_CATCHALL)) {
- unsigned int max = nft_set_maxsize(set);
-
- if (!atomic_add_unless(&set->nelems, 1, max)) {
- err = -ENFILE;
- goto err_set_full;
- }
- }
-
nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
nft_trans_commit_list_add_elem(ctx->net, trans);
- return 0;
-err_set_full:
- nft_setelem_remove(ctx->net, set, elem.priv);
+ return set_full ? -ENFILE : 0;
+
err_element_clash:
kfree(trans);
+err_set_size:
+ if (!(flags & NFT_SET_ELEM_CATCHALL))
+ atomic_dec(&set->nelems);
err_elem_free:
nf_tables_set_elem_destroy(ctx, set, elem.priv);
err_parse_data:
@@ -7901,9 +7906,12 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
{
+ /* The set backend might need to clone the set, do it now from the
+ * preparation phase, use NFT_ITER_UPDATE_CLONE iterator type.
+ */
struct nft_set_iter iter = {
.genmask = genmask,
- .type = NFT_ITER_UPDATE,
+ .type = NFT_ITER_UPDATE_CLONE,
.fn = nft_setelem_flush,
};
@@ -9678,7 +9686,7 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev,
break;
case NETDEV_REGISTER:
/* NOP if not matching or already registered */
- if (!match || (changename && ops))
+ if (!match || ops)
continue;
ops = kzalloc_obj(struct nf_hook_ops,
@@ -10483,11 +10491,6 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
schedule_work(&trans_gc_work);
}
-static int nft_trans_gc_space(struct nft_trans_gc *trans)
-{
- return NFT_TRANS_GC_BATCHCOUNT - trans->count;
-}
-
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
unsigned int gc_seq, gfp_t gfp)
{
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index d658b1478fa0..d545fa459455 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -601,10 +601,10 @@ restart:
goto out;
}
}
- }
- if (cb->args[1]) {
- cb->args[1] = 0;
- goto restart;
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ }
}
out:
rcu_read_unlock();
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7f5248b5f1ee..47f7f62906e2 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1546,8 +1546,10 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
if (entry->state.pf == PF_BRIDGE) {
err = nfqa_parse_bridge(entry, nfqa);
- if (err < 0)
+ if (err < 0) {
+ nfqnl_reinject(entry, NF_DROP);
return err;
+ }
}
if (nfqa[NFQA_PAYLOAD]) {
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index b16185e9a6dd..041426e3bdbf 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -344,7 +344,7 @@ static int nft_netdev_event(unsigned long event, struct net_device *dev,
break;
case NETDEV_REGISTER:
/* NOP if not matching or already registered */
- if (!match || (changename && ops))
+ if (!match || ops)
continue;
ops = kmemdup(&basechain->ops,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 739b992bde59..b0e571c8e3f3 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -374,6 +374,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
{
switch (iter->type) {
case NFT_ITER_UPDATE:
+ case NFT_ITER_UPDATE_CLONE:
/* only relevant for netlink dumps which use READ type */
WARN_ON_ONCE(iter->skip != 0);
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 7ef4b44471d3..7fd24e0cc428 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1640,6 +1640,7 @@ static void pipapo_drop(struct nft_pipapo_match *m,
int i;
nft_pipapo_for_each_field(f, i, m) {
+ bool last = i == m->field_count - 1;
int g;
for (g = 0; g < f->groups; g++) {
@@ -1659,7 +1660,7 @@ static void pipapo_drop(struct nft_pipapo_match *m,
}
pipapo_unmap(f->mt, f->rules, rulemap[i].to, rulemap[i].n,
- rulemap[i + 1].n, i == m->field_count - 1);
+ last ? 0 : rulemap[i + 1].n, last);
if (pipapo_resize(f, f->rules, f->rules - rulemap[i].n)) {
/* We can ignore this, a failure to shrink tables down
* doesn't make tables invalid.
@@ -1680,11 +1681,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
}
/**
- * pipapo_gc() - Drop expired entries from set, destroy start and end elements
+ * pipapo_gc_scan() - Drop expired entries from set and link them to gc list
* @set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
@@ -1697,6 +1698,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
if (!gc)
return;
+ list_add(&gc->list, &priv->gc_head);
+
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const struct nft_pipapo_field *f;
@@ -1724,9 +1727,13 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
* NFT_SET_ELEM_DEAD_BIT.
*/
if (__nft_set_elem_expired(&e->ext, tstamp)) {
- gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
- if (!gc)
- return;
+ if (!nft_trans_gc_space(gc)) {
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
+
+ list_add(&gc->list, &priv->gc_head);
+ }
nft_pipapo_gc_deactivate(net, set, e);
pipapo_drop(m, rulemap);
@@ -1740,10 +1747,30 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
}
}
- gc = nft_trans_gc_catchall_sync(gc);
+ priv->last_gc = jiffies;
+}
+
+/**
+ * pipapo_gc_queue() - Free expired elements
+ * @set: nftables API set representation
+ */
+static void pipapo_gc_queue(struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_trans_gc *gc, *next;
+
+ /* always do a catchall cycle: */
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (gc) {
+ gc = nft_trans_gc_catchall_sync(gc);
+ if (gc)
+ nft_trans_gc_queue_sync_done(gc);
+ }
+
+ /* always purge queued gc elements. */
+ list_for_each_entry_safe(gc, next, &priv->gc_head, list) {
+ list_del(&gc->list);
nft_trans_gc_queue_sync_done(gc);
- priv->last_gc = jiffies;
}
}
@@ -1797,6 +1824,10 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
*
* We also need to create a new working copy for subsequent insertions and
* deletions.
+ *
+ * After the live copy has been replaced by the clone, we can safely queue
+ * expired elements that have been collected by pipapo_gc_scan() for
+ * memory reclaim.
*/
static void nft_pipapo_commit(struct nft_set *set)
{
@@ -1807,7 +1838,7 @@ static void nft_pipapo_commit(struct nft_set *set)
return;
if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
- pipapo_gc(set, priv->clone);
+ pipapo_gc_scan(set, priv->clone);
old = rcu_replace_pointer(priv->match, priv->clone,
nft_pipapo_transaction_mutex_held(set));
@@ -1815,6 +1846,8 @@ static void nft_pipapo_commit(struct nft_set *set)
if (old)
call_rcu(&old->rcu, pipapo_reclaim_match);
+
+ pipapo_gc_queue(set);
}
static void nft_pipapo_abort(const struct nft_set *set)
@@ -2144,13 +2177,20 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_pipapo_match *m;
switch (iter->type) {
- case NFT_ITER_UPDATE:
+ case NFT_ITER_UPDATE_CLONE:
m = pipapo_maybe_clone(set);
if (!m) {
iter->err = -ENOMEM;
return;
}
-
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ break;
+ case NFT_ITER_UPDATE:
+ if (priv->clone)
+ m = priv->clone;
+ else
+ m = rcu_dereference_protected(priv->match,
+ nft_pipapo_transaction_mutex_held(set));
nft_pipapo_do_walk(ctx, set, m, iter);
break;
case NFT_ITER_READ:
@@ -2272,6 +2312,7 @@ static int nft_pipapo_init(const struct nft_set *set,
f->mt = NULL;
}
+ INIT_LIST_HEAD(&priv->gc_head);
rcu_assign_pointer(priv->match, m);
return 0;
@@ -2321,6 +2362,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
+ WARN_ON_ONCE(!list_empty(&priv->gc_head));
+
m = rcu_dereference_protected(priv->match, true);
if (priv->clone) {
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index eaab422aa56a..9aee9a9eaeb7 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -156,12 +156,14 @@ struct nft_pipapo_match {
* @clone: Copy where pending insertions and deletions are kept
* @width: Total bytes to be matched for one packet, including padding
* @last_gc: Timestamp of last garbage collection run, jiffies
+ * @gc_head: list of nft_trans_gc to queue up for mem reclaim
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int width;
unsigned long last_gc;
+ struct list_head gc_head;
};
struct nft_pipapo_elem;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 3f02e4478216..ee3d4f5b9ff7 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -861,13 +861,15 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_rbtree *priv = nft_set_priv(set);
switch (iter->type) {
- case NFT_ITER_UPDATE:
- lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
-
+ case NFT_ITER_UPDATE_CLONE:
if (nft_array_may_resize(set) < 0) {
iter->err = -ENOMEM;
break;
}
+ fallthrough;
+ case NFT_ITER_UPDATE:
+ lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
+
nft_rbtree_do_walk(ctx, set, iter);
break;
case NFT_ITER_READ:
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 5d93e225d0f8..517106165ad2 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -318,6 +318,12 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
info->timer = __idletimer_tg_find_by_label(info->label);
if (info->timer) {
+ if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
+ pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n");
+ mutex_unlock(&list_mutex);
+ return -EINVAL;
+ }
+
info->timer->refcnt++;
mod_timer(&info->timer->timer,
secs_to_jiffies(info->timeout) + jiffies);
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index e5a13ecbe67a..037ab93e25d0 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -62,10 +62,10 @@ dccp_find_option(u_int8_t option,
return true;
}
- if (op[i] < 2)
+ if (op[i] < 2 || i == optlen - 1)
i++;
else
- i += op[i+1]?:1;
+ i += op[i + 1] ? : 1;
}
spin_unlock_bh(&dccp_buflock);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index e8991130a3de..f76cf18f1a24 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -59,8 +59,10 @@ tcp_find_option(u_int8_t option,
for (i = 0; i < optlen; ) {
if (op[i] == option) return !invert;
- if (op[i] < 2) i++;
- else i += op[i+1]?:1;
+ if (op[i] < 2 || i == optlen - 1)
+ i++;
+ else
+ i += op[i + 1] ? : 1;
}
return invert;
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index 3670bb33732e..7cb1e6aaae90 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -707,8 +707,10 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
int rc;
data_exch = kzalloc_obj(*data_exch);
- if (!data_exch)
+ if (!data_exch) {
+ kfree_skb(skb);
return -ENOMEM;
+ }
data_exch->cb = cb;
data_exch->cb_context = cb_context;
@@ -731,8 +733,10 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
data_exch);
exit:
- if (rc)
+ if (rc) {
+ kfree_skb(skb);
kfree(data_exch);
+ }
return rc;
}
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 6e9b76e2cc56..43d871525dbc 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -567,6 +567,10 @@ static int nci_close_device(struct nci_dev *ndev)
flush_workqueue(ndev->cmd_wq);
timer_delete_sync(&ndev->cmd_timer);
timer_delete_sync(&ndev->data_timer);
+ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+ nci_data_exchange_complete(ndev, NULL,
+ ndev->cur_conn_id,
+ -ENODEV);
mutex_unlock(&ndev->req_lock);
return 0;
}
@@ -598,6 +602,11 @@ static int nci_close_device(struct nci_dev *ndev)
flush_workqueue(ndev->cmd_wq);
timer_delete_sync(&ndev->cmd_timer);
+ timer_delete_sync(&ndev->data_timer);
+
+ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+ nci_data_exchange_complete(ndev, NULL, ndev->cur_conn_id,
+ -ENODEV);
/* Clear flags except NCI_UNREG */
ndev->flags &= BIT(NCI_UNREG);
@@ -1035,18 +1044,23 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
struct nci_conn_info *conn_info;
conn_info = ndev->rf_conn_info;
- if (!conn_info)
+ if (!conn_info) {
+ kfree_skb(skb);
return -EPROTO;
+ }
pr_debug("target_idx %d, len %d\n", target->idx, skb->len);
if (!ndev->target_active_prot) {
pr_err("unable to exchange data, no active target\n");
+ kfree_skb(skb);
return -EINVAL;
}
- if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+ if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) {
+ kfree_skb(skb);
return -EBUSY;
+ }
/* store cb and context to be used on receiving data */
conn_info->data_exchange_cb = cb;
@@ -1482,10 +1496,20 @@ static bool nci_valid_size(struct sk_buff *skb)
unsigned int hdr_size = NCI_CTRL_HDR_SIZE;
if (skb->len < hdr_size ||
- !nci_plen(skb->data) ||
skb->len < hdr_size + nci_plen(skb->data)) {
return false;
}
+
+ if (!nci_plen(skb->data)) {
+ /* Allow zero length in proprietary notifications (0x20 - 0x3F). */
+ if (nci_opcode_oid(nci_opcode(skb->data)) >= 0x20 &&
+ nci_mt(skb->data) == NCI_MT_NTF_PKT)
+ return true;
+
+ /* Disallow zero length otherwise. */
+ return false;
+ }
+
return true;
}
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 78f4131af3cf..5f98c73db5af 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -33,7 +33,8 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
if (!conn_info) {
kfree_skb(skb);
- goto exit;
+ clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
+ return;
}
cb = conn_info->data_exchange_cb;
@@ -45,6 +46,12 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
timer_delete_sync(&ndev->data_timer);
clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags);
+ /* Mark the exchange as done before calling the callback.
+ * The callback (e.g. rawsock_data_exchange_complete) may
+ * want to immediately queue another data exchange.
+ */
+ clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
+
if (cb) {
/* forward skb to nfc core */
cb(cb_context, skb, err);
@@ -54,9 +61,6 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
/* no waiting callback, free skb */
kfree_skb(skb);
}
-
-exit:
- clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
}
/* ----------------- NCI TX Data ----------------- */
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index b049022399ae..f7d7a599fade 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -67,6 +67,17 @@ static int rawsock_release(struct socket *sock)
if (sock->type == SOCK_RAW)
nfc_sock_unlink(&raw_sk_list, sk);
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* Prevent rawsock_tx_work from starting new transmits and
+ * wait for any in-progress work to finish. This must happen
+ * before the socket is orphaned to avoid a race where
+ * rawsock_tx_work runs after the NCI device has been freed.
+ */
+ sk->sk_shutdown |= SEND_SHUTDOWN;
+ cancel_work_sync(&nfc_rawsock(sk)->tx_work);
+ rawsock_write_queue_purge(sk);
+ }
+
sock_orphan(sk);
sock_put(sk);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 04f310255692..654e23d13e3d 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -490,18 +490,24 @@ bool rds_tcp_tune(struct socket *sock)
struct rds_tcp_net *rtn;
tcp_sock_set_nodelay(sock->sk);
- lock_sock(sk);
/* TCP timer functions might access net namespace even after
* a process which created this net namespace terminated.
*/
if (!sk->sk_net_refcnt) {
- if (!maybe_get_net(net)) {
- release_sock(sk);
+ if (!maybe_get_net(net))
return false;
- }
+ /*
+ * sk_net_refcnt_upgrade() must be called before lock_sock()
+ * because it does a GFP_KERNEL allocation, which can trigger
+ * fs_reclaim and create a circular lock dependency with the
+ * socket lock. The fields it modifies (sk_net_refcnt,
+ * ns_tracker) are not accessed by any concurrent code path
+ * at this point.
+ */
sk_net_refcnt_upgrade(sk);
put_net(net);
}
+ lock_sock(sk);
rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0c2c68c4b07e..0f90272ac254 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -267,12 +267,13 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* Lookup or create a remote transport endpoint record for the specified
* address.
*
- * Return: The peer record found with a reference, %NULL if no record is found
- * or a negative error code if the address is invalid or unsupported.
+ * Return: The peer record found with a reference or a negative error code if
+ * the address is invalid or unsupported.
*/
struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
struct sockaddr_rxrpc *srx, gfp_t gfp)
{
+ struct rxrpc_peer *peer;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
int ret;
@@ -280,7 +281,8 @@ struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
if (ret < 0)
return ERR_PTR(ret);
- return rxrpc_lookup_peer(rx->local, srx, gfp);
+ peer = rxrpc_lookup_peer(rx->local, srx, gfp);
+ return peer ?: ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(rxrpc_kernel_lookup_peer);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index bd51522c7953..7d5e50c921a0 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1360,6 +1360,12 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}
+ if (bind && !(flags & TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Attaching ct to a non ingress/clsact qdisc is unsupported");
+ return -EOPNOTSUPP;
+ }
+
err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
if (err < 0)
return err;
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 686eaed81b81..fdbfcaa3e2ab 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -32,9 +32,12 @@ static ktime_t gate_get_time(struct tcf_gate *gact)
return KTIME_MAX;
}
-static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+static void tcf_gate_params_free_rcu(struct rcu_head *head);
+
+static void gate_get_start_time(struct tcf_gate *gact,
+ const struct tcf_gate_params *param,
+ ktime_t *start)
{
- struct tcf_gate_params *param = &gact->param;
ktime_t now, base, cycle;
u64 n;
@@ -69,12 +72,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
{
struct tcf_gate *gact = container_of(timer, struct tcf_gate,
hitimer);
- struct tcf_gate_params *p = &gact->param;
struct tcfg_gate_entry *next;
+ struct tcf_gate_params *p;
ktime_t close_time, now;
spin_lock(&gact->tcf_lock);
+ p = rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
next = gact->next_entry;
/* cycle start, clear pending bit, clear total octets */
@@ -225,6 +230,35 @@ static void release_entry_list(struct list_head *entries)
}
}
+static int tcf_gate_copy_entries(struct tcf_gate_params *dst,
+ const struct tcf_gate_params *src,
+ struct netlink_ext_ack *extack)
+{
+ struct tcfg_gate_entry *entry;
+ int i = 0;
+
+ list_for_each_entry(entry, &src->entries, list) {
+ struct tcfg_gate_entry *new;
+
+ new = kzalloc(sizeof(*new), GFP_ATOMIC);
+ if (!new) {
+ NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+ return -ENOMEM;
+ }
+
+ new->index = entry->index;
+ new->gate_state = entry->gate_state;
+ new->interval = entry->interval;
+ new->ipv = entry->ipv;
+ new->maxoctets = entry->maxoctets;
+ list_add_tail(&new->list, &dst->entries);
+ i++;
+ }
+
+ dst->num_entries = i;
+ return 0;
+}
+
static int parse_gate_list(struct nlattr *list_attr,
struct tcf_gate_params *sched,
struct netlink_ext_ack *extack)
@@ -270,24 +304,44 @@ release_list:
return err;
}
-static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
- enum tk_offsets tko, s32 clockid,
- bool do_init)
+static bool gate_timer_needs_cancel(u64 basetime, u64 old_basetime,
+ enum tk_offsets tko,
+ enum tk_offsets old_tko,
+ s32 clockid, s32 old_clockid)
{
- if (!do_init) {
- if (basetime == gact->param.tcfg_basetime &&
- tko == gact->tk_offset &&
- clockid == gact->param.tcfg_clockid)
- return;
+ return basetime != old_basetime ||
+ clockid != old_clockid ||
+ tko != old_tko;
+}
- spin_unlock_bh(&gact->tcf_lock);
- hrtimer_cancel(&gact->hitimer);
- spin_lock_bh(&gact->tcf_lock);
+static int gate_clock_resolve(s32 clockid, enum tk_offsets *tko,
+ struct netlink_ext_ack *extack)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ *tko = TK_OFFS_REAL;
+ return 0;
+ case CLOCK_MONOTONIC:
+ *tko = TK_OFFS_MAX;
+ return 0;
+ case CLOCK_BOOTTIME:
+ *tko = TK_OFFS_BOOT;
+ return 0;
+ case CLOCK_TAI:
+ *tko = TK_OFFS_TAI;
+ return 0;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ return -EINVAL;
}
- gact->param.tcfg_basetime = basetime;
- gact->param.tcfg_clockid = clockid;
- gact->tk_offset = tko;
- hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT);
+}
+
+static void gate_setup_timer(struct tcf_gate *gact, s32 clockid,
+ enum tk_offsets tko)
+{
+ WRITE_ONCE(gact->tk_offset, tko);
+ hrtimer_setup(&gact->hitimer, gate_timer_func, clockid,
+ HRTIMER_MODE_ABS_SOFT);
}
static int tcf_gate_init(struct net *net, struct nlattr *nla,
@@ -296,15 +350,22 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
- enum tk_offsets tk_offset = TK_OFFS_TAI;
+ u64 cycletime = 0, basetime = 0, cycletime_ext = 0;
+ struct tcf_gate_params *p = NULL, *old_p = NULL;
+ enum tk_offsets old_tk_offset = TK_OFFS_TAI;
+ const struct tcf_gate_params *cur_p = NULL;
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GATE_MAX + 1];
+ enum tk_offsets tko = TK_OFFS_TAI;
struct tcf_chain *goto_ch = NULL;
- u64 cycletime = 0, basetime = 0;
- struct tcf_gate_params *p;
+ s32 timer_clockid = CLOCK_TAI;
+ bool use_old_entries = false;
+ s32 old_clockid = CLOCK_TAI;
+ bool need_cancel = false;
s32 clockid = CLOCK_TAI;
struct tcf_gate *gact;
struct tc_gate *parm;
+ u64 old_basetime = 0;
int ret = 0, err;
u32 gflags = 0;
s32 prio = -1;
@@ -321,26 +382,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_GATE_PARMS])
return -EINVAL;
- if (tb[TCA_GATE_CLOCKID]) {
+ if (tb[TCA_GATE_CLOCKID])
clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
- switch (clockid) {
- case CLOCK_REALTIME:
- tk_offset = TK_OFFS_REAL;
- break;
- case CLOCK_MONOTONIC:
- tk_offset = TK_OFFS_MAX;
- break;
- case CLOCK_BOOTTIME:
- tk_offset = TK_OFFS_BOOT;
- break;
- case CLOCK_TAI:
- tk_offset = TK_OFFS_TAI;
- break;
- default:
- NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
- return -EINVAL;
- }
- }
parm = nla_data(tb[TCA_GATE_PARMS]);
index = parm->index;
@@ -366,6 +409,60 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
+ gact = to_gate(*a);
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ err = -ENOMEM;
+ goto chain_put;
+ }
+ INIT_LIST_HEAD(&p->entries);
+
+ use_old_entries = !tb[TCA_GATE_ENTRY_LIST];
+ if (!use_old_entries) {
+ err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
+ if (err < 0)
+ goto err_free;
+ use_old_entries = !err;
+ }
+
+ if (ret == ACT_P_CREATED && use_old_entries) {
+ NL_SET_ERR_MSG(extack, "The entry list is empty");
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ if (ret != ACT_P_CREATED) {
+ rcu_read_lock();
+ cur_p = rcu_dereference(gact->param);
+
+ old_basetime = cur_p->tcfg_basetime;
+ old_clockid = cur_p->tcfg_clockid;
+ old_tk_offset = READ_ONCE(gact->tk_offset);
+
+ basetime = old_basetime;
+ cycletime_ext = cur_p->tcfg_cycletime_ext;
+ prio = cur_p->tcfg_priority;
+ gflags = cur_p->tcfg_flags;
+
+ if (!tb[TCA_GATE_CLOCKID])
+ clockid = old_clockid;
+
+ err = 0;
+ if (use_old_entries) {
+ err = tcf_gate_copy_entries(p, cur_p, extack);
+ if (!err && !tb[TCA_GATE_CYCLE_TIME])
+ cycletime = cur_p->tcfg_cycletime;
+ }
+ rcu_read_unlock();
+ if (err)
+ goto err_free;
+ }
+
if (tb[TCA_GATE_PRIORITY])
prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
@@ -375,25 +472,26 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (tb[TCA_GATE_FLAGS])
gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
- gact = to_gate(*a);
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&gact->param.entries);
+ if (tb[TCA_GATE_CYCLE_TIME])
+ cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
- if (err < 0)
- goto release_idr;
+ if (tb[TCA_GATE_CYCLE_TIME_EXT])
+ cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
- spin_lock_bh(&gact->tcf_lock);
- p = &gact->param;
+ err = gate_clock_resolve(clockid, &tko, extack);
+ if (err)
+ goto err_free;
+ timer_clockid = clockid;
- if (tb[TCA_GATE_CYCLE_TIME])
- cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
+ need_cancel = ret != ACT_P_CREATED &&
+ gate_timer_needs_cancel(basetime, old_basetime,
+ tko, old_tk_offset,
+ timer_clockid, old_clockid);
- if (tb[TCA_GATE_ENTRY_LIST]) {
- err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
- if (err < 0)
- goto chain_put;
- }
+ if (need_cancel)
+ hrtimer_cancel(&gact->hitimer);
+
+ spin_lock_bh(&gact->tcf_lock);
if (!cycletime) {
struct tcfg_gate_entry *entry;
@@ -402,22 +500,20 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
list_for_each_entry(entry, &p->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
cycletime = cycle;
- if (!cycletime) {
- err = -EINVAL;
- goto chain_put;
- }
}
p->tcfg_cycletime = cycletime;
+ p->tcfg_cycletime_ext = cycletime_ext;
- if (tb[TCA_GATE_CYCLE_TIME_EXT])
- p->tcfg_cycletime_ext =
- nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
-
- gate_setup_timer(gact, basetime, tk_offset, clockid,
- ret == ACT_P_CREATED);
+ if (need_cancel || ret == ACT_P_CREATED)
+ gate_setup_timer(gact, timer_clockid, tko);
p->tcfg_priority = prio;
p->tcfg_flags = gflags;
- gate_get_start_time(gact, &start);
+ p->tcfg_basetime = basetime;
+ p->tcfg_clockid = timer_clockid;
+ gate_get_start_time(gact, p, &start);
+
+ old_p = rcu_replace_pointer(gact->param, p,
+ lockdep_is_held(&gact->tcf_lock));
gact->current_close_time = start;
gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
@@ -434,11 +530,15 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+ if (old_p)
+ call_rcu(&old_p->rcu, tcf_gate_params_free_rcu);
+
return ret;
+err_free:
+ release_entry_list(&p->entries);
+ kfree(p);
chain_put:
- spin_unlock_bh(&gact->tcf_lock);
-
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
@@ -446,21 +546,29 @@ release_idr:
* without taking tcf_lock.
*/
if (ret == ACT_P_CREATED)
- gate_setup_timer(gact, gact->param.tcfg_basetime,
- gact->tk_offset, gact->param.tcfg_clockid,
- true);
+ gate_setup_timer(gact, timer_clockid, tko);
+
tcf_idr_release(*a, bind);
return err;
}
+static void tcf_gate_params_free_rcu(struct rcu_head *head)
+{
+ struct tcf_gate_params *p = container_of(head, struct tcf_gate_params, rcu);
+
+ release_entry_list(&p->entries);
+ kfree(p);
+}
+
static void tcf_gate_cleanup(struct tc_action *a)
{
struct tcf_gate *gact = to_gate(a);
struct tcf_gate_params *p;
- p = &gact->param;
hrtimer_cancel(&gact->hitimer);
- release_entry_list(&p->entries);
+ p = rcu_dereference_protected(gact->param, 1);
+ if (p)
+ call_rcu(&p->rcu, tcf_gate_params_free_rcu);
}
static int dumping_entry(struct sk_buff *skb,
@@ -509,10 +617,9 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
struct nlattr *entry_list;
struct tcf_t t;
- spin_lock_bh(&gact->tcf_lock);
- opt.action = gact->tcf_action;
-
- p = &gact->param;
+ rcu_read_lock();
+ opt.action = READ_ONCE(gact->tcf_action);
+ p = rcu_dereference(gact->param);
if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -552,12 +659,12 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
tcf_tm_dump(&t, &gact->tcf_tm);
if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD))
goto nla_put_failure;
- spin_unlock_bh(&gact->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&gact->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 79df81d12894..d5e8a91bb4eb 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -293,8 +293,8 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held)
/* called when adding new meta information
*/
static int __add_metainfo(const struct tcf_meta_ops *ops,
- struct tcf_ife_info *ife, u32 metaid, void *metaval,
- int len, bool atomic, bool exists)
+ struct tcf_ife_params *p, u32 metaid, void *metaval,
+ int len, bool atomic)
{
struct tcf_meta_info *mi = NULL;
int ret = 0;
@@ -313,45 +313,40 @@ static int __add_metainfo(const struct tcf_meta_ops *ops,
}
}
- if (exists)
- spin_lock_bh(&ife->tcf_lock);
- list_add_tail(&mi->metalist, &ife->metalist);
- if (exists)
- spin_unlock_bh(&ife->tcf_lock);
+ list_add_tail(&mi->metalist, &p->metalist);
return ret;
}
static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops,
- struct tcf_ife_info *ife, u32 metaid,
- bool exists)
+ struct tcf_ife_params *p, u32 metaid)
{
int ret;
if (!try_module_get(ops->owner))
return -ENOENT;
- ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists);
+ ret = __add_metainfo(ops, p, metaid, NULL, 0, true);
if (ret)
module_put(ops->owner);
return ret;
}
-static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
- int len, bool exists)
+static int add_metainfo(struct tcf_ife_params *p, u32 metaid, void *metaval,
+ int len)
{
const struct tcf_meta_ops *ops = find_ife_oplist(metaid);
int ret;
if (!ops)
return -ENOENT;
- ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists);
+ ret = __add_metainfo(ops, p, metaid, metaval, len, false);
if (ret)
/*put back what find_ife_oplist took */
module_put(ops->owner);
return ret;
}
-static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
+static int use_all_metadata(struct tcf_ife_params *p)
{
struct tcf_meta_ops *o;
int rc = 0;
@@ -359,7 +354,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
read_lock(&ife_mod_lock);
list_for_each_entry(o, &ifeoplist, list) {
- rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists);
+ rc = add_metainfo_and_get_ops(o, p, o->metaid);
if (rc == 0)
installed += 1;
}
@@ -371,7 +366,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
return -EINVAL;
}
-static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
+static int dump_metalist(struct sk_buff *skb, struct tcf_ife_params *p)
{
struct tcf_meta_info *e;
struct nlattr *nest;
@@ -379,14 +374,14 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
int total_encoded = 0;
/*can only happen on decode */
- if (list_empty(&ife->metalist))
+ if (list_empty(&p->metalist))
return 0;
nest = nla_nest_start_noflag(skb, TCA_IFE_METALST);
if (!nest)
goto out_nlmsg_trim;
- list_for_each_entry(e, &ife->metalist, metalist) {
+ list_for_each_entry(e, &p->metalist, metalist) {
if (!e->ops->get(skb, e))
total_encoded += 1;
}
@@ -403,13 +398,11 @@ out_nlmsg_trim:
return -1;
}
-/* under ife->tcf_lock */
-static void _tcf_ife_cleanup(struct tc_action *a)
+static void __tcf_ife_cleanup(struct tcf_ife_params *p)
{
- struct tcf_ife_info *ife = to_ife(a);
struct tcf_meta_info *e, *n;
- list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
+ list_for_each_entry_safe(e, n, &p->metalist, metalist) {
list_del(&e->metalist);
if (e->metaval) {
if (e->ops->release)
@@ -422,18 +415,23 @@ static void _tcf_ife_cleanup(struct tc_action *a)
}
}
+static void tcf_ife_cleanup_params(struct rcu_head *head)
+{
+ struct tcf_ife_params *p = container_of(head, struct tcf_ife_params,
+ rcu);
+
+ __tcf_ife_cleanup(p);
+ kfree(p);
+}
+
static void tcf_ife_cleanup(struct tc_action *a)
{
struct tcf_ife_info *ife = to_ife(a);
struct tcf_ife_params *p;
- spin_lock_bh(&ife->tcf_lock);
- _tcf_ife_cleanup(a);
- spin_unlock_bh(&ife->tcf_lock);
-
p = rcu_dereference_protected(ife->params, 1);
if (p)
- kfree_rcu(p, rcu);
+ call_rcu(&p->rcu, tcf_ife_cleanup_params);
}
static int load_metalist(struct nlattr **tb, bool rtnl_held)
@@ -455,8 +453,7 @@ static int load_metalist(struct nlattr **tb, bool rtnl_held)
return 0;
}
-static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
- bool exists, bool rtnl_held)
+static int populate_metalist(struct tcf_ife_params *p, struct nlattr **tb)
{
int len = 0;
int rc = 0;
@@ -468,7 +465,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
val = nla_data(tb[i]);
len = nla_len(tb[i]);
- rc = add_metainfo(ife, i, val, len, exists);
+ rc = add_metainfo(p, i, val, len);
if (rc)
return rc;
}
@@ -523,6 +520,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
p = kzalloc_obj(*p);
if (!p)
return -ENOMEM;
+ INIT_LIST_HEAD(&p->metalist);
if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
@@ -567,8 +565,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
ife = to_ife(*a);
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&ife->metalist);
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
@@ -600,8 +596,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
if (tb[TCA_IFE_METALST]) {
- err = populate_metalist(ife, tb2, exists,
- !(flags & TCA_ACT_FLAGS_NO_RTNL));
+ err = populate_metalist(p, tb2);
if (err)
goto metadata_parse_err;
} else {
@@ -610,7 +605,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
* as we can. You better have at least one else we are
* going to bail out
*/
- err = use_all_metadata(ife, exists);
+ err = use_all_metadata(p);
if (err)
goto metadata_parse_err;
}
@@ -626,13 +621,14 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (p)
- kfree_rcu(p, rcu);
+ call_rcu(&p->rcu, tcf_ife_cleanup_params);
return ret;
metadata_parse_err:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
+ __tcf_ife_cleanup(p);
kfree(p);
tcf_idr_release(*a, bind);
return err;
@@ -679,7 +675,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type))
goto nla_put_failure;
- if (dump_metalist(skb, ife)) {
+ if (dump_metalist(skb, p)) {
/*ignore failure to dump metalist */
pr_info("Failed to dump metalist\n");
}
@@ -693,13 +689,13 @@ nla_put_failure:
return -1;
}
-static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
+static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_params *p,
u16 metaid, u16 mlen, void *mdata)
{
struct tcf_meta_info *e;
/* XXX: use hash to speed up */
- list_for_each_entry(e, &ife->metalist, metalist) {
+ list_for_each_entry_rcu(e, &p->metalist, metalist) {
if (metaid == e->metaid) {
if (e->ops) {
/* We check for decode presence already */
@@ -716,10 +712,13 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
{
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
+ struct tcf_ife_params *p;
u8 *ifehdr_end;
u8 *tlv_data;
u16 metalen;
+ p = rcu_dereference_bh(ife->params);
+
bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
@@ -745,7 +744,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
return TC_ACT_SHOT;
}
- if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) {
+ if (find_decode_metaid(skb, p, mtype, dlen, curr_data)) {
/* abuse overlimits to count when we receive metadata
* but dont have an ops for it
*/
@@ -769,12 +768,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
/*XXX: check if we can do this at install time instead of current
* send data path
**/
-static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
+static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_params *p)
{
- struct tcf_meta_info *e, *n;
+ struct tcf_meta_info *e;
int tot_run_sz = 0, run_sz = 0;
- list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
+ list_for_each_entry_rcu(e, &p->metalist, metalist) {
if (e->ops->check_presence) {
run_sz = e->ops->check_presence(skb, e);
tot_run_sz += run_sz;
@@ -795,7 +794,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA
where ORIGDATA = original ethernet header ...
*/
- u16 metalen = ife_get_sz(skb, ife);
+ u16 metalen = ife_get_sz(skb, p);
int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN;
unsigned int skboff = 0;
int new_len = skb->len + hdrm;
@@ -833,25 +832,21 @@ drop:
if (!ife_meta)
goto drop;
- spin_lock(&ife->tcf_lock);
-
/* XXX: we dont have a clever way of telling encode to
* not repeat some of the computations that are done by
* ops->presence_check...
*/
- list_for_each_entry(e, &ife->metalist, metalist) {
+ list_for_each_entry_rcu(e, &p->metalist, metalist) {
if (e->ops->encode) {
err = e->ops->encode(skb, (void *)(ife_meta + skboff),
e);
}
if (err < 0) {
/* too corrupt to keep around if overwritten */
- spin_unlock(&ife->tcf_lock);
goto drop;
}
skboff += err;
}
- spin_unlock(&ife->tcf_lock);
oethh = (struct ethhdr *)skb->data;
if (!is_zero_ether_addr(p->eth_src))
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 343309e9009b..4829c27446e3 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2228,6 +2228,11 @@ static bool is_qdisc_ingress(__u32 classid)
return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS));
}
+static bool is_ingress_or_clsact(struct tcf_block *block, struct Qdisc *q)
+{
+ return tcf_block_shared(block) || (q && !!(q->flags & TCQ_F_INGRESS));
+}
+
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
@@ -2420,6 +2425,8 @@ replay:
flags |= TCA_ACT_FLAGS_NO_RTNL;
if (is_qdisc_ingress(parent))
flags |= TCA_ACT_FLAGS_AT_INGRESS;
+ if (is_ingress_or_clsact(block, q))
+ flags |= TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT;
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
flags, extack);
if (err == 0) {
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index a01f14b1c216..9efe23f8371b 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -391,8 +391,8 @@ static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
1239850263, 1191209601, 1147878294, 1108955788
};
-static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
- u64 target_ns, u64 rtt_est_ns);
+static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust);
+
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
*
@@ -2013,7 +2013,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
u64 delay;
u32 len;
- if (q->config->is_shared && now - q->last_checked_active >= q->config->sync_time) {
+ if (q->config->is_shared && q->rate_ns &&
+ now - q->last_checked_active >= q->config->sync_time) {
struct net_device *dev = qdisc_dev(sch);
struct cake_sched_data *other_priv;
u64 new_rate = q->config->rate_bps;
@@ -2039,12 +2040,9 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
if (num_active_qs > 1)
new_rate = div64_u64(q->config->rate_bps, num_active_qs);
- /* mtu = 0 is used to only update the rate and not mess with cobalt params */
- cake_set_rate(b, new_rate, 0, 0, 0);
+ cake_configure_rates(sch, new_rate, true);
q->last_checked_active = now;
q->active_queues = num_active_qs;
- q->rate_ns = b->tin_rate_ns;
- q->rate_shft = b->tin_rate_shft;
}
begin:
@@ -2361,12 +2359,10 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
b->cparams.p_dec = 1 << 20; /* 1/4096 */
}
-static int cake_config_besteffort(struct Qdisc *sch)
+static int cake_config_besteffort(struct Qdisc *sch, u64 rate, u32 mtu)
{
struct cake_sched_data *q = qdisc_priv(sch);
struct cake_tin_data *b = &q->tins[0];
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
q->tin_cnt = 1;
@@ -2380,12 +2376,10 @@ static int cake_config_besteffort(struct Qdisc *sch)
return 0;
}
-static int cake_config_precedence(struct Qdisc *sch)
+static int cake_config_precedence(struct Qdisc *sch, u64 rate, u32 mtu)
{
/* convert high-level (user visible) parameters into internal format */
struct cake_sched_data *q = qdisc_priv(sch);
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
u32 quantum = 256;
u32 i;
@@ -2456,7 +2450,7 @@ static int cake_config_precedence(struct Qdisc *sch)
* Total 12 traffic classes.
*/
-static int cake_config_diffserv8(struct Qdisc *sch)
+static int cake_config_diffserv8(struct Qdisc *sch, u64 rate, u32 mtu)
{
/* Pruned list of traffic classes for typical applications:
*
@@ -2473,8 +2467,6 @@ static int cake_config_diffserv8(struct Qdisc *sch)
*/
struct cake_sched_data *q = qdisc_priv(sch);
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
u32 quantum = 256;
u32 i;
@@ -2504,7 +2496,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
return 0;
}
-static int cake_config_diffserv4(struct Qdisc *sch)
+static int cake_config_diffserv4(struct Qdisc *sch, u64 rate, u32 mtu)
{
/* Further pruned list of traffic classes for four-class system:
*
@@ -2517,8 +2509,6 @@ static int cake_config_diffserv4(struct Qdisc *sch)
*/
struct cake_sched_data *q = qdisc_priv(sch);
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
u32 quantum = 1024;
q->tin_cnt = 4;
@@ -2546,7 +2536,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
return 0;
}
-static int cake_config_diffserv3(struct Qdisc *sch)
+static int cake_config_diffserv3(struct Qdisc *sch, u64 rate, u32 mtu)
{
/* Simplified Diffserv structure with 3 tins.
* Latency Sensitive (CS7, CS6, EF, VA, TOS4)
@@ -2554,8 +2544,6 @@ static int cake_config_diffserv3(struct Qdisc *sch)
* Low Priority (LE, CS1)
*/
struct cake_sched_data *q = qdisc_priv(sch);
- u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->config->rate_bps;
u32 quantum = 1024;
q->tin_cnt = 3;
@@ -2580,32 +2568,33 @@ static int cake_config_diffserv3(struct Qdisc *sch)
return 0;
}
-static void cake_reconfigure(struct Qdisc *sch)
+static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust)
{
+ u32 mtu = likely(rate_adjust) ? 0 : psched_mtu(qdisc_dev(sch));
struct cake_sched_data *qd = qdisc_priv(sch);
struct cake_sched_config *q = qd->config;
int c, ft;
switch (q->tin_mode) {
case CAKE_DIFFSERV_BESTEFFORT:
- ft = cake_config_besteffort(sch);
+ ft = cake_config_besteffort(sch, rate, mtu);
break;
case CAKE_DIFFSERV_PRECEDENCE:
- ft = cake_config_precedence(sch);
+ ft = cake_config_precedence(sch, rate, mtu);
break;
case CAKE_DIFFSERV_DIFFSERV8:
- ft = cake_config_diffserv8(sch);
+ ft = cake_config_diffserv8(sch, rate, mtu);
break;
case CAKE_DIFFSERV_DIFFSERV4:
- ft = cake_config_diffserv4(sch);
+ ft = cake_config_diffserv4(sch, rate, mtu);
break;
case CAKE_DIFFSERV_DIFFSERV3:
default:
- ft = cake_config_diffserv3(sch);
+ ft = cake_config_diffserv3(sch, rate, mtu);
break;
}
@@ -2616,6 +2605,14 @@ static void cake_reconfigure(struct Qdisc *sch)
qd->rate_ns = qd->tins[ft].tin_rate_ns;
qd->rate_shft = qd->tins[ft].tin_rate_shft;
+}
+
+static void cake_reconfigure(struct Qdisc *sch)
+{
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
+
+ cake_configure_rates(sch, qd->config->rate_bps, false);
if (q->buffer_config_limit) {
qd->buffer_limit = q->buffer_config_limit;
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 306e046276d4..a4b07b661b77 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -115,12 +115,12 @@ static void ets_offload_change(struct Qdisc *sch)
struct ets_sched *q = qdisc_priv(sch);
struct tc_ets_qopt_offload qopt;
unsigned int w_psum_prev = 0;
- unsigned int q_psum = 0;
- unsigned int q_sum = 0;
unsigned int quantum;
unsigned int w_psum;
unsigned int weight;
unsigned int i;
+ u64 q_psum = 0;
+ u64 q_sum = 0;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
@@ -138,8 +138,12 @@ static void ets_offload_change(struct Qdisc *sch)
for (i = 0; i < q->nbands; i++) {
quantum = q->classes[i].quantum;
- q_psum += quantum;
- w_psum = quantum ? q_psum * 100 / q_sum : 0;
+ if (quantum) {
+ q_psum += quantum;
+ w_psum = div64_u64(q_psum * 100, q_sum);
+ } else {
+ w_psum = 0;
+ }
weight = w_psum - w_psum_prev;
w_psum_prev = w_psum;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 80235e85f844..05084c9af48e 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -827,6 +827,7 @@ static void fq_reset(struct Qdisc *sch)
for (idx = 0; idx < FQ_BANDS; idx++) {
q->band_flows[idx].new_flows.first = NULL;
q->band_flows[idx].old_flows.first = NULL;
+ q->band_pkt_count[idx] = 0;
}
q->delayed = RB_ROOT;
q->flows = 0;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6e4bdaa876ed..783300d8b019 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -315,6 +315,7 @@ restart:
if (__netif_tx_trylock(slave_txq)) {
unsigned int length = qdisc_pkt_len(skb);
+ skb->dev = slave;
if (!netif_xmit_frozen_or_stopped(slave_txq) &&
netdev_start_xmit(skb, slave, slave_txq, false) ==
NETDEV_TX_OK) {
diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c
index 005bfc766e22..3fd6629cb999 100644
--- a/net/shaper/shaper.c
+++ b/net/shaper/shaper.c
@@ -759,11 +759,7 @@ int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
if (ret)
goto free_msg;
- ret = genlmsg_reply(msg, info);
- if (ret)
- goto free_msg;
-
- return 0;
+ return genlmsg_reply(msg, info);
free_msg:
nlmsg_free(msg);
@@ -1313,10 +1309,7 @@ int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info)
if (ret)
goto free_msg;
- ret = genlmsg_reply(msg, info);
- if (ret)
- goto free_msg;
- return 0;
+ return genlmsg_reply(msg, info);
free_msg:
nlmsg_free(msg);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 15bbf953dfad..b51a162885bb 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1362,7 +1362,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed)
needed += RPCRDMA_MAX_RECV_BATCH;
if (atomic_inc_return(&ep->re_receiving) > 1)
- goto out;
+ goto out_dec;
/* fast path: all needed reps can be found on the free list */
wr = NULL;
@@ -1385,7 +1385,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed)
++count;
}
if (!wr)
- goto out;
+ goto out_dec;
rc = ib_post_recv(ep->re_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
@@ -1400,9 +1400,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed)
--count;
}
}
+
+out_dec:
if (atomic_dec_return(&ep->re_receiving) > 0)
complete(&ep->re_done);
-
out:
trace_xprtrdma_post_recvs(r_xprt, count);
ep->re_receive_count += count;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 4c618c2b871d..9329919fb07f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2233,6 +2233,8 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
if (skb_queue_empty(&sk->sk_write_queue))
break;
get_random_bytes(&delay, 2);
+ if (tsk->conn_timeout < 4)
+ tsk->conn_timeout = 4;
delay %= (tsk->conn_timeout / 4);
delay = msecs_to_jiffies(delay + 100);
sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3756a93dc63a..7eaa5b187fef 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1785,7 +1785,7 @@ restart:
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
- other->sk_data_ready(other);
+ READ_ONCE(other->sk_data_ready)(other);
sock_put(other);
return 0;
@@ -2278,7 +2278,7 @@ restart_locked:
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
unix_state_unlock(other);
- other->sk_data_ready(other);
+ READ_ONCE(other->sk_data_ready)(other);
sock_put(other);
scm_destroy(&scm);
return len;
@@ -2351,7 +2351,7 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
sk_send_sigurg(other);
unix_state_unlock(other);
- other->sk_data_ready(other);
+ READ_ONCE(other->sk_data_ready)(other);
return 0;
out_unlock:
@@ -2477,7 +2477,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
- other->sk_data_ready(other);
+ READ_ONCE(other->sk_data_ready)(other);
sent += size;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3b46bc635c43..6149f6a79897 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -167,26 +167,32 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
struct xdp_buff_xsk *pos, *tmp;
struct list_head *xskb_list;
u32 contd = 0;
+ u32 num_desc;
int err;
- if (frags)
- contd = XDP_PKT_CONTD;
+ if (likely(!frags)) {
+ err = __xsk_rcv_zc(xs, xskb, len, contd);
+ if (err)
+ goto err;
+ return 0;
+ }
- err = __xsk_rcv_zc(xs, xskb, len, contd);
- if (err)
+ contd = XDP_PKT_CONTD;
+ num_desc = xdp_get_shared_info_from_buff(xdp)->nr_frags + 1;
+ if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
+ xs->rx_queue_full++;
+ err = -ENOBUFS;
goto err;
- if (likely(!frags))
- return 0;
+ }
+ __xsk_rcv_zc(xs, xskb, len, contd);
xskb_list = &xskb->pool->xskb_list;
list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
if (list_is_singular(xskb_list))
contd = 0;
len = pos->xdp.data_end - pos->xdp.data;
- err = __xsk_rcv_zc(xs, pos, len, contd);
- if (err)
- goto err;
- list_del(&pos->list_node);
+ __xsk_rcv_zc(xs, pos, len, contd);
+ list_del_init(&pos->list_node);
}
return 0;