Diffstat (limited to 'net')
-rw-r--r-- net/bluetooth/hci_sync.c | 1
-rw-r--r-- net/bluetooth/mgmt.c | 6
-rw-r--r-- net/bpf/test_run.c | 25
-rw-r--r-- net/bridge/br_fdb.c | 28
-rw-r--r-- net/bridge/br_input.c | 4
-rw-r--r-- net/bridge/br_private.h | 1
-rw-r--r-- net/bridge/br_vlan_tunnel.c | 11
-rw-r--r-- net/bridge/netfilter/ebtables.c | 2
-rw-r--r-- net/caif/cffrml.c | 9
-rw-r--r-- net/can/Kconfig | 1
-rw-r--r-- net/can/j1939/socket.c | 6
-rw-r--r-- net/can/j1939/transport.c | 12
-rw-r--r-- net/can/raw.c | 51
-rw-r--r-- net/ceph/messenger_v2.c | 2
-rw-r--r-- net/ceph/mon_client.c | 2
-rw-r--r-- net/ceph/osd_client.c | 14
-rw-r--r-- net/ceph/osdmap.c | 24
-rw-r--r-- net/core/dev.c | 39
-rw-r--r-- net/core/dst.c | 1
-rw-r--r-- net/core/hotdata.c | 1
-rw-r--r-- net/core/skbuff.c | 8
-rw-r--r-- net/core/sock.c | 7
-rw-r--r-- net/core/sysctl_net_core.c | 7
-rw-r--r-- net/dsa/dsa.c | 67
-rw-r--r-- net/ethtool/ioctl.c | 30
-rw-r--r-- net/handshake/netlink.c | 3
-rw-r--r-- net/handshake/request.c | 8
-rw-r--r-- net/hsr/hsr_forward.c | 2
-rw-r--r-- net/ipv4/arp.c | 7
-rw-r--r-- net/ipv4/esp4_offload.c | 4
-rw-r--r-- net/ipv4/fib_semantics.c | 26
-rw-r--r-- net/ipv4/fib_trie.c | 7
-rw-r--r-- net/ipv4/inet_fragment.c | 57
-rw-r--r-- net/ipv4/ip_fragment.c | 22
-rw-r--r-- net/ipv4/ip_gre.c | 17
-rw-r--r-- net/ipv4/ip_tunnel.c | 5
-rw-r--r-- net/ipv4/ping.c | 4
-rw-r--r-- net/ipv4/route.c | 4
-rw-r--r-- net/ipv4/tcp.c | 8
-rw-r--r-- net/ipv4/udp.c | 1
-rw-r--r-- net/ipv6/addrconf.c | 4
-rw-r--r-- net/ipv6/calipso.c | 3
-rw-r--r-- net/ipv6/esp6_offload.c | 4
-rw-r--r-- net/ipv6/ip6_gre.c | 15
-rw-r--r-- net/ipv6/ip6_tunnel.c | 2
-rw-r--r-- net/ipv6/route.c | 17
-rw-r--r-- net/mac80211/cfg.c | 10
-rw-r--r-- net/mac80211/chan.c | 3
-rw-r--r-- net/mac80211/iface.c | 2
-rw-r--r-- net/mac80211/mlme.c | 5
-rw-r--r-- net/mac80211/ocb.c | 3
-rw-r--r-- net/mac80211/rx.c | 5
-rw-r--r-- net/mac80211/sta_info.c | 7
-rw-r--r-- net/mac80211/tx.c | 2
-rw-r--r-- net/mptcp/Kconfig | 2
-rw-r--r-- net/mptcp/options.c | 10
-rw-r--r-- net/mptcp/pm_netlink.c | 3
-rw-r--r-- net/mptcp/protocol.c | 30
-rw-r--r-- net/mptcp/protocol.h | 9
-rw-r--r-- net/mptcp/subflow.c | 6
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 3
-rw-r--r-- net/netfilter/nf_conncount.c | 27
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 3
-rw-r--r-- net/netfilter/nf_flow_table_path.c | 4
-rw-r--r-- net/netfilter/nf_log.c | 4
-rw-r--r-- net/netfilter/nf_nat_core.c | 14
-rw-r--r-- net/netfilter/nf_tables_api.c | 87
-rw-r--r-- net/netfilter/nft_set_pipapo.c | 4
-rw-r--r-- net/netfilter/nft_synproxy.c | 6
-rw-r--r-- net/netfilter/x_tables.c | 2
-rw-r--r-- net/netrom/nr_out.c | 4
-rw-r--r-- net/nfc/core.c | 9
-rw-r--r-- net/openvswitch/flow_netlink.c | 13
-rw-r--r-- net/openvswitch/vport-netdev.c | 17
-rw-r--r-- net/rose/af_rose.c | 2
-rw-r--r-- net/sched/act_api.c | 2
-rw-r--r-- net/sched/act_mirred.c | 15
-rw-r--r-- net/sched/sch_ets.c | 6
-rw-r--r-- net/sched/sch_qfq.c | 8
-rw-r--r-- net/sctp/ipv6.c | 2
-rw-r--r-- net/sctp/socket.c | 7
-rw-r--r-- net/smc/Kconfig | 4
-rw-r--r-- net/sunrpc/auth_gss/svcauth_gss.c | 3
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_rw.c | 7
-rw-r--r-- net/unix/af_unix.c | 9
-rw-r--r-- net/unix/garbage.c | 2
-rw-r--r-- net/vmw_vsock/af_vsock.c | 4
-rw-r--r-- net/wireless/sme.c | 2
-rw-r--r-- net/wireless/wext-core.c | 4
-rw-r--r-- net/wireless/wext-priv.c | 4
-rw-r--r-- net/xfrm/xfrm_state.c | 1
91 files changed, 622 insertions, 333 deletions
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index a9f5b1a68356..cbc3a75d7326 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4420,6 +4420,7 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
if (bis_capable(hdev)) {
events[1] |= 0x20; /* LE PA Report */
events[1] |= 0x40; /* LE PA Sync Established */
+ events[1] |= 0x80; /* LE PA Sync Lost */
events[3] |= 0x04; /* LE Create BIG Complete */
events[3] |= 0x08; /* LE Terminate BIG Complete */
events[3] |= 0x10; /* LE BIG Sync Established */
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index c11cdef42b6f..5be9b8c91949 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -849,6 +849,12 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (cis_peripheral_capable(hdev))
settings |= MGMT_SETTING_CIS_PERIPHERAL;
+ if (bis_capable(hdev))
+ settings |= MGMT_SETTING_ISO_BROADCASTER;
+
+ if (sync_recv_capable(hdev))
+ settings |= MGMT_SETTING_ISO_SYNC_RECEIVER;
+
if (ll_privacy_capable(hdev))
settings |= MGMT_SETTING_LL_PRIVACY;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 655efac6f133..26cfcfdc45eb 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1294,8 +1294,6 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
batch_size = NAPI_POLL_WEIGHT;
else if (batch_size > TEST_XDP_MAX_BATCH)
return -E2BIG;
-
- headroom += sizeof(struct xdp_page_head);
} else if (batch_size) {
return -EINVAL;
}
@@ -1308,16 +1306,26 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
/* There can't be user provided data before the meta data */
if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in ||
ctx->data > ctx->data_end ||
- unlikely(xdp_metalen_invalid(ctx->data)) ||
(do_live && (kattr->test.data_out || kattr->test.ctx_out)))
goto free_ctx;
- /* Meta data is allocated from the headroom */
- headroom -= ctx->data;
meta_sz = ctx->data;
+ if (xdp_metalen_invalid(meta_sz) || meta_sz > headroom - sizeof(struct xdp_frame))
+ goto free_ctx;
+
+ /* Meta data is allocated from the headroom */
+ headroom -= meta_sz;
linear_sz = ctx->data_end;
}
+ /* The xdp_page_head structure takes up space in each page, limiting the
+ * size of the packet data; add the extra size to headroom here to make
+ * sure it's accounted in the length checks below, but not in the
+ * metadata size check above.
+ */
+ if (do_live)
+ headroom += sizeof(struct xdp_page_head);
+
max_linear_sz = PAGE_SIZE - headroom - tailroom;
linear_sz = min_t(u32, linear_sz, max_linear_sz);
@@ -1355,13 +1363,13 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (sinfo->nr_frags == MAX_SKB_FRAGS) {
ret = -ENOMEM;
- goto out;
+ goto out_put_dev;
}
page = alloc_page(GFP_KERNEL);
if (!page) {
ret = -ENOMEM;
- goto out;
+ goto out_put_dev;
}
frag = &sinfo->frags[sinfo->nr_frags++];
@@ -1373,7 +1381,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (copy_from_user(page_address(page), data_in + size,
data_len)) {
ret = -EFAULT;
- goto out;
+ goto out_put_dev;
}
sinfo->xdp_frags_size += data_len;
size += data_len;
@@ -1388,6 +1396,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
ret = bpf_test_run_xdp_live(prog, &xdp, repeat, batch_size, &duration);
else
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
+out_put_dev:
/* We convert the xdp_buff back to an xdp_md before checking the return
* code so the reference count of any held netdevice will be decremented
* even if the test run failed.
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 58d22e2b85fc..0501ffcb8a3d 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -70,7 +70,7 @@ static inline int has_expired(const struct net_bridge *br,
{
return !test_bit(BR_FDB_STATIC, &fdb->flags) &&
!test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags) &&
- time_before_eq(fdb->updated + hold_time(br), jiffies);
+ time_before_eq(READ_ONCE(fdb->updated) + hold_time(br), jiffies);
}
static int fdb_to_nud(const struct net_bridge *br,
@@ -126,9 +126,9 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
if (nla_put_u32(skb, NDA_FLAGS_EXT, ext_flags))
goto nla_put_failure;
- ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
+ ci.ndm_used = jiffies_to_clock_t(now - READ_ONCE(fdb->used));
ci.ndm_confirmed = 0;
- ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
+ ci.ndm_updated = jiffies_to_clock_t(now - READ_ONCE(fdb->updated));
ci.ndm_refcnt = 0;
if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
@@ -551,7 +551,7 @@ void br_fdb_cleanup(struct work_struct *work)
*/
rcu_read_lock();
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
- unsigned long this_timer = f->updated + delay;
+ unsigned long this_timer = READ_ONCE(f->updated) + delay;
if (test_bit(BR_FDB_STATIC, &f->flags) ||
test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) {
@@ -924,6 +924,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
{
struct net_bridge_fdb_entry *f;
struct __fdb_entry *fe = buf;
+ unsigned long delta;
int num = 0;
memset(buf, 0, maxnum*sizeof(struct __fdb_entry));
@@ -953,8 +954,11 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
fe->port_hi = f->dst->port_no >> 8;
fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags);
- if (!test_bit(BR_FDB_STATIC, &f->flags))
- fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
+ if (!test_bit(BR_FDB_STATIC, &f->flags)) {
+ delta = jiffies - READ_ONCE(f->updated);
+ fe->ageing_timer_value =
+ jiffies_delta_to_clock_t(delta);
+ }
++fe;
++num;
}
@@ -1002,8 +1006,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
unsigned long now = jiffies;
bool fdb_modified = false;
- if (now != fdb->updated) {
- fdb->updated = now;
+ if (now != READ_ONCE(fdb->updated)) {
+ WRITE_ONCE(fdb->updated, now);
fdb_modified = __fdb_mark_active(fdb);
}
@@ -1242,10 +1246,10 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (fdb_handle_notify(fdb, notify))
modified = true;
- fdb->used = jiffies;
+ WRITE_ONCE(fdb->used, jiffies);
if (modified) {
if (refresh)
- fdb->updated = jiffies;
+ WRITE_ONCE(fdb->updated, jiffies);
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
@@ -1556,7 +1560,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
goto err_unlock;
}
- fdb->updated = jiffies;
+ WRITE_ONCE(fdb->updated, jiffies);
if (READ_ONCE(fdb->dst) != p) {
WRITE_ONCE(fdb->dst, p);
@@ -1565,7 +1569,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (test_and_set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) {
/* Refresh entry */
- fdb->used = jiffies;
+ WRITE_ONCE(fdb->used, jiffies);
} else {
modified = true;
}
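
The br_fdb.c hunks above convert every lockless access to fdb->updated and fdb->used into READ_ONCE()/WRITE_ONCE() pairs, since the data path refreshes these timestamps without taking the lock that the aging, dump, and fillbuf paths hold. A minimal standalone sketch of the annotation idiom (illustrative type, not the bridge code itself):

#include <linux/compiler.h>
#include <linux/jiffies.h>

struct entry {
        unsigned long updated;  /* written locklessly from the fast path */
};

/* writer: refresh the timestamp without holding the hash lock */
static void entry_touch(struct entry *e)
{
        unsigned long now = jiffies;

        /* avoid a redundant store (and cache-line dirtying) when unchanged */
        if (READ_ONCE(e->updated) != now)
                WRITE_ONCE(e->updated, now);
}

/* reader: an aging path that may run concurrently with entry_touch() */
static bool entry_expired(const struct entry *e, unsigned long hold_time)
{
        return time_before_eq(READ_ONCE(e->updated) + hold_time, jiffies);
}
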
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 777fa869c1a1..e355a15bf5ab 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -221,8 +221,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (test_bit(BR_FDB_LOCAL, &dst->flags))
return br_pass_frame_up(skb, false);
- if (now != dst->used)
- dst->used = now;
+ if (now != READ_ONCE(dst->used))
+ WRITE_ONCE(dst->used, now);
br_forward(dst->dst, skb, local_rcv, false);
} else {
if (!mcast_hit)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 7280c4e9305f..b9b2981c4841 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -247,6 +247,7 @@ struct net_bridge_vlan {
* struct net_bridge_vlan_group
*
* @vlan_hash: VLAN entry rhashtable
+ * @tunnel_hash: Hash table to map from tunnel key ID (e.g. VXLAN VNI) to VLAN
* @vlan_list: sorted VLAN entry list
* @num_vlans: number of total VLAN entries
* @pvid: PVID VLAN id
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index a966a6ec8263..257cae9f1569 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -189,7 +189,6 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct metadata_dst *tunnel_dst;
__be64 tunnel_id;
- int err;
if (!vlan)
return 0;
@@ -199,9 +198,13 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
return 0;
skb_dst_drop(skb);
- err = skb_vlan_pop(skb);
- if (err)
- return err;
+ /* For 802.1ad (QinQ), skb_vlan_pop() incorrectly moves the C-VLAN
+ * from payload to hwaccel after clearing S-VLAN. We only need to
+ * clear the hwaccel S-VLAN; the C-VLAN must stay in payload for
+ * correct VXLAN encapsulation. This is also correct for 802.1Q
+ * where no C-VLAN exists in payload.
+ */
+ __vlan_hwaccel_clear_tag(skb);
if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
__set_bit(IP_TUNNEL_KEY_BIT, flags);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5697e3949a36..a04fc1757528 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1299,7 +1299,7 @@ int ebt_register_template(const struct ebt_table *t, int (*table_init)(struct ne
list_for_each_entry(tmpl, &template_tables, list) {
if (WARN_ON_ONCE(strcmp(t->name, tmpl->name) == 0)) {
mutex_unlock(&ebt_mutex);
- return -EEXIST;
+ return -EBUSY;
}
}
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 6651a8dc62e0..d4d63586053a 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -92,8 +92,15 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
len = le16_to_cpu(tmp);
/* Subtract for FCS on length if FCS is not used. */
- if (!this->dofcs)
+ if (!this->dofcs) {
+ if (len < 2) {
+ ++cffrml_rcv_error;
+ pr_err("Invalid frame length (%d)\n", len);
+ cfpkt_destroy(pkt);
+ return -EPROTO;
+ }
len -= 2;
+ }
if (cfpkt_setlen(pkt, len) < 0) {
++cffrml_rcv_error;
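
The cffrml_receive() fix above guards a subtraction on a length taken straight off the wire: with FCS disabled, a frame shorter than the 2-byte FCS would make len -= 2 wrap around on the unsigned type. A standalone sketch of the check, assuming a u16 length field:

#include <linux/types.h>
#include <linux/errno.h>

/* Validate a wire-supplied length before subtracting a trailer.
 * With an unsigned len, "len -= 2" on len < 2 wraps to a huge value,
 * so the lower bound must be checked first.
 */
static int frame_payload_len(u16 len, bool has_fcs, u16 *payload)
{
        if (!has_fcs) {
                if (len < 2)
                        return -EPROTO; /* runt frame: shorter than the FCS */
                len -= 2;
        }
        *payload = len;
        return 0;
}
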
diff --git a/net/can/Kconfig b/net/can/Kconfig
index e4ccf731a24c..af64a6f76458 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -5,7 +5,6 @@
menuconfig CAN
tristate "CAN bus subsystem support"
- select CAN_DEV
help
Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
communications protocol. Development of the CAN bus started in
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 6272326dd614..ff9c4fd7b433 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -482,6 +482,12 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr_unsized *uaddr, in
goto out_release_sock;
}
+ if (ndev->reg_state != NETREG_REGISTERED) {
+ dev_put(ndev);
+ ret = -ENODEV;
+ goto out_release_sock;
+ }
+
can_ml = can_get_ml_priv(ndev);
if (!can_ml) {
dev_put(ndev);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fbf5c8001c9d..8656ab388c83 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1567,6 +1567,8 @@ int j1939_session_activate(struct j1939_session *session)
if (active) {
j1939_session_put(active);
ret = -EAGAIN;
+ } else if (priv->ndev->reg_state != NETREG_REGISTERED) {
+ ret = -ENODEV;
} else {
WARN_ON_ONCE(session->state != J1939_SESSION_NEW);
list_add_tail(&session->active_session_list_entry,
@@ -1693,8 +1695,16 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
j1939_session_timers_cancel(session);
j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
- if (session->transmission)
+ if (session->transmission) {
j1939_session_deactivate_activate_next(session);
+ } else if (session->state == J1939_SESSION_WAITING_ABORT) {
+ /* Force deactivation for the receiver.
+ * If we rely on the timer starting in j1939_session_cancel,
+ * a second RTS call here will cancel that timer and fail
+ * to restart it because the state is already WAITING_ABORT.
+ */
+ j1939_session_deactivate_activate_next(session);
+ }
return -EBUSY;
}
diff --git a/net/can/raw.c b/net/can/raw.c
index be1ef7cf4204..12293363413c 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -49,8 +49,8 @@
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/can.h>
+#include <linux/can/can-ml.h>
#include <linux/can/core.h>
-#include <linux/can/dev.h> /* for can_is_canxl_dev_mtu() */
#include <linux/can/skb.h>
#include <linux/can/raw.h>
#include <net/sock.h>
@@ -892,58 +892,21 @@ static void raw_put_canxl_vcid(struct raw_sock *ro, struct sk_buff *skb)
}
}
-static inline bool raw_dev_cc_enabled(struct net_device *dev,
- struct can_priv *priv)
-{
- /* The CANXL-only mode disables error-signalling on the CAN bus
- * which is needed to send CAN CC/FD frames
- */
- if (priv)
- return !can_dev_in_xl_only_mode(priv);
-
- /* virtual CAN interfaces always support CAN CC */
- return true;
-}
-
-static inline bool raw_dev_fd_enabled(struct net_device *dev,
- struct can_priv *priv)
-{
- /* check FD ctrlmode on real CAN interfaces */
- if (priv)
- return (priv->ctrlmode & CAN_CTRLMODE_FD);
-
- /* check MTU for virtual CAN FD interfaces */
- return (READ_ONCE(dev->mtu) >= CANFD_MTU);
-}
-
-static inline bool raw_dev_xl_enabled(struct net_device *dev,
- struct can_priv *priv)
-{
- /* check XL ctrlmode on real CAN interfaces */
- if (priv)
- return (priv->ctrlmode & CAN_CTRLMODE_XL);
-
- /* check MTU for virtual CAN XL interfaces */
- return can_is_canxl_dev_mtu(READ_ONCE(dev->mtu));
-}
-
static unsigned int raw_check_txframe(struct raw_sock *ro, struct sk_buff *skb,
struct net_device *dev)
{
- struct can_priv *priv = safe_candev_priv(dev);
-
/* Classical CAN */
- if (can_is_can_skb(skb) && raw_dev_cc_enabled(dev, priv))
+ if (can_is_can_skb(skb) && can_cap_enabled(dev, CAN_CAP_CC))
return CAN_MTU;
/* CAN FD */
if (ro->fd_frames && can_is_canfd_skb(skb) &&
- raw_dev_fd_enabled(dev, priv))
+ can_cap_enabled(dev, CAN_CAP_FD))
return CANFD_MTU;
/* CAN XL */
if (ro->xl_frames && can_is_canxl_skb(skb) &&
- raw_dev_xl_enabled(dev, priv))
+ can_cap_enabled(dev, CAN_CAP_XL))
return CANXL_MTU;
return 0;
@@ -982,6 +945,12 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (!dev)
return -ENXIO;
+ /* no sending on a CAN device in read-only mode */
+ if (can_cap_enabled(dev, CAN_CAP_RO)) {
+ err = -EACCES;
+ goto put_dev;
+ }
+
skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 833e57849c1d..c9d50c0dcd33 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -2376,7 +2376,9 @@ static int process_auth_done(struct ceph_connection *con, void *p, void *end)
ceph_decode_64_safe(&p, end, global_id, bad);
ceph_decode_32_safe(&p, end, con->v2.con_mode, bad);
+
ceph_decode_32_safe(&p, end, payload_len, bad);
+ ceph_decode_need(&p, end, payload_len, bad);
dout("%s con %p global_id %llu con_mode %d payload_len %d\n",
__func__, con, global_id, con->v2.con_mode, payload_len);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index c227ececa925..fa8dd2a20f7d 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1417,7 +1417,7 @@ static int mon_handle_auth_done(struct ceph_connection *con,
if (!ret)
finish_hunting(monc);
mutex_unlock(&monc->mutex);
- return 0;
+ return ret;
}
static int mon_handle_auth_bad_method(struct ceph_connection *con,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3667319b949d..610e584524d1 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1586,6 +1586,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
struct ceph_pg_pool_info *pi;
struct ceph_pg pgid, last_pgid;
struct ceph_osds up, acting;
+ bool should_be_paused;
bool is_read = t->flags & CEPH_OSD_FLAG_READ;
bool is_write = t->flags & CEPH_OSD_FLAG_WRITE;
bool force_resend = false;
@@ -1654,10 +1655,16 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
&last_pgid))
force_resend = true;
- if (t->paused && !target_should_be_paused(osdc, t, pi)) {
- t->paused = false;
+ should_be_paused = target_should_be_paused(osdc, t, pi);
+ if (t->paused && !should_be_paused) {
unpaused = true;
}
+ if (t->paused != should_be_paused) {
+ dout("%s t %p paused %d -> %d\n", __func__, t, t->paused,
+ should_be_paused);
+ t->paused = should_be_paused;
+ }
+
legacy_change = ceph_pg_compare(&t->pgid, &pgid) ||
ceph_osds_changed(&t->acting, &acting,
t->used_replica || any_change);
@@ -4281,6 +4288,9 @@ static void osd_fault(struct ceph_connection *con)
goto out_unlock;
}
+ osd->o_sparse_op_idx = -1;
+ ceph_init_sparse_read(&osd->o_sparse_read);
+
if (!reopen_osd(osd))
kick_osd_requests(osd);
maybe_request_map(osdc);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 34b3ab59602f..92a44026de29 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -241,22 +241,26 @@ static struct crush_choose_arg_map *alloc_choose_arg_map(void)
static void free_choose_arg_map(struct crush_choose_arg_map *arg_map)
{
- if (arg_map) {
- int i, j;
+ int i, j;
+
+ if (!arg_map)
+ return;
- WARN_ON(!RB_EMPTY_NODE(&arg_map->node));
+ WARN_ON(!RB_EMPTY_NODE(&arg_map->node));
+ if (arg_map->args) {
for (i = 0; i < arg_map->size; i++) {
struct crush_choose_arg *arg = &arg_map->args[i];
-
- for (j = 0; j < arg->weight_set_size; j++)
- kfree(arg->weight_set[j].weights);
- kfree(arg->weight_set);
+ if (arg->weight_set) {
+ for (j = 0; j < arg->weight_set_size; j++)
+ kfree(arg->weight_set[j].weights);
+ kfree(arg->weight_set);
+ }
kfree(arg->ids);
}
kfree(arg_map->args);
- kfree(arg_map);
}
+ kfree(arg_map);
}
DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index,
@@ -1979,11 +1983,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
sizeof(u64) + sizeof(u32), e_inval);
ceph_decode_copy(p, &fsid, sizeof(fsid));
epoch = ceph_decode_32(p);
- BUG_ON(epoch != map->epoch+1);
ceph_decode_copy(p, &modified, sizeof(modified));
new_pool_max = ceph_decode_64(p);
new_flags = ceph_decode_32(p);
+ if (epoch != map->epoch + 1)
+ goto e_inval;
+
/* full map? */
ceph_decode_32_safe(p, end, len, e_inval);
if (len > 0) {
diff --git a/net/core/dev.c b/net/core/dev.c
index 9094c0fb8c68..ccef685023c2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -478,15 +478,21 @@ static const unsigned short netdev_lock_type[] = {
ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
+ ARPHRD_CAN, ARPHRD_MCTP,
ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
- ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
+ ARPHRD_RAWHDLC, ARPHRD_RAWIP,
+ ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
- ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
- ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
+ ARPHRD_IEEE80211_RADIOTAP,
+ ARPHRD_IEEE802154, ARPHRD_IEEE802154_MONITOR,
+ ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
+ ARPHRD_CAIF, ARPHRD_IP6GRE, ARPHRD_NETLINK, ARPHRD_6LOWPAN,
+ ARPHRD_VSOCKMON,
+ ARPHRD_VOID, ARPHRD_NONE};
static const char *const netdev_lock_name[] = {
"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -495,15 +501,21 @@ static const char *const netdev_lock_name[] = {
"_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
"_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
"_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
+ "_xmit_CAN", "_xmit_MCTP",
"_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
- "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
+ "_xmit_RAWHDLC", "_xmit_RAWIP",
+ "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
"_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
"_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
"_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
"_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
"_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
- "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
- "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
+ "_xmit_IEEE80211_RADIOTAP",
+ "_xmit_IEEE802154", "_xmit_IEEE802154_MONITOR",
+ "_xmit_PHONET", "_xmit_PHONET_PIPE",
+ "_xmit_CAIF", "_xmit_IP6GRE", "_xmit_NETLINK", "_xmit_6LOWPAN",
+ "_xmit_VSOCKMON",
+ "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -516,6 +528,7 @@ static inline unsigned short netdev_lock_pos(unsigned short dev_type)
if (netdev_lock_type[i] == dev_type)
return i;
/* the last key is used by default */
+ WARN_ONCE(1, "netdev_lock_pos() could not find dev_type=%u\n", dev_type);
return ARRAY_SIZE(netdev_lock_type) - 1;
}
@@ -4190,8 +4203,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
do {
if (first_n && !defer_count) {
defer_count = atomic_long_inc_return(&q->defer_count);
- if (unlikely(defer_count > READ_ONCE(q->limit))) {
- kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
+ if (unlikely(defer_count > READ_ONCE(net_hotdata.qdisc_max_burst))) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_BURST_DROP);
return NET_XMIT_DROP;
}
}
@@ -4209,7 +4222,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
ll_list = llist_del_all(&q->defer_list);
/* There is a small race because we clear defer_count not atomically
* with the prior llist_del_all(). This means defer_list could grow
- * over q->limit.
+ * over qdisc_max_burst.
*/
atomic_long_set(&q->defer_count, 0);
@@ -4241,9 +4254,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
int count = 0;
llist_for_each_entry_safe(skb, next, ll_list, ll_node) {
- prefetch(next);
- prefetch(&next->priority);
- skb_mark_not_on_list(skb);
+ if (next) {
+ prefetch(next);
+ prefetch(&next->priority);
+ skb_mark_not_on_list(skb);
+ }
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
count++;
}
diff --git a/net/core/dst.c b/net/core/dst.c
index e9d35f49c9e7..1dae26c51ebe 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -68,6 +68,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->lwtstate = NULL;
rcuref_init(&dst->__rcuref, 1);
INIT_LIST_HEAD(&dst->rt_uncached);
+ dst->rt_uncached_list = NULL;
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index dddd5c287cf0..a6db36580817 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -17,6 +17,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.tstamp_prequeue = 1,
.max_backlog = 1000,
+ .qdisc_max_burst = 1000,
.dev_tx_weight = 64,
.dev_rx_weight = 64,
.sysctl_max_skb_frags = MAX_SKB_FRAGS,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a00808f7be6a..a56133902c0d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4636,12 +4636,14 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
{
struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
unsigned int tnl_hlen = skb_tnl_header_len(skb);
- unsigned int delta_truesize = 0;
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
struct sk_buff *nskb, *tmp;
int len_diff, err;
+ /* Only skb_gro_receive_list generated skbs arrive here */
+ DEBUG_NET_WARN_ON_ONCE(!(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST));
+
skb_push(skb, -skb_network_offset(skb) + offset);
/* Ensure the head is writeable before touching the shared info */
@@ -4655,8 +4657,9 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
nskb = list_skb;
list_skb = list_skb->next;
+ DEBUG_NET_WARN_ON_ONCE(nskb->sk);
+
err = 0;
- delta_truesize += nskb->truesize;
if (skb_shared(nskb)) {
tmp = skb_clone(nskb, GFP_ATOMIC);
if (tmp) {
@@ -4699,7 +4702,6 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
goto err_linearize;
}
- skb->truesize = skb->truesize - delta_truesize;
skb->data_len = skb->data_len - delta_len;
skb->len = skb->len - delta_len;
diff --git a/net/core/sock.c b/net/core/sock.c
index 45c98bf524b2..a1c8b47b0d56 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3896,7 +3896,7 @@ void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
int level, int type)
{
- struct sock_exterr_skb *serr;
+ struct sock_extended_err ee;
struct sk_buff *skb;
int copied, err;
@@ -3916,8 +3916,9 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
sock_recv_timestamp(msg, sk, skb);
- serr = SKB_EXT_ERR(skb);
- put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
+ /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
+ ee = SKB_EXT_ERR(skb)->ee;
+ put_cmsg(msg, level, type, sizeof(ee), &ee);
msg->msg_flags |= MSG_ERRQUEUE;
err = copied;
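
The sock_recv_errqueue() change above stops put_cmsg() from copying directly out of SKB_EXT_ERR(skb), which lives inside the skb's control block, and instead snapshots the struct onto the stack first: CONFIG_HARDENED_USERCOPY can reject copies sourced from slab regions that are not whitelisted for usercopy, while a stack object of exactly the right size always passes. A sketch of the bounce-buffer idiom with illustrative types:

#include <linux/types.h>
#include <linux/uaccess.h>

struct payload {
        u32 id;
        u32 flags;
};

struct big_obj {
        int a;
        struct payload p;       /* the part userspace asked for */
        int b;
};

static int export_payload(const struct big_obj *obj, void __user *dst)
{
        struct payload tmp = obj->p;    /* stack bounce buffer */

        /* copying from &obj->p directly could trip hardened-usercopy
         * checks, since it points into the middle of a slab object
         */
        if (copy_to_user(dst, &tmp, sizeof(tmp)))
                return -EFAULT;
        return 0;
}
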
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8d4decb2606f..05dd55cf8b58 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -430,6 +430,13 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "qdisc_max_burst",
+ .data = &net_hotdata.qdisc_max_burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "netdev_rss_key",
.data = &netdev_rss_key,
.maxlen = sizeof(int),
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index a20efabe778f..99ede37698ac 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -367,16 +367,10 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
struct net_device *dsa_tree_find_first_conduit(struct dsa_switch_tree *dst)
{
- struct device_node *ethernet;
- struct net_device *conduit;
struct dsa_port *cpu_dp;
cpu_dp = dsa_tree_find_first_cpu(dst);
- ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0);
- conduit = of_find_net_device_by_node(ethernet);
- of_node_put(ethernet);
-
- return conduit;
+ return cpu_dp->conduit;
}
/* Assign the default CPU port (the first one in the tree) to all ports of the
@@ -1253,14 +1247,25 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
if (ethernet) {
struct net_device *conduit;
const char *user_protocol;
+ int err;
+ rtnl_lock();
conduit = of_find_net_device_by_node(ethernet);
of_node_put(ethernet);
- if (!conduit)
+ if (!conduit) {
+ rtnl_unlock();
return -EPROBE_DEFER;
+ }
+
+ netdev_hold(conduit, &dp->conduit_tracker, GFP_KERNEL);
+ put_device(&conduit->dev);
+ rtnl_unlock();
user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL);
- return dsa_port_parse_cpu(dp, conduit, user_protocol);
+ err = dsa_port_parse_cpu(dp, conduit, user_protocol);
+ if (err)
+ netdev_put(conduit, &dp->conduit_tracker);
+ return err;
}
if (link)
@@ -1393,37 +1398,30 @@ static struct device *dev_find_class(struct device *parent, char *class)
return device_find_child(parent, class, dev_is_class);
}
-static struct net_device *dsa_dev_to_net_device(struct device *dev)
-{
- struct device *d;
-
- d = dev_find_class(dev, "net");
- if (d != NULL) {
- struct net_device *nd;
-
- nd = to_net_dev(d);
- dev_hold(nd);
- put_device(d);
-
- return nd;
- }
-
- return NULL;
-}
-
static int dsa_port_parse(struct dsa_port *dp, const char *name,
struct device *dev)
{
if (!strcmp(name, "cpu")) {
struct net_device *conduit;
+ struct device *d;
+ int err;
- conduit = dsa_dev_to_net_device(dev);
- if (!conduit)
+ rtnl_lock();
+ d = dev_find_class(dev, "net");
+ if (!d) {
+ rtnl_unlock();
return -EPROBE_DEFER;
+ }
- dev_put(conduit);
+ conduit = to_net_dev(d);
+ netdev_hold(conduit, &dp->conduit_tracker, GFP_KERNEL);
+ put_device(d);
+ rtnl_unlock();
- return dsa_port_parse_cpu(dp, conduit, NULL);
+ err = dsa_port_parse_cpu(dp, conduit, NULL);
+ if (err)
+ netdev_put(conduit, &dp->conduit_tracker);
+ return err;
}
if (!strcmp(name, "dsa"))
@@ -1491,6 +1489,9 @@ static void dsa_switch_release_ports(struct dsa_switch *ds)
struct dsa_vlan *v, *n;
dsa_switch_for_each_port_safe(dp, next, ds) {
+ if (dsa_port_is_cpu(dp) && dp->conduit)
+ netdev_put(dp->conduit, &dp->conduit_tracker);
+
/* These are either entries that upper layers lost track of
* (probably due to bugs), or installed through interfaces
* where one does not necessarily have to remove them, like
@@ -1635,8 +1636,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
/* Disconnect from further netdevice notifiers on the conduit,
* since netdev_uses_dsa() will now return false.
*/
- dsa_switch_for_each_cpu_port(dp, ds)
+ dsa_switch_for_each_cpu_port(dp, ds) {
dp->conduit->dsa_ptr = NULL;
+ netdev_put(dp->conduit, &dp->conduit_tracker);
+ }
rtnl_unlock();
out:
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index fa83ddade4f8..9431e305b233 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2383,7 +2383,10 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
return -ENOMEM;
WARN_ON_ONCE(!ret);
- gstrings.len = ret;
+ if (gstrings.len && gstrings.len != ret)
+ gstrings.len = 0;
+ else
+ gstrings.len = ret;
if (gstrings.len) {
data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
@@ -2509,10 +2512,13 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
- stats.n_stats = n_stats;
+ if (stats.n_stats && stats.n_stats != n_stats)
+ stats.n_stats = 0;
+ else
+ stats.n_stats = n_stats;
- if (n_stats) {
- data = vzalloc(array_size(n_stats, sizeof(u64)));
+ if (stats.n_stats) {
+ data = vzalloc(array_size(stats.n_stats, sizeof(u64)));
if (!data)
return -ENOMEM;
ops->get_ethtool_stats(dev, &stats, data);
@@ -2524,7 +2530,9 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64))))
+ if (stats.n_stats &&
+ copy_to_user(useraddr, data,
+ array_size(stats.n_stats, sizeof(u64))))
goto out;
ret = 0;
@@ -2560,6 +2568,10 @@ static int ethtool_get_phy_stats_phydev(struct phy_device *phydev,
return -EOPNOTSUPP;
n_stats = phy_ops->get_sset_count(phydev);
+ if (stats->n_stats && stats->n_stats != n_stats) {
+ stats->n_stats = 0;
+ return 0;
+ }
ret = ethtool_vzalloc_stats_array(n_stats, data);
if (ret)
@@ -2580,6 +2592,10 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev,
return -EOPNOTSUPP;
n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
+ if (stats->n_stats && stats->n_stats != n_stats) {
+ stats->n_stats = 0;
+ return 0;
+ }
ret = ethtool_vzalloc_stats_array(n_stats, data);
if (ret)
@@ -2616,7 +2632,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
}
useraddr += sizeof(stats);
- if (copy_to_user(useraddr, data, array_size(stats.n_stats, sizeof(u64))))
+ if (stats.n_stats &&
+ copy_to_user(useraddr, data,
+ array_size(stats.n_stats, sizeof(u64))))
ret = -EFAULT;
out:
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
index 1d33a4675a48..b989456fc4c5 100644
--- a/net/handshake/netlink.c
+++ b/net/handshake/netlink.c
@@ -126,7 +126,8 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
}
out_complete:
- handshake_complete(req, -EIO, NULL);
+ if (req)
+ handshake_complete(req, -EIO, NULL);
out_status:
trace_handshake_cmd_accept_err(net, req, NULL, err);
return err;
diff --git a/net/handshake/request.c b/net/handshake/request.c
index 274d2c89b6b2..6b7e3e0bf399 100644
--- a/net/handshake/request.c
+++ b/net/handshake/request.c
@@ -276,6 +276,8 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req,
out_unlock:
spin_unlock(&hn->hn_lock);
out_err:
+ /* Restore original destructor so socket teardown still runs on failure */
+ req->hr_sk->sk_destruct = req->hr_odestruct;
trace_handshake_submit_err(net, req, req->hr_sk, ret);
handshake_req_destroy(req);
return ret;
@@ -324,7 +326,11 @@ bool handshake_req_cancel(struct sock *sk)
hn = handshake_pernet(net);
if (hn && remove_pending(hn, req)) {
- /* Request hadn't been accepted */
+ /* Request hadn't been accepted - mark cancelled */
+ if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) {
+ trace_handshake_cancel_busy(net, req, sk);
+ return false;
+ }
goto out_true;
}
if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) {
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 339f0d220212..aefc9b6936ba 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -205,6 +205,8 @@ struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame,
__pskb_copy(frame->skb_prp,
skb_headroom(frame->skb_prp),
GFP_ATOMIC);
+ if (!frame->skb_std)
+ return NULL;
} else {
/* Unexpected */
WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n",
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7f3863daaa40..c8c3e1713c0e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -564,7 +564,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
- arp = skb_put(skb, arp_hdr_len(dev));
+ skb_put(skb, arp_hdr_len(dev));
skb->dev = dev;
skb->protocol = htons(ETH_P_ARP);
if (!src_hw)
@@ -572,12 +572,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
if (!dest_hw)
dest_hw = dev->broadcast;
- /*
- * Fill the device header for the ARP frame
+ /* Fill the device header for the ARP frame.
+ * Note: skb->head can be changed.
*/
if (dev_hard_header(skb, dev, ptype, dest_hw, src_hw, skb->len) < 0)
goto out;
+ arp = arp_hdr(skb);
/*
* Fill out the arp protocol part.
*
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 05828d4cb6cd..abd77162f5e7 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -122,8 +122,8 @@ static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
{
- const struct xfrm_mode *inner_mode = xfrm_ip2inner_mode(x,
- XFRM_MODE_SKB_CB(skb)->protocol);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ const struct xfrm_mode *inner_mode = xfrm_ip2inner_mode(x, xo->proto);
__be16 type = inner_mode->family == AF_INET6 ? htons(ETH_P_IPV6)
: htons(ETH_P_IP);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a5f3c8459758..0caf38e44c73 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -2167,8 +2167,8 @@ void fib_select_multipath(struct fib_result *res, int hash,
{
struct fib_info *fi = res->fi;
struct net *net = fi->fib_net;
- bool found = false;
bool use_neigh;
+ int score = -1;
__be32 saddr;
if (unlikely(res->fi->nh)) {
@@ -2180,7 +2180,7 @@ void fib_select_multipath(struct fib_result *res, int hash,
saddr = fl4 ? fl4->saddr : 0;
change_nexthops(fi) {
- int nh_upper_bound;
+ int nh_upper_bound, nh_score = 0;
/* Nexthops without a carrier are assigned an upper bound of
* minus one when "ignore_routes_with_linkdown" is set.
@@ -2190,24 +2190,18 @@ void fib_select_multipath(struct fib_result *res, int hash,
(use_neigh && !fib_good_nh(nexthop_nh)))
continue;
- if (!found) {
+ if (saddr && nexthop_nh->nh_saddr == saddr)
+ nh_score += 2;
+ if (hash <= nh_upper_bound)
+ nh_score++;
+ if (score < nh_score) {
res->nh_sel = nhsel;
res->nhc = &nexthop_nh->nh_common;
- found = !saddr || nexthop_nh->nh_saddr == saddr;
+ if (nh_score == 3 || (!saddr && nh_score == 1))
+ return;
+ score = nh_score;
}
- if (hash > nh_upper_bound)
- continue;
-
- if (!saddr || nexthop_nh->nh_saddr == saddr) {
- res->nh_sel = nhsel;
- res->nhc = &nexthop_nh->nh_common;
- return;
- }
-
- if (found)
- return;
-
} endfor_nexthops(fi);
}
#endif
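
The fib_select_multipath() rewrite above replaces the old found flag and early returns with an explicit score per nexthop: 2 points for matching the flow's source address, 1 point for owning the hash bucket; the best score wins, and the walk stops early once no candidate can do better (3 when a saddr constraint exists, otherwise 1). The scoring in isolation, as a sketch:

/* score a candidate nexthop: source-address match dominates,
 * hash-bucket ownership breaks ties
 */
static int nh_score(bool saddr_set, bool saddr_match, bool hash_in_bound)
{
        int score = 0;

        if (saddr_set && saddr_match)
                score += 2;
        if (hash_in_bound)
                score += 1;

        /* best possible: 3 with a saddr constraint, otherwise 1 */
        return score;
}
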
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 59a6f0a9638f..7e2c17fec3fc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2053,10 +2053,11 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
continue;
}
- /* Do not flush error routes if network namespace is
- * not being dismantled
+ /* When not flushing the entire table, skip error
+ * routes that are not marked for deletion.
*/
- if (!flush_all && fib_props[fa->fa_type].error) {
+ if (!flush_all && fib_props[fa->fa_type].error &&
+ !(fi->fib_flags & RTNH_F_DEAD)) {
slen = fa->fa_slen;
continue;
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 025895eb6ec5..4e6d7467ed44 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -218,6 +218,41 @@ static int __init inet_frag_wq_init(void)
pure_initcall(inet_frag_wq_init);
+void fqdir_pre_exit(struct fqdir *fqdir)
+{
+ struct inet_frag_queue *fq;
+ struct rhashtable_iter hti;
+
+ /* Prevent creation of new frags.
+ * Pairs with READ_ONCE() in inet_frag_find().
+ */
+ WRITE_ONCE(fqdir->high_thresh, 0);
+
+ /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
+ * and ip6frag_expire_frag_queue().
+ */
+ WRITE_ONCE(fqdir->dead, true);
+
+ rhashtable_walk_enter(&fqdir->rhashtable, &hti);
+ rhashtable_walk_start(&hti);
+
+ while ((fq = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(fq)) {
+ if (PTR_ERR(fq) != -EAGAIN)
+ break;
+ continue;
+ }
+ spin_lock_bh(&fq->lock);
+ if (!(fq->flags & INET_FRAG_COMPLETE))
+ inet_frag_queue_flush(fq, 0);
+ spin_unlock_bh(&fq->lock);
+ }
+
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+}
+EXPORT_SYMBOL(fqdir_pre_exit);
+
void fqdir_exit(struct fqdir *fqdir)
{
INIT_WORK(&fqdir->destroy_work, fqdir_work_fn);
@@ -263,8 +298,8 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
kmem_cache_free(f->frags_cachep, q);
}
-unsigned int inet_frag_rbtree_purge(struct rb_root *root,
- enum skb_drop_reason reason)
+static unsigned int
+inet_frag_rbtree_purge(struct rb_root *root, enum skb_drop_reason reason)
{
struct rb_node *p = rb_first(root);
unsigned int sum = 0;
@@ -284,7 +319,17 @@ unsigned int inet_frag_rbtree_purge(struct rb_root *root,
}
return sum;
}
-EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
+void inet_frag_queue_flush(struct inet_frag_queue *q,
+ enum skb_drop_reason reason)
+{
+ unsigned int sum;
+
+ reason = reason ?: SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
+ sum = inet_frag_rbtree_purge(&q->rb_fragments, reason);
+ sub_frag_mem_limit(q->fqdir, sum);
+}
+EXPORT_SYMBOL(inet_frag_queue_flush);
void inet_frag_destroy(struct inet_frag_queue *q)
{
@@ -327,7 +372,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
- /* One reference for the timer, one for the hash table. */
+ /* One reference for the timer, one for the hash table.
+ * We never take any extra references, only decrement this field.
+ */
refcount_set(&q->refcnt, 2);
return q;
@@ -441,6 +488,8 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
}
FRAG_CB(skb)->ip_defrag_offset = offset;
+ if (offset)
+ nf_reset_ct(skb);
return IPFRAG_OK;
}
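
The new fqdir_pre_exit() above walks the whole fragment hash with the rhashtable walk API, where rhashtable_walk_next() can legitimately return ERR_PTR(-EAGAIN) if the table was resized mid-walk; that case must be skipped, not treated as fatal. The iteration skeleton, sketched generically:

#include <linux/err.h>
#include <linux/rhashtable.h>

static void table_for_each(struct rhashtable *ht, void (*fn)(void *obj))
{
        struct rhashtable_iter hti;
        void *obj;

        rhashtable_walk_enter(ht, &hti);
        rhashtable_walk_start(&hti);

        while ((obj = rhashtable_walk_next(&hti))) {
                if (IS_ERR(obj)) {
                        /* -EAGAIN: a resize occurred, keep walking */
                        if (PTR_ERR(obj) != -EAGAIN)
                                break;
                        continue;
                }
                fn(obj);
        }

        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);
}
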
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index f7012479713b..56b0f738d2f2 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -134,11 +134,6 @@ static void ip_expire(struct timer_list *t)
net = qp->q.fqdir->net;
rcu_read_lock();
-
- /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
- if (READ_ONCE(qp->q.fqdir->dead))
- goto out_rcu_unlock;
-
spin_lock(&qp->q.lock);
if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -146,6 +141,13 @@ static void ip_expire(struct timer_list *t)
qp->q.flags |= INET_FRAG_DROP;
inet_frag_kill(&qp->q, &refs);
+
+ /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(qp->q.fqdir->dead)) {
+ inet_frag_queue_flush(&qp->q, 0);
+ goto out;
+ }
+
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
@@ -240,16 +242,10 @@ static int ip_frag_too_far(struct ipq *qp)
static int ip_frag_reinit(struct ipq *qp)
{
- unsigned int sum_truesize = 0;
-
- if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
- refcount_inc(&qp->q.refcnt);
+ if (!mod_timer_pending(&qp->q.timer, jiffies + qp->q.fqdir->timeout))
return -ETIMEDOUT;
- }
- sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments,
- SKB_DROP_REASON_FRAG_TOO_FAR);
- sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
+ inet_frag_queue_flush(&qp->q, SKB_DROP_REASON_FRAG_TOO_FAR);
qp->q.flags = 0;
qp->q.len = 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 761a53c6a89a..e13244729ad8 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -330,6 +330,10 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (!tun_dst)
return PACKET_REJECT;
+ /* MUST set options_len before referencing options */
+ info = &tun_dst->u.tun_info;
+ info->options_len = sizeof(*md);
+
/* skb can be uncloned in __iptunnel_pull_header, so
* old pkt_md is no longer valid and we need to reset
* it
@@ -344,10 +348,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
ERSPAN_V2_MDSIZE);
- info = &tun_dst->u.tun_info;
__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
info->key.tun_flags);
- info->options_len = sizeof(*md);
}
skb_reset_mac_header(skb);
@@ -889,10 +891,17 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
const void *daddr, const void *saddr, unsigned int len)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct iphdr *iph;
struct gre_base_hdr *greh;
+ struct iphdr *iph;
+ int needed;
+
+ needed = t->hlen + sizeof(*iph);
+ if (skb_headroom(skb) < needed &&
+ pskb_expand_head(skb, HH_DATA_ALIGN(needed - skb_headroom(skb)),
+ 0, GFP_ATOMIC))
+ return -needed;
- iph = skb_push(skb, t->hlen + sizeof(*iph));
+ iph = skb_push(skb, needed);
greh = (struct gre_base_hdr *)(iph+1);
greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
greh->protocol = htons(type);
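
ipgre_header() above used to skb_push() its headers unconditionally, underflowing the headroom on skbs built without enough reserve; the fix grows the head via pskb_expand_head() first and only then pushes. The same guard as a standalone helper (a sketch, not the tunnel code):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Make sure "needed" bytes can be pushed in front of the data,
 * reallocating the head if the current headroom is too small.
 */
static void *push_header(struct sk_buff *skb, unsigned int needed)
{
        if (skb_headroom(skb) < needed &&
            pskb_expand_head(skb, HH_DATA_ALIGN(needed - skb_headroom(skb)),
                             0, GFP_ATOMIC))
                return NULL;    /* allocation failed, nothing was pushed */

        return skb_push(skb, needed);
}
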
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 158a30ae7c5f..50d0f5fe4e4c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1281,7 +1281,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
-int ip_tunnel_init(struct net_device *dev)
+int __ip_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
@@ -1308,10 +1308,9 @@ int ip_tunnel_init(struct net_device *dev)
if (tunnel->collect_md)
netif_keep_dst(dev);
- netdev_lockdep_set_classes(dev);
return 0;
}
-EXPORT_SYMBOL_GPL(ip_tunnel_init);
+EXPORT_SYMBOL_GPL(__ip_tunnel_init);
void ip_tunnel_uninit(struct net_device *dev)
{
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index ad56588107cc..cfbd563498e8 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -828,10 +828,8 @@ out:
out_free:
if (free)
kfree(ipc.opt);
- if (!err) {
- icmp_out_count(sock_net(sk), user_icmph.type);
+ if (!err)
return len;
- }
return err;
do_confirm:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b549d6a57307..11d990703d31 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1537,9 +1537,9 @@ void rt_add_uncached_list(struct rtable *rt)
void rt_del_uncached_list(struct rtable *rt)
{
- if (!list_empty(&rt->dst.rt_uncached)) {
- struct uncached_list *ul = rt->dst.rt_uncached_list;
+ struct uncached_list *ul = rt->dst.rt_uncached_list;
+ if (ul) {
spin_lock_bh(&ul->lock);
list_del_init(&rt->dst.rt_uncached);
spin_unlock_bh(&ul->lock);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f035440c475a..d5319ebe2452 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2652,10 +2652,8 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
if (sk->sk_state == TCP_LISTEN)
goto out;
- if (tp->recvmsg_inq) {
+ if (tp->recvmsg_inq)
*cmsg_flags = TCP_CMSG_INQ;
- msg->msg_get_inq = 1;
- }
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
/* Urgent data needs to be handled specially. */
@@ -2929,10 +2927,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags);
release_sock(sk);
- if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
+ if ((cmsg_flags | msg->msg_get_inq) && ret >= 0) {
if (cmsg_flags & TCP_CMSG_TS)
tcp_recv_timestamp(msg, sk, &tss);
- if (msg->msg_get_inq) {
+ if ((cmsg_flags & TCP_CMSG_INQ) | msg->msg_get_inq) {
msg->msg_inq = tcp_inq_hint(sk);
if (cmsg_flags & TCP_CMSG_INQ)
put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ffe074cb5865..ee63af0ef42c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1851,6 +1851,7 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
sk_peek_offset_bwd(sk, len);
if (!skb_shared(skb)) {
+ skb_orphan(skb);
skb_attempt_defer_free(skb);
return;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b66217d1b2f8..27ab9d7adc64 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3112,12 +3112,12 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
- ipv6_del_addr(ifp);
-
if (!(ifp->flags & IFA_F_TEMPORARY) &&
(ifp->flags & IFA_F_MANAGETEMPADDR))
delete_tempaddrs(idev, ifp);
+ ipv6_del_addr(ifp);
+
addrconf_verify_rtnl(net);
if (ipv6_addr_is_multicast(pfx)) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk,
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index df1986973430..21f6ed126253 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1342,7 +1342,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
/* At this point new_end aligns to 4n, so (new_end & 4) pads to 8n */
pad = ((new_end & 4) + (end & 7)) & 7;
len_delta = new_end - (int)end + pad;
- ret_val = skb_cow(skb, skb_headroom(skb) + len_delta);
+ ret_val = skb_cow(skb,
+ skb_headroom(skb) + (len_delta > 0 ? len_delta : 0));
if (ret_val < 0)
return ret_val;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 22410243ebe8..22895521a57d 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -158,8 +158,8 @@ static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
{
- const struct xfrm_mode *inner_mode = xfrm_ip2inner_mode(x,
- XFRM_MODE_SKB_CB(skb)->protocol);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ const struct xfrm_mode *inner_mode = xfrm_ip2inner_mode(x, xo->proto);
__be16 type = inner_mode->family == AF_INET ? htons(ETH_P_IP)
: htons(ETH_P_IPV6);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c82a75510c0e..d19d86ed4376 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -535,6 +535,10 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (!tun_dst)
return PACKET_REJECT;
+ /* MUST set options_len before referencing options */
+ info = &tun_dst->u.tun_info;
+ info->options_len = sizeof(*md);
+
/* skb can be uncloned in __iptunnel_pull_header, so
* old pkt_md is no longer valid and we need to reset
* it
@@ -543,7 +547,6 @@ static int ip6erspan_rcv(struct sk_buff *skb,
skb_network_header_len(skb);
pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
sizeof(*ershdr));
- info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
md->version = ver;
md2 = &md->u.md2;
@@ -551,7 +554,6 @@ static int ip6erspan_rcv(struct sk_buff *skb,
ERSPAN_V2_MDSIZE);
__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
info->key.tun_flags);
- info->options_len = sizeof(*md);
ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
@@ -1366,9 +1368,16 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
{
struct ip6_tnl *t = netdev_priv(dev);
struct ipv6hdr *ipv6h;
+ int needed;
__be16 *p;
- ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h));
+ needed = t->hlen + sizeof(*ipv6h);
+ if (skb_headroom(skb) < needed &&
+ pskb_expand_head(skb, HH_DATA_ALIGN(needed - skb_headroom(skb)),
+ 0, GFP_ATOMIC))
+ return -needed;
+
+ ipv6h = skb_push(skb, needed);
ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
t->fl.u.ip6.flowlabel,
true, &t->fl.u.ip6));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6405072050e0..c1f39735a236 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -844,7 +844,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_reset_network_header(skb);
- if (!pskb_inet_may_pull(skb)) {
+ if (skb_vlan_inet_prepare(skb, true)) {
DEV_STATS_INC(tunnel->dev, rx_length_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aee6a10b112a..e3a260a5564b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -148,9 +148,9 @@ void rt6_uncached_list_add(struct rt6_info *rt)
void rt6_uncached_list_del(struct rt6_info *rt)
{
- if (!list_empty(&rt->dst.rt_uncached)) {
- struct uncached_list *ul = rt->dst.rt_uncached_list;
+ struct uncached_list *ul = rt->dst.rt_uncached_list;
+ if (ul) {
spin_lock_bh(&ul->lock);
list_del_init(&rt->dst.rt_uncached);
spin_unlock_bh(&ul->lock);
@@ -1470,7 +1470,18 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
p = this_cpu_ptr(res->nh->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt);
- BUG_ON(prev);
+ if (unlikely(prev)) {
+ /*
+ * Another task on this CPU already installed a pcpu_rt.
+ * This can happen on PREEMPT_RT where preemption is possible.
+ * Free our allocation and return the existing one.
+ */
+ WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RT));
+
+ dst_dev_put(&pcpu_rt->dst);
+ dst_release(&pcpu_rt->dst);
+ return prev;
+ }
if (res->f6i->fib6_destroying) {
struct fib6_info *from;
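
The rt6_make_pcpu_route() change above replaces BUG_ON(prev) after cmpxchg() with a graceful path for PREEMPT_RT, where two tasks on the same CPU can both reach the install: the loser drops its own allocation and adopts the published route. The generic install-or-reuse idiom, sketched with an illustrative object type:

#include <linux/atomic.h>
#include <linux/slab.h>

struct obj {
        int data;
};

static void obj_free(struct obj *o)
{
        kfree(o);
}

/* Publish "candidate" into *slot unless someone beat us to it; the
 * loser discards its own allocation and uses the winner's object.
 */
static struct obj *install_or_reuse(struct obj **slot, struct obj *candidate)
{
        struct obj *old = cmpxchg(slot, NULL, candidate);

        if (!old)
                return candidate;       /* we won the race */

        obj_free(candidate);            /* lost: discard ours ... */
        return old;                     /* ... and reuse the published one */
}
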
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b51c2c8584ae..c81091a5cc3a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1345,7 +1345,6 @@ ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
size = sizeof(*new) + new_head_len + new_tail_len;
- /* new or old multiple BSSID elements? */
if (params->mbssid_ies) {
mbssid = params->mbssid_ies;
size += struct_size(new->mbssid_ies, elem, mbssid->cnt);
@@ -1355,15 +1354,6 @@ ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
}
size += ieee80211_get_mbssid_beacon_len(mbssid, rnr,
mbssid->cnt);
- } else if (old && old->mbssid_ies) {
- mbssid = old->mbssid_ies;
- size += struct_size(new->mbssid_ies, elem, mbssid->cnt);
- if (old && old->rnr_ies) {
- rnr = old->rnr_ies;
- size += struct_size(new->rnr_ies, elem, rnr->cnt);
- }
- size += ieee80211_get_mbssid_beacon_len(mbssid, rnr,
- mbssid->cnt);
}
new = kzalloc(size, GFP_KERNEL);
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index d0bfb1216401..d8c5f11afc15 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -90,6 +90,9 @@ next_interface:
/* next (or first) interface */
iter->sdata = list_prepare_entry(iter->sdata, &local->interfaces, list);
list_for_each_entry_continue(iter->sdata, &local->interfaces, list) {
+ if (!ieee80211_sdata_running(iter->sdata))
+ continue;
+
/* AP_VLAN has a chanctx pointer but follows AP */
if (iter->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
continue;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 4f04d95c19d4..7b0aa24c1f97 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1251,7 +1251,7 @@ configure_monitor:
if (!creator_sdata) {
struct ieee80211_sub_if_data *other;
- list_for_each_entry(other, &local->mon_list, list) {
+ list_for_each_entry_rcu(other, &local->mon_list, u.mntr.list) {
if (!other->vif.bss_conf.mu_mimo_owner)
continue;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e56ad4b9330f..ad53dedd929c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1126,7 +1126,10 @@ again:
while (!ieee80211_chandef_usable(sdata, &chanreq->oper,
IEEE80211_CHAN_DISABLED)) {
- if (WARN_ON(chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT)) {
+ if (chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT) {
+ link_id_info(sdata, link_id,
+ "unusable channel (%d MHz) for connection\n",
+ chanreq->oper.chan->center_freq);
ret = -EINVAL;
goto free;
}
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index a5d4358f122a..ebb4f4d88c23 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -47,6 +47,9 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta;
int band;
+ if (!ifocb->joined)
+ return;
+
/* XXX: Consider removing the least recently used entry and
* allow new one to be added.
*/
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6a1899512d07..e0ccd9749853 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3511,6 +3511,11 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
rx->skb->len < IEEE80211_MIN_ACTION_SIZE)
return RX_DROP_U_RUNT_ACTION;
+ /* Drop non-broadcast Beacon frames */
+ if (ieee80211_is_beacon(mgmt->frame_control) &&
+ !is_broadcast_ether_addr(mgmt->da))
+ return RX_DROP;
+
if (rx->sdata->vif.type == NL80211_IFTYPE_AP &&
ieee80211_is_beacon(mgmt->frame_control) &&
!(rx->flags & IEEE80211_RX_BEACON_REPORTED)) {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f4d3b67fda06..1a995bc301b1 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1533,6 +1533,10 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc)
}
}
+ sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
+ if (sinfo)
+ sta_set_sinfo(sta, sinfo, true);
+
if (sta->uploaded) {
ret = drv_sta_state(local, sdata, sta, IEEE80211_STA_NONE,
IEEE80211_STA_NOTEXIST);
@@ -1541,9 +1545,6 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc)
sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr);
- sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
- if (sinfo)
- sta_set_sinfo(sta, sinfo, true);
cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
kfree(sinfo);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9d8b0a25f73c..1b55e8340413 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2397,6 +2397,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
if (chanctx_conf)
chandef = &chanctx_conf->def;
+ else if (local->emulate_chanctx)
+ chandef = &local->hw.conf.chandef;
else
goto fail_rcu;
diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
index 20328920f6ed..be71fc9b4638 100644
--- a/net/mptcp/Kconfig
+++ b/net/mptcp/Kconfig
@@ -4,7 +4,7 @@ config MPTCP
depends on INET
select SKB_EXTENSIONS
select CRYPTO_LIB_SHA256
- select CRYPTO
+ select CRYPTO_LIB_UTILS
help
Multipath TCP (MPTCP) connections send and receive data over multiple
subflows in order to utilize multiple network paths. Each subflow
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index f24ae7d40e88..43df4293f58b 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -408,6 +408,16 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
*/
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
if (subflow->request_mptcp) {
+ if (unlikely(subflow_simultaneous_connect(sk))) {
+ WARN_ON_ONCE(!mptcp_try_fallback(sk, MPTCP_MIB_SIMULTCONNFALLBACK));
+
+ /* Ensure mptcp_finish_connect() will not process the
+ * MPC handshake.
+ */
+ subflow->request_mptcp = 0;
+ return false;
+ }
+
opts->suboptions = OPTION_MPTCP_MPC_SYN;
opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d5b383870f79..7aa42de9c47b 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
}
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
- entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+ entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) &
+ MPTCP_PM_ADDR_FLAGS_MASK;
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e212c1374bd0..f505b780f713 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1623,7 +1623,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
struct mptcp_sendmsg_info info = {
.flags = flags,
};
- bool do_check_data_fin = false;
+ bool copied = false;
int push_count = 1;
while (mptcp_send_head(sk) && (push_count > 0)) {
@@ -1665,7 +1665,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
push_count--;
continue;
}
- do_check_data_fin = true;
+ copied = true;
}
}
}
@@ -1674,11 +1674,14 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
if (ssk)
mptcp_push_release(ssk, &info);
- /* ensure the rtx timer is running */
- if (!mptcp_rtx_timer_pending(sk))
- mptcp_reset_rtx_timer(sk);
- if (do_check_data_fin)
+ /* Avoid scheduling the rtx timer if no data has been pushed; the timer
+ * will be updated on positive acks by __mptcp_cleanup_una().
+ */
+ if (copied) {
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
mptcp_check_send_data_fin(sk);
+ }
}
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
@@ -2464,10 +2467,10 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
*/
static void __mptcp_subflow_disconnect(struct sock *ssk,
struct mptcp_subflow_context *subflow,
- unsigned int flags)
+ bool fastclosing)
{
if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
- subflow->send_fastclose) {
+ fastclosing) {
/* The MPTCP code never waits on the subflow sockets, so the TCP-level
* disconnect should never fail
*/
@@ -2535,7 +2538,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
- __mptcp_subflow_disconnect(ssk, subflow, flags);
+ __mptcp_subflow_disconnect(ssk, subflow, msk->fastclosing);
release_sock(ssk);
goto out;
@@ -2766,10 +2769,13 @@ static void __mptcp_retrans(struct sock *sk)
/*
* make the whole retrans decision, xmit, disallow
- * fallback atomic
+ * fallback atomic. Note that we can't retransmit while an
+ * infinite fallback is in progress, i.e. while new
+ * subflows are disallowed.
*/
spin_lock_bh(&msk->fallback_lock);
- if (__mptcp_check_fallback(msk)) {
+ if (__mptcp_check_fallback(msk) ||
+ !msk->allow_subflows) {
spin_unlock_bh(&msk->fallback_lock);
release_sock(ssk);
goto clear_scheduled;
@@ -2878,6 +2884,7 @@ static void mptcp_do_fastclose(struct sock *sk)
mptcp_set_state(sk, TCP_CLOSE);
mptcp_backlog_purge(sk);
+ msk->fastclosing = 1;
/* Explicitly send the fastclose reset as needed */
if (__mptcp_check_fallback(msk))
@@ -3412,6 +3419,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
msk->rcvspace_init = 0;
+ msk->fastclosing = 0;
/* for fallback's sake */
WRITE_ONCE(msk->ack_seq, 0);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 9c0d17876b22..66e973500791 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -320,7 +320,8 @@ struct mptcp_sock {
fastopening:1,
in_accept_queue:1,
free_first:1,
- rcvspace_init:1;
+ rcvspace_init:1,
+ fastclosing:1;
u32 notsent_lowat;
int keepalive_cnt;
int keepalive_idle;
@@ -1337,10 +1338,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- return (1 << sk->sk_state) &
- (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
- is_active_ssk(subflow) &&
- !subflow->conn_finished;
+ /* Note that the sk state implies !subflow->conn_finished. */
+ return sk->sk_state == TCP_SYN_RECV && is_active_ssk(subflow);
}
#ifdef CONFIG_SYN_COOKIES
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 86ce58ae533d..96d54cb2cd93 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1878,12 +1878,6 @@ static void subflow_state_change(struct sock *sk)
__subflow_state_change(sk);
- if (subflow_simultaneous_connect(sk)) {
- WARN_ON_ONCE(!mptcp_try_fallback(sk, MPTCP_MIB_SIMULTCONNFALLBACK));
- subflow->conn_finished = 1;
- mptcp_propagate_state(parent, sk, subflow, NULL);
- }
-
/* as recvmsg() does not acquire the subflow socket for ssk selection
* a fin packet carrying a DSS can be unnoticed if we don't trigger
* the data available machinery here.
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3162ce3c2640..64c697212578 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -408,6 +408,9 @@ err_put:
return -1;
err_unreach:
+ if (!skb->dev)
+ skb->dev = skb_dst(skb)->dev;
+
dst_link_failure(skb);
return -1;
}
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index f1be4dd5cf85..8487808c8761 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -172,14 +172,14 @@ static int __nf_conncount_add(struct net *net,
struct nf_conn *found_ct;
unsigned int collect = 0;
bool refcounted = false;
+ int err = 0;
if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted))
return -ENOENT;
if (ct && nf_ct_is_confirmed(ct)) {
- if (refcounted)
- nf_ct_put(ct);
- return -EEXIST;
+ err = -EEXIST;
+ goto out_put;
}
if ((u32)jiffies == list->last_gc)
@@ -229,14 +229,19 @@ static int __nf_conncount_add(struct net *net,
nf_ct_put(found_ct);
}
+ list->last_gc = (u32)jiffies;
add_new_node:
- if (WARN_ON_ONCE(list->count > INT_MAX))
- return -EOVERFLOW;
+ if (WARN_ON_ONCE(list->count > INT_MAX)) {
+ err = -EOVERFLOW;
+ goto out_put;
+ }
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
- if (conn == NULL)
- return -ENOMEM;
+ if (conn == NULL) {
+ err = -ENOMEM;
+ goto out_put;
+ }
conn->tuple = tuple;
conn->zone = *zone;
@@ -244,12 +249,11 @@ add_new_node:
conn->jiffies32 = (u32)jiffies;
list_add_tail(&conn->node, &list->head);
list->count++;
- list->last_gc = (u32)jiffies;
out_put:
if (refcounted)
nf_ct_put(ct);
- return 0;
+ return err;
}
int nf_conncount_add_skb(struct net *net,
@@ -456,11 +460,10 @@ restart:
rb_link_node_rcu(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
-
- if (refcounted)
- nf_ct_put(ct);
}
out_unlock:
+ if (refcounted)
+ nf_ct_put(ct);
spin_unlock_bh(&nf_conncount_locks[hash]);
return count;
}
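The nf_conncount hunk above converges every failure path on a single out_put label so the conntrack reference is dropped exactly once, instead of sprinkling nf_ct_put() before each return. A minimal userspace sketch of that single-exit pattern, using a toy single-threaded refcount (obj_put() and the error cases are illustrative, not kernel APIs):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcnt;	/* toy non-atomic refcount for the demo */
};

static void obj_put(struct obj *o)
{
	if (--o->refcnt == 0)
		free(o);
}

/* Every failure branches to one label, so the reference held by the
 * caller is dropped exactly once on all paths. */
static int add_node(struct obj *o, int already_confirmed, int can_alloc)
{
	int err = 0;

	if (already_confirmed) {
		err = -EEXIST;
		goto out_put;
	}
	if (!can_alloc) {
		err = -ENOMEM;
		goto out_put;
	}
	/* ... link the new node here ... */
out_put:
	obj_put(o);
	return err;
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	o->refcnt = 1;
	printf("%d\n", add_node(o, 0, 1));	/* 0: added, ref dropped */
	return 0;
}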
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0b95f226f211..d1f8eb725d42 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2487,6 +2487,7 @@ void nf_conntrack_cleanup_net(struct net *net)
void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
{
struct nf_ct_iter_data iter_data = {};
+ unsigned long start = jiffies;
struct net *net;
int busy;
@@ -2507,6 +2508,8 @@ i_see_dead_people:
busy = 1;
}
if (busy) {
+ DEBUG_NET_WARN_ONCE(time_after(jiffies, start + 60 * HZ),
+ "conntrack cleanup blocked for 60s");
schedule();
goto i_see_dead_people;
}
diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c
index f0984cf69a09..eb24fe2715dc 100644
--- a/net/netfilter/nf_flow_table_path.c
+++ b/net/netfilter/nf_flow_table_path.c
@@ -250,6 +250,9 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
nft_dev_path_info(&stack, &info, ha, &ft->data);
+ if (info.outdev)
+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
+
if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
return;
@@ -269,7 +272,6 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
route->tuple[!dir].in.num_encaps = info.num_encaps;
route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
- route->tuple[dir].out.ifindex = info.outdev->ifindex;
if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 74cef8bf554c..62cf6a30875e 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -89,7 +89,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
if (pf == NFPROTO_UNSPEC) {
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
if (rcu_access_pointer(loggers[i][logger->type])) {
- ret = -EEXIST;
+ ret = -EBUSY;
goto unlock;
}
}
@@ -97,7 +97,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
rcu_assign_pointer(loggers[i][logger->type], logger);
} else {
if (rcu_access_pointer(loggers[pf][logger->type])) {
- ret = -EEXIST;
+ ret = -EBUSY;
goto unlock;
}
rcu_assign_pointer(loggers[pf][logger->type], logger);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 78a61dac4ade..e6b24586d2fe 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -294,25 +294,13 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
ct = nf_ct_tuplehash_to_ctrack(thash);
- /* NB: IP_CT_DIR_ORIGINAL should be impossible because
- * nf_nat_used_tuple() handles origin collisions.
- *
- * Handle remote chance other CPU confirmed its ct right after.
- */
- if (thash->tuple.dst.dir != IP_CT_DIR_REPLY)
- goto out;
-
/* clashing connection subject to NAT? Retry with new tuple. */
if (READ_ONCE(ct->status) & uses_nat)
goto out;
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) &&
- nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- &ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) {
+ &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple))
taken = false;
- goto out;
- }
out:
nf_ct_put(ct);
return taken;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f3de2f9bbebf..729a92781a1a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -123,6 +123,29 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
table->validate_state = new_validate_state;
}
+
+static bool nft_chain_vstate_valid(const struct nft_ctx *ctx,
+ const struct nft_chain *chain)
+{
+ const struct nft_base_chain *base_chain;
+ enum nft_chain_types type;
+ u8 hooknum;
+
+ if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain)))
+ return false;
+
+ base_chain = nft_base_chain(ctx->chain);
+ hooknum = base_chain->ops.hooknum;
+ type = base_chain->type->type;
+
+ /* chain is already validated for this call depth */
+ if (chain->vstate.depth >= ctx->level &&
+ chain->vstate.hook_mask[type] & BIT(hooknum))
+ return true;
+
+ return false;
+}
+
static void nf_tables_trans_destroy_work(struct work_struct *w);
static void nft_trans_gc_work(struct work_struct *work);
@@ -4079,6 +4102,29 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
nf_tables_rule_destroy(ctx, rule);
}
+static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ const struct nft_base_chain *base_chain;
+ enum nft_chain_types type;
+ u8 hooknum;
+
+ /* ctx->chain must hold the calling base chain. */
+ if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) {
+ memset(&chain->vstate, 0, sizeof(chain->vstate));
+ return;
+ }
+
+ base_chain = nft_base_chain(ctx->chain);
+ hooknum = base_chain->ops.hooknum;
+ type = base_chain->type->type;
+
+ BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX);
+
+ chain->vstate.hook_mask[type] |= BIT(hooknum);
+ if (chain->vstate.depth < ctx->level)
+ chain->vstate.depth = ctx->level;
+}
+
/** nft_chain_validate - loop detection and hook validation
*
* @ctx: context containing call depth and base chain
@@ -4088,15 +4134,25 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
* and set lookups until either the jump limit is hit or all reachable
* chains have been validated.
*/
-int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
+int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain)
{
struct nft_expr *expr, *last;
struct nft_rule *rule;
int err;
+ BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255);
if (ctx->level == NFT_JUMP_STACK_SIZE)
return -EMLINK;
+ if (ctx->level > 0) {
+ /* jumps to base chains are not allowed. */
+ if (nft_is_base_chain(chain))
+ return -ELOOP;
+
+ if (nft_chain_vstate_valid(ctx, chain))
+ return 0;
+ }
+
list_for_each_entry(rule, &chain->rules, list) {
if (fatal_signal_pending(current))
return -EINTR;
@@ -4115,8 +4171,11 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (err < 0)
return err;
}
+
+ cond_resched();
}
+ nft_chain_vstate_update(ctx, chain);
return 0;
}
EXPORT_SYMBOL_GPL(nft_chain_validate);
@@ -4128,7 +4187,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
.net = net,
.family = table->family,
};
- int err;
+ int err = 0;
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_base_chain(chain))
@@ -4137,12 +4196,14 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
ctx.chain = chain;
err = nft_chain_validate(&ctx, chain);
if (err < 0)
- return err;
-
- cond_resched();
+ goto err;
}
- return 0;
+err:
+ list_for_each_entry(chain, &table->chains, list)
+ memset(&chain->vstate, 0, sizeof(chain->vstate));
+
+ return err;
}
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
@@ -4378,7 +4439,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
if (!nft_use_inc(&chain->use)) {
err = -EMFILE;
- goto err_release_rule;
+ goto err_destroy_flow;
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
@@ -4428,6 +4489,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
err_destroy_flow_rule:
nft_use_dec_restore(&chain->use);
+err_destroy_flow:
if (flow)
nft_flow_rule_destroy(flow);
err_release_rule:
@@ -11676,21 +11738,10 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
enum nft_data_types type,
unsigned int len)
{
- int err;
-
switch (reg) {
case NFT_REG_VERDICT:
if (type != NFT_DATA_VERDICT)
return -EINVAL;
-
- if (data != NULL &&
- (data->verdict.code == NFT_GOTO ||
- data->verdict.code == NFT_JUMP)) {
- err = nft_chain_validate(ctx, data->verdict.chain);
- if (err < 0)
- return err;
- }
-
break;
default:
if (type != NFT_DATA_VALUE)
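The nf_tables_api.c changes memoize validation results per chain (the vstate cache) so a densely connected ruleset is not re-walked from every caller. The sketch below is a loose analogue only: a depth-limited DFS over jump targets with a per-node memo; the kernel version additionally keys the cache on hook type and hook number, omitted here, and all names are illustrative:

#include <stdio.h>

#define MAXDEPTH 4

struct chain {
	int targets[4];		/* jump targets, -1 terminated */
	int validated_depth;	/* memo: deepest level already checked */
};

/* Depth-limited DFS with a per-chain memo: a chain revalidates only
 * when reached at a depth greater than any it already passed at. */
static int validate(struct chain *c, int id, int level)
{
	if (level == MAXDEPTH)
		return -1;	/* jump stack exhausted */
	if (level > 0 && c[id].validated_depth >= level)
		return 0;	/* cached: already fine from here */
	for (int *t = c[id].targets; *t >= 0; t++)
		if (validate(c, *t, level + 1))
			return -1;
	if (c[id].validated_depth < level)
		c[id].validated_depth = level;
	return 0;
}

int main(void)
{
	struct chain c[3] = {
		{ { 1, 2, -1 }, 0 },
		{ { 2, -1 }, 0 },
		{ { -1 }, 0 },
	};

	printf("%d\n", validate(c, 0, 0));	/* 0: acyclic, depth ok */
	return 0;
}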
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 112fe46788b6..6d77a5f0088a 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1317,8 +1317,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
dup_end = dup_key;
- if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
- !memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
+ if (!memcmp(start, dup_key->data, set->klen) &&
+ !memcmp(end, dup_end->data, set->klen)) {
*elem_priv = &dup->priv;
return -EEXIST;
}
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 5d3e51825985..4d3e5a31b412 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -48,7 +48,7 @@ static void nft_synproxy_eval_v4(const struct nft_synproxy *priv,
struct tcphdr *_tcph,
struct synproxy_options *opts)
{
- struct nf_synproxy_info info = priv->info;
+ struct nf_synproxy_info info = READ_ONCE(priv->info);
struct net *net = nft_net(pkt);
struct synproxy_net *snet = synproxy_pernet(net);
struct sk_buff *skb = pkt->skb;
@@ -79,7 +79,7 @@ static void nft_synproxy_eval_v6(const struct nft_synproxy *priv,
struct tcphdr *_tcph,
struct synproxy_options *opts)
{
- struct nf_synproxy_info info = priv->info;
+ struct nf_synproxy_info info = READ_ONCE(priv->info);
struct net *net = nft_net(pkt);
struct synproxy_net *snet = synproxy_pernet(net);
struct sk_buff *skb = pkt->skb;
@@ -340,7 +340,7 @@ static void nft_synproxy_obj_update(struct nft_object *obj,
struct nft_synproxy *newpriv = nft_obj_data(newobj);
struct nft_synproxy *priv = nft_obj_data(obj);
- priv->info = newpriv->info;
+ WRITE_ONCE(priv->info, newpriv->info);
}
static struct nft_object_type nft_synproxy_obj_type;
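The synproxy hunks switch the 4-byte nf_synproxy_info copy to READ_ONCE()/WRITE_ONCE() so packet-path readers racing with nft_synproxy_obj_update() see either the old or the new value, never a torn mix. A userspace sketch of the same tear-free snapshot idea, packing an illustrative 4-byte struct into one C11 atomic word:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* 4-byte config blob, shaped loosely like nf_synproxy_info. */
struct info {
	uint8_t options;
	uint8_t wscale;
	uint16_t mss;
};

static _Atomic uint32_t info_word;	/* shared by updater and readers */

static void info_update(struct info v)
{
	uint32_t raw;

	memcpy(&raw, &v, sizeof(raw));
	/* one atomic store: readers observe old or new, never a mix */
	atomic_store_explicit(&info_word, raw, memory_order_relaxed);
}

static struct info info_snapshot(void)
{
	uint32_t raw = atomic_load_explicit(&info_word, memory_order_relaxed);
	struct info i;

	memcpy(&i, &raw, sizeof(i));
	return i;
}

int main(void)
{
	struct info i;

	info_update((struct info){ .options = 1, .wscale = 7, .mss = 1460 });
	i = info_snapshot();
	printf("mss=%u wscale=%u\n", i.mss, i.wscale);
	return 0;
}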
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 90b7630421c4..48105ea3df15 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1764,7 +1764,7 @@ EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
int xt_register_template(const struct xt_table *table,
int (*table_init)(struct net *net))
{
- int ret = -EEXIST, af = table->af;
+ int ret = -EBUSY, af = table->af;
struct xt_template *t;
mutex_lock(&xt[af].mutex);
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 5e531394a724..2b3cbceb0b52 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -43,8 +43,10 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
frontlen = skb_headroom(skb);
while (skb->len > 0) {
- if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL)
+ if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL) {
+ kfree_skb(skb);
return;
+ }
skb_reserve(skbn, frontlen);
diff --git a/net/nfc/core.c b/net/nfc/core.c
index ae1c842f9c64..82f023f37754 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1154,6 +1154,7 @@ EXPORT_SYMBOL(nfc_register_device);
void nfc_unregister_device(struct nfc_dev *dev)
{
int rc;
+ struct rfkill *rfk = NULL;
pr_debug("dev_name=%s\n", dev_name(&dev->dev));
@@ -1164,13 +1165,17 @@ void nfc_unregister_device(struct nfc_dev *dev)
device_lock(&dev->dev);
if (dev->rfkill) {
- rfkill_unregister(dev->rfkill);
- rfkill_destroy(dev->rfkill);
+ rfk = dev->rfkill;
dev->rfkill = NULL;
}
dev->shutting_down = true;
device_unlock(&dev->dev);
+ if (rfk) {
+ rfkill_unregister(rfk);
+ rfkill_destroy(rfk);
+ }
+
if (dev->ops->check_presence) {
timer_delete_sync(&dev->check_pres_timer);
cancel_work_sync(&dev->check_pres_work);
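The nfc hunk detaches dev->rfkill under device_lock() but defers rfkill_unregister()/rfkill_destroy() until after the unlock, since those calls can block or take other locks. A minimal pthread sketch of the detach-then-destroy shape (all names are illustrative, not the kernel API):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct dev {
	pthread_mutex_t lock;
	void *rfkill;	/* stand-in for the rfkill object */
};

static void rfkill_teardown(void *rfk)
{
	/* may sleep or take other locks; must run unlocked */
	free(rfk);
}

static void dev_unregister(struct dev *d)
{
	void *rfk;

	pthread_mutex_lock(&d->lock);
	rfk = d->rfkill;	/* detach under the lock... */
	d->rfkill = NULL;
	pthread_mutex_unlock(&d->lock);

	if (rfk)
		rfkill_teardown(rfk);	/* ...destroy after dropping it */
}

int main(void)
{
	struct dev d = { PTHREAD_MUTEX_INITIALIZER, malloc(16) };

	dev_unregister(&d);
	puts("ok");
	return 0;
}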
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 1cb4f97335d8..2d536901309e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2802,13 +2802,20 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
return err;
}
-static bool validate_push_nsh(const struct nlattr *attr, bool log)
+static bool validate_push_nsh(const struct nlattr *a, bool log)
{
+ struct nlattr *nsh_key = nla_data(a);
struct sw_flow_match match;
struct sw_flow_key key;
+ /* There must be one and only one NSH header. */
+ if (!nla_ok(nsh_key, nla_len(a)) ||
+ nla_total_size(nla_len(nsh_key)) != nla_len(a) ||
+ nla_type(nsh_key) != OVS_KEY_ATTR_NSH)
+ return false;
+
ovs_match_init(&match, &key, true, NULL);
- return !nsh_key_put_from_nlattr(attr, &match, false, true, log);
+ return !nsh_key_put_from_nlattr(nsh_key, &match, false, true, log);
}
/* Return false if there are any non-masked bits set.
@@ -3389,7 +3396,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL;
}
mac_proto = MAC_PROTO_NONE;
- if (!validate_push_nsh(nla_data(a), log))
+ if (!validate_push_nsh(a, log))
return -EINVAL;
break;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 91a11067e458..6574f9bcdc02 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -160,10 +160,19 @@ void ovs_netdev_detach_dev(struct vport *vport)
static void netdev_destroy(struct vport *vport)
{
- rtnl_lock();
- if (netif_is_ovs_port(vport->dev))
- ovs_netdev_detach_dev(vport);
- rtnl_unlock();
+ /* When called from ovs_dp_notify_wq() after a dp_device_event(), the
+ * port has already been detached, so we can avoid taking the RTNL by
+ * checking this first.
+ */
+ if (netif_is_ovs_port(vport->dev)) {
+ rtnl_lock();
+ /* Check again while holding the lock to ensure we don't race
+ * with the netdev notifier and detach twice.
+ */
+ if (netif_is_ovs_port(vport->dev))
+ ovs_netdev_detach_dev(vport);
+ rtnl_unlock();
+ }
call_rcu(&vport->rcu, vport_netdev_free);
}
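The vport-netdev change is classic double-checked locking: a cheap unlocked test skips the RTNL in the common already-detached case, and the re-test under the lock keeps two racing callers from detaching twice. A sketch of the shape, using a C11 atomic flag in place of the kernel's primitives:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool attached = true;

/* Unlocked fast path first; recheck under the lock before acting. */
static void destroy_port(void)
{
	if (atomic_load(&attached)) {
		pthread_mutex_lock(&big_lock);
		if (atomic_load(&attached))
			atomic_store(&attached, false);	/* detach once */
		pthread_mutex_unlock(&big_lock);
	}
}

int main(void)
{
	destroy_port();
	destroy_port();	/* second call skips the lock entirely */
	printf("attached=%d\n", atomic_load(&attached));
	return 0;
}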
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index fd67494f2815..c0f5a515a8ce 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -205,7 +205,7 @@ start:
spin_unlock_bh(&rose_list_lock);
for (i = 0; i < cnt; i++) {
- sk = array[cnt];
+ sk = array[i];
rose = rose_sk(sk);
lock_sock(sk);
spin_lock_bh(&rose_list_lock);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ff6be5cfe2b0..e1ab0faeb811 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -940,6 +940,8 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
int ret;
idr_for_each_entry_ul(idr, p, tmp, id) {
+ if (IS_ERR(p))
+ continue;
if (tc_act_in_hw(p) && !mutex_taken) {
rtnl_lock();
mutex_taken = true;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index f27b583def78..05e0b14b5773 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -266,11 +266,22 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
goto err_cant_do;
}
+ want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+
+ at_ingress = skb_at_tc_ingress(skb);
+ if (dev == skb->dev && want_ingress == at_ingress) {
+ pr_notice_once("tc mirred: Loop (%s:%s --> %s:%s)\n",
+ netdev_name(skb->dev),
+ at_ingress ? "ingress" : "egress",
+ netdev_name(dev),
+ want_ingress ? "ingress" : "egress");
+ goto err_cant_do;
+ }
+
/* we could easily avoid the clone only if called by ingress and clsact;
* since we can't easily detect the clsact caller, skip clone only for
* ingress - that covers the TC S/W datapath.
*/
- at_ingress = skb_at_tc_ingress(skb);
dont_clone = skb_at_tc_ingress(skb) && is_redirect &&
tcf_mirred_can_reinsert(retval);
if (!dont_clone) {
@@ -279,8 +290,6 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
goto err_cant_do;
}
- want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
-
/* All mirred/redirected skbs should clear previous ct info */
nf_reset_ct(skb_to_send);
if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */
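The mirred hunk hoists an explicit same-device, same-direction test so a redirect that would re-enter the very hook it came from is rejected up front instead of looping. A tiny sketch of the predicate (types and names are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct pkt {
	int dev_id;
	bool at_ingress;
};

/* A redirect to the same device in the same direction can only loop. */
static bool would_loop(const struct pkt *p, int target_dev, bool want_ingress)
{
	return p->dev_id == target_dev && p->at_ingress == want_ingress;
}

int main(void)
{
	struct pkt p = { .dev_id = 3, .at_ingress = true };

	printf("%d\n", would_loop(&p, 3, true));  /* 1: dropped */
	printf("%d\n", would_loop(&p, 3, false)); /* 0: ingress->egress ok */
	return 0;
}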
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 82635dd2cfa5..306e046276d4 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -652,7 +652,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
for (i = nbands; i < oldbands; i++) {
- if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
+ if (cl_is_active(&q->classes[i]))
list_del_init(&q->classes[i].alist);
qdisc_purge_queue(q->classes[i].qdisc);
}
@@ -664,6 +664,10 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
q->classes[i].deficit = quanta[i];
}
}
+ for (i = q->nstrict; i < nstrict; i++) {
+ if (cl_is_active(&q->classes[i]))
+ list_del_init(&q->classes[i].alist);
+ }
WRITE_ONCE(q->nstrict, nstrict);
memcpy(q->prio2band, priomap, sizeof(priomap));
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d920f57dc6d7..9d59090bbe93 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -529,8 +529,10 @@ set_change_agg:
return 0;
destroy_class:
- qdisc_put(cl->qdisc);
- kfree(cl);
+ if (!existing) {
+ qdisc_put(cl->qdisc);
+ kfree(cl);
+ }
return err;
}
@@ -1481,7 +1483,7 @@ static void qfq_reset_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
- if (cl->qdisc->q.qlen > 0)
+ if (cl_is_active(cl))
qfq_deactivate_class(q, cl);
qdisc_reset(cl->qdisc);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 069b7e45d8bd..531cb0690007 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -492,6 +492,8 @@ static void sctp_v6_copy_ip_options(struct sock *sk, struct sock *newsk)
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct ipv6_txoptions *opt;
+ inet_sk(newsk)->inet_opt = NULL;
+
newnp = inet6_sk(newsk);
rcu_read_lock();
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d808096f5ab1..2493a5b1fa3c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4863,8 +4863,6 @@ static struct sock *sctp_clone_sock(struct sock *sk,
newsp->pf->to_sk_daddr(&asoc->peer.primary_addr, newsk);
newinet->inet_dport = htons(asoc->peer.port);
-
- newsp->pf->copy_ip_options(sk, newsk);
atomic_set(&newinet->inet_id, get_random_u16());
inet_set_bit(MC_LOOP, newsk);
@@ -4874,17 +4872,20 @@ static struct sock *sctp_clone_sock(struct sock *sk,
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *newnp = inet6_sk(newsk);
+ struct ipv6_pinfo *newnp;
newinet->pinet6 = &((struct sctp6_sock *)newsk)->inet6;
newinet->ipv6_fl_list = NULL;
+ newnp = inet6_sk(newsk);
memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
}
#endif
+ newsp->pf->copy_ip_options(sk, newsk);
+
newsp->do_auto_asconf = 0;
skb_queue_head_init(&newsp->pd_lobby);
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 325addf83cc6..277ef504bc26 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -22,10 +22,10 @@ config SMC_DIAG
config SMC_HS_CTRL_BPF
bool "Generic eBPF hook for SMC handshake flow"
- depends on SMC && BPF_SYSCALL
+ depends on SMC && BPF_JIT && BPF_SYSCALL
default y
help
SMC_HS_CTRL_BPF enables support for registering a generic eBPF hook for the SMC
handshake flow, which offers much greater flexibility in modifying the behavior
of the SMC protocol stack compared to a purely kernel-based approach. Select
- this option if you want filtring the handshake process via eBPF programs. \ No newline at end of file
+ this option if you want to filter the handshake process via eBPF programs.
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index a8ec30759a18..e2f0df8cdaa6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1083,7 +1083,8 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
}
length = min_t(unsigned int, inlen, (char *)xdr->end - (char *)xdr->p);
- memcpy(page_address(in_token->pages[0]), xdr->p, length);
+ if (length)
+ memcpy(page_address(in_token->pages[0]), xdr->p, length);
inlen -= length;
to_offs = length;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 661b3fe2779f..310de7a80be5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -841,6 +841,9 @@ static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
for (page_no = 0; page_no < numpages; page_no++) {
unsigned int page_len;
+ if (head->rc_curpage >= rqstp->rq_maxpages)
+ return -EINVAL;
+
page_len = min_t(unsigned int, remaining,
PAGE_SIZE - head->rc_pageoff);
@@ -848,7 +851,7 @@ static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
head->rc_page_count++;
dst = page_address(rqstp->rq_pages[head->rc_curpage]);
- memcpy(dst + head->rc_curpage, src + offset, page_len);
+ memcpy((unsigned char *)dst + head->rc_pageoff, src + offset, page_len);
head->rc_readbytes += page_len;
head->rc_pageoff += page_len;
@@ -860,7 +863,7 @@ static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
offset += page_len;
}
- return -EINVAL;
+ return 0;
}
/**
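The svc_rdma hunk fixes two things at once: the copy destination is now page base plus rc_pageoff (not plus the page index), and the loop bails out before indexing past rq_maxpages. A userspace sketch of a bounded (page, offset) copy cursor; PAGE_SZ and the helpers are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SZ 4096

/* Copy len bytes through a (page, offset) cursor, refusing to index
 * past the page array -- the bound check plus the dst+offset fix. */
static int copy_to_pages(unsigned char **pages, size_t npages,
			 size_t *curpage, size_t *pageoff,
			 const unsigned char *src, size_t len)
{
	while (len) {
		size_t chunk;

		if (*curpage >= npages)
			return -1;	/* would overrun the array */
		chunk = PAGE_SZ - *pageoff;
		if (chunk > len)
			chunk = len;
		memcpy(pages[*curpage] + *pageoff, src, chunk);
		*pageoff += chunk;
		if (*pageoff == PAGE_SZ) {
			(*curpage)++;
			*pageoff = 0;
		}
		src += chunk;
		len -= chunk;
	}
	return 0;
}

int main(void)
{
	unsigned char *pages[2] = { malloc(PAGE_SZ), malloc(PAGE_SZ) };
	size_t curpage = 0, pageoff = 0;
	unsigned char buf[6000] = { 0 };

	printf("%d\n", copy_to_pages(pages, 2, &curpage, &pageoff,
				     buf, sizeof(buf)));	/* 0: fits */
	printf("%d\n", copy_to_pages(pages, 2, &curpage, &pageoff,
				     buf, sizeof(buf)));	/* -1: overrun */
	free(pages[0]);
	free(pages[1]);
	return 0;
}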
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 55cdebfa0da0..d0511225799b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -3086,12 +3086,15 @@ unlock:
mutex_unlock(&u->iolock);
if (msg) {
+ bool do_cmsg = READ_ONCE(u->recvmsg_inq);
+
scm_recv_unix(sock, msg, &scm, flags);
- if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) {
+ if ((do_cmsg | msg->msg_get_inq) && (copied ?: err) >= 0) {
msg->msg_inq = READ_ONCE(u->inq_len);
- put_cmsg(msg, SOL_SOCKET, SCM_INQ,
- sizeof(msg->msg_inq), &msg->msg_inq);
+ if (do_cmsg)
+ put_cmsg(msg, SOL_SOCKET, SCM_INQ,
+ sizeof(msg->msg_inq), &msg->msg_inq);
}
} else {
scm_destroy(&scm);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 78323d43e63e..25f65817faab 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -199,7 +199,7 @@ static void unix_free_vertices(struct scm_fp_list *fpl)
}
}
-static DEFINE_SPINLOCK(unix_gc_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(unix_gc_lock);
void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
{
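Padding unix_gc_lock out to its own cache line (__cacheline_aligned_in_smp) keeps contended lock traffic from invalidating read-mostly data that happens to share the line. A userspace illustration with alignas, assuming a 64-byte cache line (tune the constant for other CPUs):

#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>

/* Hot lock and read-mostly data forced onto separate cache lines. */
struct gc_state {
	alignas(64) int lock_word;	  /* stand-in for the spinlock */
	alignas(64) unsigned long nr_fds; /* read-mostly neighbour */
};

int main(void)
{
	printf("lock @%zu, nr_fds @%zu, struct %zu bytes\n",
	       offsetof(struct gc_state, lock_word),
	       offsetof(struct gc_state, nr_fds),
	       sizeof(struct gc_state));
	return 0;
}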
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index adcba1b7bf74..a3505a4dcee0 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1787,6 +1787,10 @@ static int vsock_accept(struct socket *sock, struct socket *newsock,
} else {
newsock->state = SS_CONNECTED;
sock_graft(connected, newsock);
+
+ set_bit(SOCK_CUSTOM_SOCKOPT,
+ &connected->sk_socket->flags);
+
if (vsock_msgzerocopy_allow(vconnected->transport))
set_bit(SOCK_SUPPORT_ZC,
&connected->sk_socket->flags);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 3a028ff287fb..4e629ca305bc 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -910,7 +910,7 @@ void __cfg80211_connect_result(struct net_device *dev,
ssid_len = min(ssid->datalen, IEEE80211_MAX_SSID_LEN);
memcpy(wdev->u.client.ssid, ssid->data, ssid_len);
- wdev->u.client.ssid_len = ssid->datalen;
+ wdev->u.client.ssid_len = ssid_len;
break;
}
rcu_read_unlock();
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c32a7c6903d5..7b8e94214b07 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1101,6 +1101,10 @@ static int compat_standard_call(struct net_device *dev,
return ioctl_standard_call(dev, iwr, cmd, info, handler);
iwp_compat = (struct compat_iw_point *) &iwr->u.data;
+
+ /* struct iw_point has a 32bit hole on 64bit arches. */
+ memset(&iwp, 0, sizeof(iwp));
+
iwp.pointer = compat_ptr(iwp_compat->pointer);
iwp.length = iwp_compat->length;
iwp.flags = iwp_compat->flags;
diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c
index 674d426a9d24..37d1147019c2 100644
--- a/net/wireless/wext-priv.c
+++ b/net/wireless/wext-priv.c
@@ -228,6 +228,10 @@ int compat_private_call(struct net_device *dev, struct iwreq *iwr,
struct iw_point iwp;
iwp_compat = (struct compat_iw_point *) &iwr->u.data;
+
+ /* struct iw_point has a 32bit hole on 64bit arches. */
+ memset(&iwp, 0, sizeof(iwp));
+
iwp.pointer = compat_ptr(iwp_compat->pointer);
iwp.length = iwp_compat->length;
iwp.flags = iwp_compat->flags;
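Both wext hunks memset() the local struct iw_point before filling it, because the 64-bit layout leaves a padding hole the compiler never writes; without the memset, uninitialized stack bytes travel with any wholesale copy of the struct. A userspace sketch of the hole and the fix (the struct below only loosely mirrors iw_point):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* On LP64 the pointer forces 8-byte alignment, leaving tail padding
 * after flags that field assignments alone never initialize. */
struct point {
	void *pointer;
	uint16_t length;
	uint16_t flags;
	/* 4 bytes of tail padding on 64-bit ABIs */
};

static void fill(struct point *p)
{
	memset(p, 0, sizeof(*p));	/* clear the hole first */
	p->length = 16;
	p->flags = 1;
}

int main(void)
{
	struct point p;

	fill(&p);
	/* every byte, padding included, is now deterministic */
	for (size_t i = 0; i < sizeof(p); i++)
		printf("%02x", ((unsigned char *)&p)[i]);
	putchar('\n');
	return 0;
}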
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9e14e453b55c..98b362d51836 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -3151,6 +3151,7 @@ int __xfrm_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
int err;
if (family == AF_INET &&
+ (!x->dir || x->dir == XFRM_SA_DIR_OUT) &&
READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
x->props.flags |= XFRM_STATE_NOPMTUDISC;