summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-06-04 14:35:55 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-06-04 14:35:55 -0700
commitddd664bbff63e09e7a7f9acae9c43605d4cf185f (patch)
treee679c38664049eb79787147fdfadaf20f9cf171c
parent44ed32d16c9d0e0f3a4b594982a2bb168d2f56ea (diff)
parentb6197b386677ae5268d4702e23849d9ad53051ad (diff)
Merge tag 'net-7.1-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netHEADmaster
Pull networking fixes from Jakub Kicinski: "Including fixes from Netfilter, wireless and Bluetooth. Current release - fix to a fix: - Bluetooth: MGMT: fix backward compatibility with bluetoothd which adds stray bytes to MGMT_OP_ADD_EXT_ADV_DATA Previous releases - regressions: - af_unix: fix inq_len update inaccuracy on partial read - eth: fec: fix pinctrl default state restore order on resume - wifi: iwlwifi: - mvm: don't support the reset handshake for old firmwares - pcie: simplify the resume flow if fast resume is not used, work around NIC access failures Previous releases - always broken: - Bluetooth: L2CAP: reject BR/EDR signaling packets over MTUsig - sctp: fix a couple of bugs in COOKIE_ECHO processing - sched: fix pedit partial COW leading to page cache corruption - wifi: nl80211: reject oversized EMA RNR lists - netfilter: - conntrack_irc: fix possible out-of-bounds read - bridge: make ebt_snat ARP rewrite writable - appletalk: zero-initialize aarp_entry to prevent heap info leak - ipv4: restrict IPOPT_SSRR and IPOPT_LSRR options - mptcp: fix number of bugs reported by AI scans and discovered during NVMe over MPTCP testing" * tag 'net-7.1-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (85 commits) Reapply "bnxt_en: bring back rtnl_lock() in the bnxt_open() path" udp: clear skb->dev before running a sockmap verdict sctp: purge outqueue on stale COOKIE-ECHO handling bonding: annotate data-races arcound churn variables net/802/mrp: fix vector attribute parsing in mrp_pdu_parse_vecattr rtase: Avoid sleeping in get_stats64() ieee802154: 6lowpan: only accept IPv6 packets in lowpan_xmit() ipv6: mcast: Fix use-after-free when processing MLD queries selftests: net: add vxlan vnifilter notification test vxlan: vnifilter: fix spurious notification on VNI update vxlan: vnifilter: send notification on VNI add rtase: Reset TX subqueue when clearing TX ring octeontx2-af: npc: Fix CPT channel mask in npc_install_flow dt-bindings: ethernet: eswin: fix hsp-sp-csr backward compatibility sctp: validate cached peer INIT chunk length in COOKIE_ECHO processing net/sched: fix pedit partial COW leading to page cache corruption vsock/vmci: fix sk_ack_backlog leak on failed handshake net: bonding: fix NULL pointer dereference in bond_do_ioctl() geneve: fix length used in GRO hint UDP checksum adjustment net: ethernet: mtk_eth_soc: Fix use-after-free in metadata dst teardown ...
-rw-r--r--Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml3
-rw-r--r--drivers/net/bonding/bond_3ad.c18
-rw-r--r--drivers/net/bonding/bond_main.c4
-rw-r--r--drivers/net/bonding/bond_netlink.c4
-rw-r--r--drivers/net/bonding/bond_procfs.c8
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.c2
-rw-r--r--drivers/net/ethernet/amd/pcnet32.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c36
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c36
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c6
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c32
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.h1
-rw-r--r--drivers/net/ethernet/realtek/rtase/rtase_main.c7
-rw-r--r--drivers/net/geneve.c2
-rw-r--r--drivers/net/phy/sfp.c1
-rw-r--r--drivers/net/vxlan/vxlan_vnifilter.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/ap.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/mac80211.c7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/power.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c26
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c56
-rw-r--r--drivers/ptp/ptp_vclock.c14
-rw-r--r--include/net/act_api.h1
-rw-r--r--include/net/bluetooth/l2cap.h1
-rw-r--r--include/net/ip_vs.h3
-rw-r--r--include/net/mptcp.h7
-rw-r--r--include/net/tc_act/tc_pedit.h1
-rw-r--r--net/6lowpan/iphc.c4
-rw-r--r--net/802/garp.c2
-rw-r--r--net/802/mrp.c9
-rw-r--r--net/appletalk/aarp.c2
-rw-r--r--net/bluetooth/bnep/core.c57
-rw-r--r--net/bluetooth/hci_sync.c5
-rw-r--r--net/bluetooth/hci_sysfs.c6
-rw-r--r--net/bluetooth/iso.c63
-rw-r--r--net/bluetooth/l2cap_core.c46
-rw-r--r--net/bluetooth/mgmt.c17
-rw-r--r--net/bluetooth/rfcomm/core.c67
-rw-r--r--net/bluetooth/rfcomm/sock.c26
-rw-r--r--net/bluetooth/sco.c20
-rw-r--r--net/bridge/netfilter/ebt_snat.c3
-rw-r--r--net/core/sock.c8
-rw-r--r--net/devlink/core.c2
-rw-r--r--net/hsr/hsr_framereg.c4
-rw-r--r--net/ieee802154/6lowpan/tx.c5
-rw-r--r--net/ipv4/inet_connection_sock.c6
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/udp.c8
-rw-r--r--net/ipv6/anycast.c16
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c3
-rw-r--r--net/l2tp/l2tp_ppp.c82
-rw-r--r--net/mac80211/tx.c4
-rw-r--r--net/mptcp/options.c79
-rw-r--r--net/mptcp/pm.c15
-rw-r--r--net/mptcp/pm_userspace.c14
-rw-r--r--net/mptcp/protocol.c10
-rw-r--r--net/mptcp/protocol.h7
-rw-r--r--net/mptcp/sockopt.c15
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c14
-rw-r--r--net/netfilter/nf_conntrack_irc.c4
-rw-r--r--net/netfilter/nf_synproxy_core.c24
-rw-r--r--net/netfilter/nft_byteorder.c51
-rw-r--r--net/netfilter/nft_ct.c8
-rw-r--r--net/netfilter/nft_ct_fast.c2
-rw-r--r--net/netfilter/nft_tunnel.c2
-rw-r--r--net/netfilter/xt_NFQUEUE.c2
-rw-r--r--net/rds/ib_cm.c1
-rw-r--r--net/sched/act_api.c7
-rw-r--r--net/sched/act_pedit.c77
-rw-r--r--net/sctp/diag.c17
-rw-r--r--net/sctp/sm_make_chunk.c5
-rw-r--r--net/sctp/sm_statefuns.c6
-rw-r--r--net/unix/af_unix.c11
-rw-r--r--net/vmw_vsock/vmci_transport.c4
-rw-r--r--net/wireless/nl80211.c9
-rw-r--r--net/wireless/scan.c9
-rw-r--r--net/xdp/xsk.c11
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c54
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh4
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifilter_notify.sh184
89 files changed, 1019 insertions, 425 deletions
diff --git a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
index b66ae6300faf..65882ff79d8d 100644
--- a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
+++ b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
@@ -84,7 +84,8 @@ properties:
This reference is provided for background information only.
$ref: /schemas/types.yaml#/definitions/phandle-array
items:
- - items:
+ - minItems: 4
+ items:
- description: Phandle to HSP(High-Speed Peripheral) device
- description: Offset of phy control register for internal
or external clock selection
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index f0aa7d2f2171..985ef66dc333 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1386,8 +1386,8 @@ static void ad_churn_machine(struct port *port)
{
if (port->sm_vars & AD_PORT_CHURNED) {
port->sm_vars &= ~AD_PORT_CHURNED;
- port->sm_churn_actor_state = AD_CHURN_MONITOR;
- port->sm_churn_partner_state = AD_CHURN_MONITOR;
+ WRITE_ONCE(port->sm_churn_actor_state, AD_CHURN_MONITOR);
+ WRITE_ONCE(port->sm_churn_partner_state, AD_CHURN_MONITOR);
port->sm_churn_actor_timer_counter =
__ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0);
port->sm_churn_partner_timer_counter =
@@ -1398,20 +1398,22 @@ static void ad_churn_machine(struct port *port)
!(--port->sm_churn_actor_timer_counter) &&
port->sm_churn_actor_state == AD_CHURN_MONITOR) {
if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) {
- port->sm_churn_actor_state = AD_NO_CHURN;
+ WRITE_ONCE(port->sm_churn_actor_state, AD_NO_CHURN);
} else {
- port->churn_actor_count++;
- port->sm_churn_actor_state = AD_CHURN;
+ WRITE_ONCE(port->churn_actor_count,
+ port->churn_actor_count + 1);
+ WRITE_ONCE(port->sm_churn_actor_state, AD_CHURN);
}
}
if (port->sm_churn_partner_timer_counter &&
!(--port->sm_churn_partner_timer_counter) &&
port->sm_churn_partner_state == AD_CHURN_MONITOR) {
if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) {
- port->sm_churn_partner_state = AD_NO_CHURN;
+ WRITE_ONCE(port->sm_churn_partner_state, AD_NO_CHURN);
} else {
- port->churn_partner_count++;
- port->sm_churn_partner_state = AD_CHURN;
+ WRITE_ONCE(port->churn_partner_count,
+ port->churn_partner_count + 1);
+ WRITE_ONCE(port->sm_churn_partner_state, AD_CHURN);
}
}
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 82e779f7916b..8e75453ce0ef 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4621,11 +4621,11 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
slave_dev = __dev_get_by_name(net, ifr->ifr_slave);
- slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev);
-
if (!slave_dev)
return -ENODEV;
+ slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev);
+
switch (cmd) {
case SIOCBONDENSLAVE:
res = bond_enslave(bond_dev, slave_dev, NULL);
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index c7d3e0602c83..90365d3f7ebf 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -82,10 +82,10 @@ static int bond_fill_slave_info(struct sk_buff *skb,
goto nla_put_failure_rcu;
if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE,
- ad_port->sm_churn_actor_state))
+ READ_ONCE(ad_port->sm_churn_actor_state)))
goto nla_put_failure_rcu;
if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE,
- ad_port->sm_churn_partner_state))
+ READ_ONCE(ad_port->sm_churn_partner_state)))
goto nla_put_failure_rcu;
}
rcu_read_unlock();
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 3714aab1a3d9..3607b62f9b63 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -221,13 +221,13 @@ static void bond_info_show_slave(struct seq_file *seq,
seq_printf(seq, "Aggregator ID: %d\n",
agg->aggregator_identifier);
seq_printf(seq, "Actor Churn State: %s\n",
- bond_3ad_churn_desc(port->sm_churn_actor_state));
+ bond_3ad_churn_desc(READ_ONCE(port->sm_churn_actor_state)));
seq_printf(seq, "Partner Churn State: %s\n",
- bond_3ad_churn_desc(port->sm_churn_partner_state));
+ bond_3ad_churn_desc(READ_ONCE(port->sm_churn_partner_state)));
seq_printf(seq, "Actor Churned Count: %d\n",
- port->churn_actor_count);
+ READ_ONCE(port->churn_actor_count));
seq_printf(seq, "Partner Churned Count: %d\n",
- port->churn_partner_count);
+ READ_ONCE(port->churn_partner_count));
if (capable(CAP_NET_ADMIN)) {
seq_puts(seq, "details actor lacp pdu:\n");
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index cecd66251dba..eab6a98d62b9 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -2936,7 +2936,7 @@ static void airoha_metadata_dst_free(struct airoha_gdm_port *port)
if (!port->dsa_meta[i])
continue;
- metadata_dst_free(port->dsa_meta[i]);
+ dst_release(&port->dsa_meta[i]->dst);
}
}
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index 911808ab13a7..4f3076d4ea34 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1407,8 +1407,10 @@ static int pcnet32_poll(struct napi_struct *napi, int budget)
pcnet32_restart(dev, CSR0_START);
netif_wake_queue(dev);
}
+ spin_unlock_irqrestore(&lp->lock, flags);
if (work_done < budget && napi_complete_done(napi, work_done)) {
+ spin_lock_irqsave(&lp->lock, flags);
/* clear interrupt masks */
val = lp->a->read_csr(ioaddr, CSR3);
val &= 0x00ff;
@@ -1416,9 +1418,9 @@ static int pcnet32_poll(struct napi_struct *napi, int budget)
/* Set interrupt enable. */
lp->a->write_csr(ioaddr, CSR0, CSR0_INTEN);
+ spin_unlock_irqrestore(&lp->lock, flags);
}
- spin_unlock_irqrestore(&lp->lock, flags);
return work_done;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 008c34cff7b4..35e1f8f663c7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -14388,13 +14388,28 @@ static void bnxt_unlock_sp(struct bnxt *bp)
netdev_unlock(bp->dev);
}
+/* Same as bnxt_lock_sp() with additional rtnl_lock */
+static void bnxt_rtnl_lock_sp(struct bnxt *bp)
+{
+ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+ rtnl_lock();
+ netdev_lock(bp->dev);
+}
+
+static void bnxt_rtnl_unlock_sp(struct bnxt *bp)
+{
+ set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+ netdev_unlock(bp->dev);
+ rtnl_unlock();
+}
+
/* Only called from bnxt_sp_task() */
static void bnxt_reset(struct bnxt *bp, bool silent)
{
- bnxt_lock_sp(bp);
+ bnxt_rtnl_lock_sp(bp);
if (test_bit(BNXT_STATE_OPEN, &bp->state))
bnxt_reset_task(bp, silent);
- bnxt_unlock_sp(bp);
+ bnxt_rtnl_unlock_sp(bp);
}
/* Only called from bnxt_sp_task() */
@@ -14402,9 +14417,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
{
int i;
- bnxt_lock_sp(bp);
+ bnxt_rtnl_lock_sp(bp);
if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
- bnxt_unlock_sp(bp);
+ bnxt_rtnl_unlock_sp(bp);
return;
}
/* Disable and flush TPA before resetting the RX ring */
@@ -14443,7 +14458,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
}
if (bp->flags & BNXT_FLAG_TPA)
bnxt_set_tpa(bp, true);
- bnxt_unlock_sp(bp);
+ bnxt_rtnl_unlock_sp(bp);
}
static void bnxt_fw_fatal_close(struct bnxt *bp)
@@ -15358,15 +15373,17 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
fallthrough;
case BNXT_FW_RESET_STATE_OPENING:
- while (!netdev_trylock(bp->dev)) {
+ while (!rtnl_trylock()) {
bnxt_queue_fw_reset_work(bp, HZ / 10);
return;
}
+ netdev_lock(bp->dev);
rc = bnxt_open(bp->dev);
if (rc) {
netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
bnxt_fw_reset_abort(bp, rc);
netdev_unlock(bp->dev);
+ rtnl_unlock();
goto ulp_start;
}
@@ -15386,6 +15403,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bnxt_dl_health_fw_status_update(bp, true);
}
netdev_unlock(bp->dev);
+ rtnl_unlock();
bnxt_ulp_start(bp);
bnxt_reenable_sriov(bp);
netdev_lock(bp->dev);
@@ -16379,7 +16397,7 @@ err_reset:
rc);
napi_enable_locked(&bnapi->napi);
bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
- bnxt_reset_task(bp, true);
+ netif_close(dev);
return rc;
}
@@ -17230,6 +17248,7 @@ static int bnxt_resume(struct device *device)
struct bnxt *bp = netdev_priv(dev);
int rc = 0;
+ rtnl_lock();
netdev_lock(dev);
rc = pci_enable_device(bp->pdev);
if (rc) {
@@ -17274,6 +17293,7 @@ static int bnxt_resume(struct device *device)
resume_exit:
netdev_unlock(bp->dev);
+ rtnl_unlock();
if (!rc) {
bnxt_ulp_start(bp);
bnxt_reenable_sriov(bp);
@@ -17445,6 +17465,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
int err;
netdev_info(bp->dev, "PCI Slot Resume\n");
+ rtnl_lock();
netdev_lock(netdev);
err = bnxt_hwrm_func_qcaps(bp);
@@ -17462,6 +17483,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
netif_device_attach(netdev);
netdev_unlock(netdev);
+ rtnl_unlock();
if (!err) {
bnxt_ulp_start(bp);
bnxt_reenable_sriov(bp);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index f89aa94ce020..6ebde65d7f1b 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -5594,6 +5594,7 @@ static int fec_resume(struct device *dev)
if (fep->rpm_active)
pm_runtime_force_resume(dev);
+ pinctrl_pm_select_default_state(&fep->pdev->dev);
ret = fec_enet_clk_enable(ndev, true);
if (ret) {
rtnl_unlock();
@@ -5610,8 +5611,6 @@ static int fec_resume(struct device *dev)
val &= ~(FEC_ECR_MAGICEN | FEC_ECR_SLEEP);
writel(val, fep->hwp + FEC_ECNTRL);
fep->wol_flag &= ~FEC_WOL_FLAG_SLEEP_ON;
- } else {
- pinctrl_pm_select_default_state(&fep->pdev->dev);
}
fec_restart(ndev);
netif_tx_lock_bh(ndev);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index de3fbd3d15d6..65397daae4c2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -1145,6 +1145,7 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf,
int slot);
int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc);
int rvu_cpt_init(struct rvu *rvu);
+u32 rvu_get_cpt_chan_mask(struct rvu *rvu);
#define NDC_AF_BANK_MASK GENMASK_ULL(7, 0)
#define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 607d0cf1a778..d301a3f0f87a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -701,6 +701,19 @@ void npc_set_mcam_action(struct rvu *rvu, struct npc_mcam *mcam,
return rvu_write64(rvu, blkaddr, reg, cfg);
}
+u32 rvu_get_cpt_chan_mask(struct rvu *rvu)
+{
+ /* For cn10k the upper two bits of the channel number are
+ * cpt channel number. with masking out these bits in the
+ * mcam entry, same entry used for NIX will allow packets
+ * received from cpt for parsing.
+ */
+ if (!is_rvu_otx2(rvu))
+ return NIX_CHAN_CPT_X2P_MASK;
+ else
+ return 0xFFFu;
+}
+
void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
int nixlf, u64 chan, u8 *mac_addr)
{
@@ -750,7 +763,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
eth_broadcast_addr((u8 *)&req.mask.dmac);
req.features = BIT_ULL(NPC_DMAC);
req.channel = chan;
- req.chan_mask = 0xFFFU;
+ req.chan_mask = rvu_get_cpt_chan_mask(rvu);
req.intf = pfvf->nix_rx_intf;
req.op = action.op;
req.hdr.pcifunc = 0; /* AF is requester */
@@ -845,11 +858,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
* mcam entry, same entry used for NIX will allow packets
* received from cpt for parsing.
*/
- if (!is_rvu_otx2(rvu)) {
- req.chan_mask = NIX_CHAN_CPT_X2P_MASK;
- } else {
- req.chan_mask = 0xFFFU;
- }
+ req.chan_mask = rvu_get_cpt_chan_mask(rvu);
if (chan_cnt > 1) {
if (!is_power_of_2(chan_cnt)) {
@@ -1053,16 +1062,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
ether_addr_copy(req.mask.dmac, mac_addr);
req.features = BIT_ULL(NPC_DMAC);
- /* For cn10k the upper two bits of the channel number are
- * cpt channel number. with masking out these bits in the
- * mcam entry, same entry used for NIX will allow packets
- * received from cpt for parsing.
- */
- if (!is_rvu_otx2(rvu))
- req.chan_mask = NIX_CHAN_CPT_X2P_MASK;
- else
- req.chan_mask = 0xFFFU;
-
+ req.chan_mask = rvu_get_cpt_chan_mask(rvu);
req.channel = chan;
req.intf = pfvf->nix_rx_intf;
req.entry = index;
@@ -2192,8 +2192,8 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr)
goto free_entry_cntr_map;
/* Alloc memory for saving target device of mcam rule */
- mcam->entry2target_pffunc = kmalloc_array(mcam->total_entries,
- sizeof(u16), GFP_KERNEL);
+ mcam->entry2target_pffunc = kcalloc(mcam->total_entries,
+ sizeof(u16), GFP_KERNEL);
if (!mcam->entry2target_pffunc)
goto free_cntr_refcnt;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 6ae9cdcb608b..34f1e066707b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1820,7 +1820,7 @@ process_flow:
/* ignore chan_mask in case pf func is not AF, revisit later */
if (!is_pffunc_af(req->hdr.pcifunc))
- req->chan_mask = 0xFFF;
+ req->chan_mask = rvu_get_cpt_chan_mask(rvu);
err = npc_check_unsupported_flows(rvu, req->features, req->intf);
if (err) {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index ee623476e5ff..f9fbf0c17648 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -3473,7 +3473,7 @@ static void otx2_ndc_sync(struct otx2_nic *pf)
req->nix_lf_rx_sync = 1;
req->npa_lf_sync = 1;
- if (!otx2_sync_mbox_msg(mbox))
+ if (otx2_sync_mbox_msg(mbox))
dev_err(pf->dev, "NDC sync operation failed\n");
mutex_unlock(&mbox->lock);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 8d225bc9f063..7d771168b990 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -4491,7 +4491,7 @@ static int mtk_free_dev(struct mtk_eth *eth)
for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
if (!eth->dsa_meta[i])
break;
- metadata_dst_free(eth->dsa_meta[i]);
+ dst_release(&eth->dsa_meta[i]->dst);
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index c89417c1a1f9..e2895972cc82 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1002,12 +1002,13 @@ static void cmd_work_handler(struct work_struct *work)
ent->callback(-EBUSY, ent->context);
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
+ complete(&ent->slotted);
cmd_ent_put(ent);
} else {
ent->ret = -EBUSY;
complete(&ent->done);
+ complete(&ent->slotted);
}
- complete(&ent->slotted);
return;
}
alloc_ret = cmd_alloc_index(cmd, ent);
@@ -1017,13 +1018,14 @@ static void cmd_work_handler(struct work_struct *work)
ent->callback(-EAGAIN, ent->context);
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
+ complete(&ent->slotted);
cmd_ent_put(ent);
} else {
ent->ret = -EAGAIN;
complete(&ent->done);
+ complete(&ent->slotted);
}
up(&cmd->vars.sem);
- complete(&ent->slotted);
return;
}
} else {
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index f3332417162e..ffac22883e49 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1219,6 +1219,36 @@ static void lan743x_mac_set_address(struct lan743x_adapter *adapter,
"MAC address set to %pM\n", addr);
}
+static void lan743x_mac_rx_enable_fse(struct lan743x_adapter *adapter)
+{
+ u32 mac_rx;
+ bool rxen;
+
+ mac_rx = lan743x_csr_read(adapter, MAC_RX);
+ if (mac_rx & MAC_RX_FSE_)
+ return;
+
+ rxen = mac_rx & MAC_RX_RXEN_;
+ if (rxen) {
+ mac_rx &= ~MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+ 1, 1000, 20000, 100);
+ }
+
+ /* Per AN2948, hardware prevents modification of the FSE bit while the
+ * MAC receiver is enabled (RXEN bit set). Use separate register write
+ * to assert the FSE bit before enabling the RXEN bit in MAC_RX
+ */
+ mac_rx |= MAC_RX_FSE_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+
+ if (rxen) {
+ mac_rx |= MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ }
+}
+
static int lan743x_mac_init(struct lan743x_adapter *adapter)
{
bool mac_address_valid = true;
@@ -1258,6 +1288,8 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter)
lan743x_mac_set_address(adapter, adapter->mac_address);
eth_hw_addr_set(netdev, adapter->mac_address);
+ lan743x_mac_rx_enable_fse(adapter);
+
return 0;
}
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index 160d94a7cee6..1573c8f9c993 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -182,6 +182,7 @@
#define MAC_RX (0x104)
#define MAC_RX_MAX_SIZE_SHIFT_ (16)
#define MAC_RX_MAX_SIZE_MASK_ (0x3FFF0000)
+#define MAC_RX_FSE_ BIT(2)
#define MAC_RX_RXD_ BIT(1)
#define MAC_RX_RXEN_ BIT(0)
diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c
index ef13109c49cf..55105d34bc79 100644
--- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
+++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
@@ -239,6 +239,8 @@ static void rtase_tx_clear(struct rtase_private *tp)
rtase_tx_clear_range(ring, ring->dirty_idx, RTASE_NUM_DESC);
ring->cur_idx = 0;
ring->dirty_idx = 0;
+
+ netdev_tx_reset_subqueue(tp->dev, i);
}
}
@@ -1563,8 +1565,9 @@ static void rtase_dump_tally_counter(const struct rtase_private *tp)
rtase_w32(tp, RTASE_DTCCR0, cmd);
rtase_w32(tp, RTASE_DTCCR0, cmd | RTASE_COUNTER_DUMP);
- err = read_poll_timeout(rtase_r32, val, !(val & RTASE_COUNTER_DUMP),
- 10, 250, false, tp, RTASE_DTCCR0);
+ err = read_poll_timeout_atomic(rtase_r32, val,
+ !(val & RTASE_COUNTER_DUMP),
+ 10, 250, false, tp, RTASE_DTCCR0);
if (err == -ETIMEDOUT)
netdev_err(tp->dev, "error occurred in dump tally counter\n");
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index c6563367d382..715180c3a1b3 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -632,7 +632,7 @@ static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb,
uh = udp_hdr(skb);
uh->len = htons(skb->len - gro_hint->nested_tp_offset);
if (uh->check) {
- len = skb->len - gro_hint->nested_nh_offset;
+ len = skb->len - gro_hint->nested_tp_offset;
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
if (gro_hint->nested_is_v6)
uh->check = ~udp_v6_check(len, &ipv6h->saddr,
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index bd970f753beb..b94b9c433a21 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -822,6 +822,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
return -EINVAL;
}
+ sfp->i2c_block_size = sfp->i2c_max_block_size;
return 0;
}
diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c
index 2042369379ff..3e76f4e21094 100644
--- a/drivers/net/vxlan/vxlan_vnifilter.c
+++ b/drivers/net/vxlan/vxlan_vnifilter.c
@@ -661,7 +661,7 @@ static int vxlan_vni_update(struct vxlan_dev *vxlan,
if (ret)
return ret;
- if (changed)
+ if (*changed)
vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
return 0;
@@ -759,8 +759,7 @@ static int vxlan_vni_add(struct vxlan_dev *vxlan,
err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed,
extack);
- if (changed)
- vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
+ vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
return err;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/ap.c b/drivers/net/wireless/intel/iwlwifi/mld/ap.c
index 5c59acc8c4c5..6598d9333333 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/ap.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/ap.c
@@ -9,7 +9,6 @@
#include "ap.h"
#include "hcmd.h"
#include "tx.h"
-#include "power.h"
#include "key.h"
#include "phy.h"
#include "iwl-utils.h"
@@ -273,9 +272,6 @@ int iwl_mld_start_ap_ibss(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx;
int ret;
- if (vif->type == NL80211_IFTYPE_AP)
- iwl_mld_send_ap_tx_power_constraint_cmd(mld, vif, link);
-
ret = iwl_mld_update_beacon_template(mld, vif, link);
if (ret)
return ret;
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
index da6fd7471568..3c8daddc0bcb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
@@ -1150,6 +1150,13 @@ int iwl_mld_assign_vif_chanctx(struct ieee80211_hw *hw,
if (iwl_mld_can_activate_link(mld, vif, link)) {
iwl_mld_tlc_update_phy(mld, vif, link);
+ /* FW requires AP_TX_POWER_CONSTRAINTS_CMD before link
+ * activation for AP and after link activation for STA,
+ * for an unknown reason.
+ */
+ if (vif->type == NL80211_IFTYPE_AP)
+ iwl_mld_send_ap_tx_power_constraint_cmd(mld, vif, link);
+
ret = iwl_mld_activate_link(mld, link);
if (ret)
goto err;
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/power.c b/drivers/net/wireless/intel/iwlwifi/mld/power.c
index 49b0d9f8f865..266fe16bb95d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/power.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/power.c
@@ -366,7 +366,7 @@ iwl_mld_send_ap_tx_power_constraint_cmd(struct iwl_mld *mld,
lockdep_assert_wiphy(mld->wiphy);
- if (!mld_link->active)
+ if (!mld_link->active && vif->type != NL80211_IFTYPE_AP)
return;
if (link->chanreq.oper.chan->band != NL80211_BAND_6GHZ)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index f05df3a3300e..6e507d6dcdd2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2025 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2026 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -459,9 +459,14 @@ static void iwl_mvm_phy_filter_init(struct iwl_mvm *mvm,
static void iwl_mvm_uats_init(struct iwl_mvm *mvm)
{
+ struct iwl_mcc_allowed_ap_type_cmd_v1 *cmd __free(kfree) = NULL;
int cmd_id = WIDE_ID(REGULATORY_AND_NVM_GROUP,
MCC_ALLOWED_AP_TYPE_CMD);
- struct iwl_mcc_allowed_ap_type_cmd_v1 cmd = {};
+ struct iwl_host_cmd hcmd = {
+ .id = cmd_id,
+ .len[0] = sizeof(*cmd),
+ .dataflags[0] = IWL_HCMD_DFL_NOCOPY,
+ };
u8 cmd_ver;
int ret;
@@ -485,14 +490,25 @@ static void iwl_mvm_uats_init(struct iwl_mvm *mvm)
if (!mvm->fwrt.ap_type_cmd_valid)
return;
+ /* Since we free the command immediately after iwl_mvm_send_cmd, we
+ * must send this command in SYNC mode.
+ */
+ lockdep_assert_held(&mvm->mutex);
+
+ cmd = kzalloc_obj(*cmd);
+ if (!cmd)
+ return;
+
BUILD_BUG_ON(sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map) !=
- sizeof(cmd.mcc_to_ap_type_map));
+ sizeof(cmd->mcc_to_ap_type_map));
- memcpy(cmd.mcc_to_ap_type_map,
+ memcpy(cmd->mcc_to_ap_type_map,
mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map,
sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map));
- ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, sizeof(cmd), &cmd);
+ hcmd.data[0] = cmd;
+
+ ret = iwl_mvm_send_cmd(mvm, &hcmd);
if (ret < 0)
IWL_ERR(mvm, "failed to send MCC_ALLOWED_AP_TYPE_CMD (%d)\n",
ret);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index ae177477b201..384bed95835d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -1416,6 +1416,12 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_rf_cfg *cfg,
fw_has_capa(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE);
+ /* Those firmware versions claim to support the fw_reset_handshake
+ * but they are buggy.
+ */
+ if (IWL_UCODE_MAJOR(mvm->fw->ucode_ver) <= 77)
+ trans->conf.fw_reset_handshake = false;
+
trans->conf.queue_alloc_cmd_ver =
iwl_fw_lookup_cmd_ver(mvm->fw,
WIDE_ID(DATA_PATH_GROUP,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index dc99e7ac4726..eb3c5a6dd088 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1225,33 +1225,41 @@ static int _iwl_pci_resume(struct device *device, bool restore)
if (!trans->op_mode)
return 0;
- /*
- * Scratch value was altered, this means the device was powered off, we
- * need to reset it completely.
- * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan,
- * but not bits [15:8]. So if we have bits set in lower word, assume
- * the device is alive.
- * Alternatively, if the scratch value is 0xFFFFFFFF, then we no longer
- * have access to the device and consider it powered off.
- * For older devices, just try silently to grab the NIC.
- */
- if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
- u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH);
-
- if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) ||
- scratch == ~0U)
- device_was_powered_off = true;
- } else {
+ if (test_bit(STATUS_DEVICE_ENABLED, &trans->status)) {
/*
- * bh are re-enabled by iwl_trans_pcie_release_nic_access,
- * so re-enable them if _iwl_trans_pcie_grab_nic_access fails.
+ * Scratch value was altered, this means the device was powered
+ * off, we need to reset it completely.
+ * Note: MAC (bits 0:7) will be cleared upon suspend even with
+ * wowlan, but not bits [15:8]. So if we have bits set in lower
+ * word, assume the device is alive.
+ * Alternatively, if the scratch value is 0xFFFFFFFF, then we
+ * no longer have access to the device and consider it powered
+ * off.
+ * For older devices, just try silently to grab the NIC.
*/
- local_bh_disable();
- if (_iwl_trans_pcie_grab_nic_access(trans, true)) {
- iwl_trans_pcie_release_nic_access(trans);
+ if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+ u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH);
+
+ if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) ||
+ scratch == ~0U) {
+ IWL_DEBUG_WOWLAN(trans,
+ "Scratch 0x%08x indicates device was powered off\n",
+ scratch);
+ device_was_powered_off = true;
+ }
} else {
- device_was_powered_off = true;
- local_bh_enable();
+ /*
+ * bh are re-enabled by iwl_trans_pcie_release_nic_access,
+ * so re-enable them if _iwl_trans_pcie_grab_nic_access
+ * fails.
+ */
+ local_bh_disable();
+ if (_iwl_trans_pcie_grab_nic_access(trans, true)) {
+ iwl_trans_pcie_release_nic_access(trans);
+ } else {
+ device_was_powered_off = true;
+ local_bh_enable();
+ }
}
}
diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c
index 915a4f6defc9..84cb527f59cc 100644
--- a/drivers/ptp/ptp_vclock.c
+++ b/drivers/ptp/ptp_vclock.c
@@ -19,6 +19,8 @@ static DEFINE_SPINLOCK(vclock_hash_lock);
static DEFINE_READ_MOSTLY_HASHTABLE(vclock_hash, 8);
+DEFINE_STATIC_SRCU(vclock_srcu);
+
static void ptp_vclock_hash_add(struct ptp_vclock *vclock)
{
spin_lock(&vclock_hash_lock);
@@ -37,7 +39,7 @@ static void ptp_vclock_hash_del(struct ptp_vclock *vclock)
spin_unlock(&vclock_hash_lock);
- synchronize_rcu();
+ synchronize_srcu(&vclock_srcu);
}
static int ptp_vclock_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
@@ -276,14 +278,16 @@ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index)
{
unsigned int hash = vclock_index % HASH_SIZE(vclock_hash);
struct ptp_vclock *vclock;
- u64 ns;
u64 vclock_ns = 0;
+ int srcu_idx;
+ u64 ns;
ns = ktime_to_ns(*hwtstamp);
- rcu_read_lock();
+ srcu_idx = srcu_read_lock(&vclock_srcu);
- hlist_for_each_entry_rcu(vclock, &vclock_hash[hash], vclock_hash_node) {
+ hlist_for_each_entry_srcu(vclock, &vclock_hash[hash], vclock_hash_node,
+ srcu_read_lock_held(&vclock_srcu)) {
if (vclock->clock->index != vclock_index)
continue;
@@ -294,7 +298,7 @@ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index)
break;
}
- rcu_read_unlock();
+ srcu_read_unlock(&vclock_srcu, srcu_idx);
return ns_to_ktime(vclock_ns);
}
diff --git a/include/net/act_api.h b/include/net/act_api.h
index d11b79107930..fd2967ee08f7 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -45,6 +45,7 @@ struct tc_action {
struct tc_cookie __rcu *user_cookie;
struct tcf_chain __rcu *goto_chain;
u32 tcfa_flags;
+ struct rcu_head tcfa_rcu;
u8 hw_stats;
u8 used_hw_stats;
bool used_hw_stats_valid;
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 5172afee5494..e0a1f2293679 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -33,6 +33,7 @@
/* L2CAP defaults */
#define L2CAP_DEFAULT_MTU 672
#define L2CAP_DEFAULT_MIN_MTU 48
+#define L2CAP_SIG_MTU 48 /* BR/EDR signaling MTU */
#define L2CAP_DEFAULT_FLUSH_TO 0xFFFF
#define L2CAP_EFS_DEFAULT_FLUSH_TO 0xFFFFFFFF
#define L2CAP_DEFAULT_TX_WINDOW 63
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a02e569813d2..e517eaaa177b 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1824,8 +1824,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int ip_vs_bind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *scheduler);
-void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
- struct ip_vs_scheduler *sched);
+void ip_vs_unbind_scheduler(struct ip_vs_service *svc);
struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
struct ip_vs_conn *
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index f7263fe2a2e4..ee70f597a4de 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -27,7 +27,9 @@ struct mptcp_ext {
u32 subflow_seq;
u16 data_len;
__sum16 csum;
- u8 use_map:1,
+
+ struct_group(flags,
+ u8 use_map:1,
dsn64:1,
data_fin:1,
use_ack:1,
@@ -35,9 +37,10 @@ struct mptcp_ext {
mpc_map:1,
frozen:1,
reset_transient:1;
- u8 reset_reason:4,
+ u8 reset_reason:4,
csum_reqd:1,
infinite_map:1;
+ ); /* end of flags group */
};
#define MPTCPOPT_HMAC_LEN 20
diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index f58ee15cd858..cb7b82f2cbc7 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -15,7 +15,6 @@ struct tcf_pedit_parms {
struct tc_pedit_key *tcfp_keys;
struct tcf_pedit_key_ex *tcfp_keys_ex;
int action;
- u32 tcfp_off_max_hint;
unsigned char tcfp_nkeys;
unsigned char tcfp_flags;
struct rcu_head rcu;
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index e116d308a8df..37eaff3f7b69 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -1086,12 +1086,12 @@ static u8 lowpan_iphc_mcast_ctx_addr_compress(u8 **hc_ptr,
const struct lowpan_iphc_ctx *ctx,
const struct in6_addr *ipaddr)
{
- u8 data[6];
+ u8 data[6] = {};
/* flags/scope, reserved (RIID) */
memcpy(data, &ipaddr->s6_addr[1], 2);
/* group ID */
- memcpy(&data[1], &ipaddr->s6_addr[11], 4);
+ memcpy(&data[2], &ipaddr->s6_addr[12], 4);
lowpan_push_hc_data(hc_ptr, data, 6);
return LOWPAN_IPHC_DAM_00;
diff --git a/net/802/garp.c b/net/802/garp.c
index 6f563b6797d9..c7a39f298ad6 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -453,7 +453,7 @@ static int garp_pdu_parse_attr(struct garp_applicant *app, struct sk_buff *skb,
if (!pskb_may_pull(skb, ga->len))
return -1;
skb_pull(skb, ga->len);
- dlen = sizeof(*ga) - ga->len;
+ dlen = ga->len - sizeof(*ga);
if (attrtype > app->app->maxattr)
return 0;
diff --git a/net/802/mrp.c b/net/802/mrp.c
index ff0e80574e6b..160a3b14569c 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -703,6 +703,12 @@ static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) &
MRP_VECATTR_HDR_LEN_MASK);
+ /* If valen is 0, only a LeaveAllEvent is present; FirstValue and
+ * Vector fields are absent per IEEE 802.1ak.
+ */
+ if (valen == 0)
+ return 0;
+
/* The VectorAttribute structure in a PDU carries event information
* about one or more attributes having consecutive values. Only the
* value for the first attribute is contained in the structure. So
@@ -753,6 +759,9 @@ static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
vaevents %= __MRP_VECATTR_EVENT_MAX;
vaevent = vaevents;
mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+ valen--;
+ mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+ mrp_cb(skb)->mh->attrlen);
}
return 0;
}
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 30493ea3c010..078fb7a6efa5 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -393,7 +393,7 @@ static void aarp_purge(void)
*/
static struct aarp_entry *aarp_alloc(void)
{
- struct aarp_entry *a = kmalloc_obj(*a, GFP_ATOMIC);
+ struct aarp_entry *a = kzalloc_obj(*a, GFP_ATOMIC);
if (!a)
return NULL;
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 0de5df690bd0..5c5f53ff30e8 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -206,14 +206,11 @@ static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len)
return 0;
}
-static int bnep_rx_control(struct bnep_session *s, void *data, int len)
+static int bnep_rx_control_cmd(struct bnep_session *s, u8 cmd, void *data,
+ int len)
{
- u8 cmd = *(u8 *)data;
int err = 0;
- data++;
- len--;
-
switch (cmd) {
case BNEP_CMD_NOT_UNDERSTOOD:
case BNEP_SETUP_CONN_RSP:
@@ -254,6 +251,14 @@ static int bnep_rx_control(struct bnep_session *s, void *data, int len)
return err;
}
+static int bnep_rx_control(struct bnep_session *s, void *data, int len)
+{
+ if (len < 1)
+ return -EILSEQ;
+
+ return bnep_rx_control_cmd(s, *(u8 *)data, data + 1, len - 1);
+}
+
static int bnep_rx_extension(struct bnep_session *s, struct sk_buff *skb)
{
struct bnep_ext_hdr *h;
@@ -299,19 +304,26 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
{
struct net_device *dev = s->dev;
struct sk_buff *nskb;
+ u8 *data;
u8 type, ctrl_type;
dev->stats.rx_bytes += skb->len;
- type = *(u8 *) skb->data;
- skb_pull(skb, 1);
- ctrl_type = *(u8 *)skb->data;
+ data = skb_pull_data(skb, sizeof(type));
+ if (!data)
+ goto badframe;
+ type = *data;
if ((type & BNEP_TYPE_MASK) >= sizeof(__bnep_rx_hlen))
goto badframe;
if ((type & BNEP_TYPE_MASK) == BNEP_CONTROL) {
- if (bnep_rx_control(s, skb->data, skb->len) < 0) {
+ data = skb_pull_data(skb, sizeof(ctrl_type));
+ if (!data)
+ goto badframe;
+ ctrl_type = *data;
+
+ if (bnep_rx_control_cmd(s, ctrl_type, skb->data, skb->len) < 0) {
dev->stats.tx_errors++;
kfree_skb(skb);
return 0;
@@ -324,24 +336,27 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
/* Verify and pull ctrl message since it's already processed */
switch (ctrl_type) {
- case BNEP_SETUP_CONN_REQ:
- /* Pull: ctrl type (1 b), len (1 b), data (len bytes) */
- if (!skb_pull(skb, 2 + *(u8 *)(skb->data + 1) * 2))
+ case BNEP_SETUP_CONN_REQ: {
+ u8 uuid_size;
+
+ /* Pull uuid_size and the dst/src service UUIDs. */
+ data = skb_pull_data(skb, sizeof(uuid_size));
+ if (!data)
+ goto badframe;
+ uuid_size = *data;
+ if (!skb_pull(skb, uuid_size + uuid_size))
goto badframe;
break;
+ }
case BNEP_FILTER_MULTI_ADDR_SET:
- case BNEP_FILTER_NET_TYPE_SET: {
- u8 *hdr;
-
- /* Pull ctrl type (1 b) + len (2 b) */
- hdr = skb_pull_data(skb, 3);
- if (!hdr)
+ case BNEP_FILTER_NET_TYPE_SET:
+ /* Pull: len (2 b), data (len bytes) */
+ data = skb_pull_data(skb, sizeof(u16));
+ if (!data)
goto badframe;
- /* Pull data (len bytes); length is big-endian */
- if (!skb_pull(skb, get_unaligned_be16(&hdr[1])))
+ if (!skb_pull(skb, get_unaligned_be16(data)))
goto badframe;
break;
- }
default:
kfree_skb(skb);
return 0;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index aeccd8084cba..df23245d6ccd 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1725,6 +1725,11 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
/* Generate Broadcast ID */
get_random_bytes(bid, sizeof(bid));
len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
+ if (adv->adv_data_len > sizeof(ad) - len) {
+ bt_dev_err(hdev, "No room for Broadcast Announcement");
+ return -EINVAL;
+ }
+
memcpy(ad + len, adv->adv_data, adv->adv_data_len);
hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len,
ad, 0, NULL);
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 041ce9adc378..8957ce7c21b7 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -83,10 +83,12 @@ static void bt_host_release(struct device *dev)
{
struct hci_dev *hdev = to_hci_dev(dev);
- if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
hci_release_dev(hdev);
- else
+ } else {
+ cleanup_srcu_struct(&hdev->srcu);
kfree(hdev);
+ }
module_put(THIS_MODULE);
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 876649556d3c..3abd8111dda8 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -337,12 +337,20 @@ static int iso_connect_bis(struct sock *sk)
struct iso_conn *conn;
struct hci_conn *hcon;
struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ u8 src_type, bc_sid;
int err;
- BT_DBG("%pMR (SID 0x%2.2x)", &iso_pi(sk)->src, iso_pi(sk)->bc_sid);
+ lock_sock(sk);
+ bacpy(&src, &iso_pi(sk)->src);
+ bacpy(&dst, &iso_pi(sk)->dst);
+ src_type = iso_pi(sk)->src_type;
+ bc_sid = iso_pi(sk)->bc_sid;
+ release_sock(sk);
+
+ BT_DBG("%pMR (SID 0x%2.2x)", &src, bc_sid);
- hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
- iso_pi(sk)->src_type);
+ hdev = hci_get_route(&dst, &src, src_type);
if (!hdev)
return -EHOSTUNREACH;
@@ -430,12 +438,19 @@ static int iso_connect_cis(struct sock *sk)
struct iso_conn *conn;
struct hci_conn *hcon;
struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ u8 src_type;
int err;
- BT_DBG("%pMR -> %pMR", &iso_pi(sk)->src, &iso_pi(sk)->dst);
+ lock_sock(sk);
+ bacpy(&src, &iso_pi(sk)->src);
+ bacpy(&dst, &iso_pi(sk)->dst);
+ src_type = iso_pi(sk)->src_type;
+ release_sock(sk);
- hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
- iso_pi(sk)->src_type);
+ BT_DBG("%pMR -> %pMR", &src, &dst);
+
+ hdev = hci_get_route(&dst, &src, src_type);
if (!hdev)
return -EHOSTUNREACH;
@@ -1082,7 +1097,7 @@ static int iso_sock_rebind_bc(struct sock *sk, struct sockaddr_iso *sa,
* ordering.
*/
release_sock(sk);
- hci_dev_lock(bis->hdev);
+ hci_dev_lock(hdev);
lock_sock(sk);
if (!iso_pi(sk)->conn || iso_pi(sk)->conn->hcon != bis) {
@@ -1212,18 +1227,25 @@ static int iso_sock_connect(struct socket *sock, struct sockaddr_unsized *addr,
static int iso_listen_bis(struct sock *sk)
{
- struct hci_dev *hdev;
- int err = 0;
struct iso_conn *conn;
struct hci_conn *hcon;
+ struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ u8 src_type, bc_sid;
+ int err = 0;
+
+ lock_sock(sk);
+ bacpy(&src, &iso_pi(sk)->src);
+ bacpy(&dst, &iso_pi(sk)->dst);
+ src_type = iso_pi(sk)->src_type;
+ bc_sid = iso_pi(sk)->bc_sid;
+ release_sock(sk);
- BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &iso_pi(sk)->src,
- &iso_pi(sk)->dst, iso_pi(sk)->bc_sid);
+ BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &src, &dst, bc_sid);
write_lock(&iso_sk_list.lock);
- if (__iso_get_sock_listen_by_sid(&iso_pi(sk)->src, &iso_pi(sk)->dst,
- iso_pi(sk)->bc_sid))
+ if (__iso_get_sock_listen_by_sid(&src, &dst, bc_sid))
err = -EADDRINUSE;
write_unlock(&iso_sk_list.lock);
@@ -1231,8 +1253,7 @@ static int iso_listen_bis(struct sock *sk)
if (err)
return err;
- hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
- iso_pi(sk)->src_type);
+ hdev = hci_get_route(&dst, &src, src_type);
if (!hdev)
return -EHOSTUNREACH;
@@ -1568,9 +1589,16 @@ static void iso_conn_big_sync(struct sock *sk)
{
int err;
struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ u8 src_type;
- hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
- iso_pi(sk)->src_type);
+ lock_sock(sk);
+ bacpy(&src, &iso_pi(sk)->src);
+ bacpy(&dst, &iso_pi(sk)->dst);
+ src_type = iso_pi(sk)->src_type;
+ release_sock(sk);
+
+ hdev = hci_get_route(&dst, &src, src_type);
if (!hdev)
return;
@@ -1595,6 +1623,7 @@ static void iso_conn_big_sync(struct sock *sk)
release_sock(sk);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
}
static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 45b175399e8d..c4ccfbda9d78 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5643,6 +5643,15 @@ static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident)
l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
}
+static inline void l2cap_sig_send_mtu_rej(struct l2cap_conn *conn, u8 ident)
+{
+ struct l2cap_cmd_rej_mtu rej;
+
+ rej.reason = cpu_to_le16(L2CAP_REJ_MTU_EXCEEDED);
+ rej.max_mtu = cpu_to_le16(L2CAP_SIG_MTU);
+ l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
+}
+
static inline void l2cap_sig_channel(struct l2cap_conn *conn,
struct sk_buff *skb)
{
@@ -5655,6 +5664,43 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn,
if (hcon->type != ACL_LINK)
goto drop;
+ /*
+ * Bluetooth Core v5.4, Vol 3, Part A, Section 4: the BR/EDR
+ * signaling channel has a fixed signaling MTU (MTUsig) whose
+ * minimum and default is 48 octets. Section 4.1 says that on
+ * an MTUExceeded command reject the identifier "shall match
+ * the first request command in the L2CAP packet" and that
+ * packets containing only response commands "shall be
+ * silently discarded".
+ *
+ * Linux intentionally deviates from that prescription:
+ *
+ * 1. Silently discarding desynchronizes the peer. The
+ * remote stack never learns its responses were dropped,
+ * so any state machine waiting on a paired response
+ * stalls until its own timer fires.
+ *
+ * 2. Locating "the first request command" requires walking
+ * command headers past MTUsig, i.e. processing bytes
+ * from a packet we have already decided is too large to
+ * process.
+ *
+ * Reject every over-MTUsig signaling packet with one
+ * L2CAP_REJ_MTU_EXCEEDED command reject. The reject's
+ * reason field is what tells the peer that the whole packet
+ * was discarded; the identifier value is informational, so
+ * we use the identifier from the first command header, a
+ * single fixed-offset byte read.
+ */
+ if (skb->len > L2CAP_SIG_MTU) {
+ u8 ident = skb->data[1];
+
+ BT_DBG("signaling packet exceeds MTU: %u > %u",
+ skb->len, L2CAP_SIG_MTU);
+ l2cap_sig_send_mtu_rej(conn, ident);
+ goto drop;
+ }
+
while (skb->len >= L2CAP_CMD_HDR_SIZE) {
u16 len;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index de5bd6b637b2..f4aa814a0397 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -8638,6 +8638,12 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
if (!cur_len)
continue;
+ /* If the current field length would exceed the total data
+ * length, then it's invalid.
+ */
+ if (i + cur_len >= len)
+ return false;
+
if (data[i + 1] == EIR_FLAGS &&
(!is_adv_data || flags_managed(adv_flags)))
return false;
@@ -8654,12 +8660,6 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
if (data[i + 1] == EIR_APPEARANCE &&
appearance_managed(adv_flags))
return false;
-
- /* If the current field length would exceed the total data
- * length, then it's invalid.
- */
- if (i + cur_len >= len)
- return false;
}
return true;
@@ -9114,8 +9114,9 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data,
BT_DBG("%s", hdev->name);
- expected_len = struct_size(cp, data, cp->adv_data_len + cp->scan_rsp_len);
- if (expected_len != data_len)
+ expected_len = struct_size(cp, data, cp->adv_data_len +
+ cp->scan_rsp_len);
+ if (expected_len > data_len)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA,
MGMT_STATUS_INVALID_PARAMS);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index d11bd5337d57..364b9381c2dc 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1431,10 +1431,15 @@ static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn)
static int rfcomm_recv_pn(struct rfcomm_session *s, int cr, struct sk_buff *skb)
{
- struct rfcomm_pn *pn = (void *) skb->data;
+ struct rfcomm_pn *pn;
struct rfcomm_dlc *d;
- u8 dlci = pn->dlci;
+ u8 dlci;
+
+ pn = skb_pull_data(skb, sizeof(*pn));
+ if (!pn)
+ return -EILSEQ;
+ dlci = pn->dlci;
BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
if (!dlci)
@@ -1483,8 +1488,8 @@ static int rfcomm_recv_pn(struct rfcomm_session *s, int cr, struct sk_buff *skb)
static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_buff *skb)
{
- struct rfcomm_rpn *rpn = (void *) skb->data;
- u8 dlci = __get_dlci(rpn->dlci);
+ struct rfcomm_rpn *rpn;
+ u8 dlci;
u8 bit_rate = 0;
u8 data_bits = 0;
@@ -1495,15 +1500,16 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
u8 xoff_char = 0;
u16 rpn_mask = RFCOMM_RPN_PM_ALL;
- BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x",
- dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl,
- rpn->xon_char, rpn->xoff_char, rpn->param_mask);
+ if (len == 1) {
+ rpn = skb_pull_data(skb, 1);
+ if (!rpn)
+ return -EILSEQ;
- if (!cr)
- return 0;
+ dlci = __get_dlci(rpn->dlci);
+
+ if (!cr)
+ return 0;
- if (len == 1) {
- /* This is a request, return default (according to ETSI TS 07.10) settings */
bit_rate = RFCOMM_RPN_BR_9600;
data_bits = RFCOMM_RPN_DATA_8;
stop_bits = RFCOMM_RPN_STOP_1;
@@ -1514,6 +1520,19 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
goto rpn_out;
}
+ rpn = skb_pull_data(skb, sizeof(*rpn));
+ if (!rpn)
+ return -EILSEQ;
+
+ dlci = __get_dlci(rpn->dlci);
+
+ BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x",
+ dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl,
+ rpn->xon_char, rpn->xoff_char, rpn->param_mask);
+
+ if (!cr)
+ return 0;
+
/* Check for sane values, ignore/accept bit_rate, 8 bits, 1 stop bit,
* no parity, no flow control lines, normal XON/XOFF chars */
@@ -1589,9 +1608,14 @@ rpn_out:
static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb)
{
- struct rfcomm_rls *rls = (void *) skb->data;
- u8 dlci = __get_dlci(rls->dlci);
+ struct rfcomm_rls *rls;
+ u8 dlci;
+ rls = skb_pull_data(skb, sizeof(*rls));
+ if (!rls)
+ return -EILSEQ;
+
+ dlci = __get_dlci(rls->dlci);
BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status);
if (!cr)
@@ -1608,10 +1632,15 @@ static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb
static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb)
{
- struct rfcomm_msc *msc = (void *) skb->data;
+ struct rfcomm_msc *msc;
struct rfcomm_dlc *d;
- u8 dlci = __get_dlci(msc->dlci);
+ u8 dlci;
+
+ msc = skb_pull_data(skb, sizeof(*msc));
+ if (!msc)
+ return -EILSEQ;
+ dlci = __get_dlci(msc->dlci);
BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig);
d = rfcomm_dlc_get(s, dlci);
@@ -1644,17 +1673,19 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb
static int rfcomm_recv_mcc(struct rfcomm_session *s, struct sk_buff *skb)
{
- struct rfcomm_mcc *mcc = (void *) skb->data;
+ struct rfcomm_mcc *mcc;
u8 type, cr, len;
+ mcc = skb_pull_data(skb, sizeof(*mcc));
+ if (!mcc)
+ return -EILSEQ;
+
cr = __test_cr(mcc->type);
type = __get_mcc_type(mcc->type);
len = __get_mcc_len(mcc->len);
BT_DBG("%p type 0x%x cr %d", s, type, cr);
- skb_pull(skb, 2);
-
switch (type) {
case RFCOMM_PN:
rfcomm_recv_pn(s, cr, skb);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index bd7d959c6e9e..805ed5d28ed6 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -122,7 +122,7 @@ static struct sock *__rfcomm_get_listen_sock_by_addr(u8 channel, bdaddr_t *src)
}
/* Find socket with channel and source bdaddr.
- * Returns closest match.
+ * Returns closest match with an extra reference held.
*/
static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src)
{
@@ -136,15 +136,25 @@ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *
if (rfcomm_pi(sk)->channel == channel) {
/* Exact match. */
- if (!bacmp(&rfcomm_pi(sk)->src, src))
+ if (!bacmp(&rfcomm_pi(sk)->src, src)) {
+ sock_hold(sk);
break;
+ }
/* Closest match */
- if (!bacmp(&rfcomm_pi(sk)->src, BDADDR_ANY))
+ if (!bacmp(&rfcomm_pi(sk)->src, BDADDR_ANY)) {
+ if (sk1)
+ sock_put(sk1);
+
sk1 = sk;
+ sock_hold(sk1);
+ }
}
}
+ if (sk && sk1)
+ sock_put(sk1);
+
read_unlock(&rfcomm_sk_list.lock);
return sk ? sk : sk1;
@@ -941,6 +951,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
{
struct sock *sk, *parent;
bdaddr_t src, dst;
+ bool defer_setup = false;
int result = 0;
BT_DBG("session %p channel %d", s, channel);
@@ -954,6 +965,11 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
lock_sock(parent);
+ if (parent->sk_state != BT_LISTEN)
+ goto done;
+
+ defer_setup = test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags);
+
/* Check for backlog size */
if (sk_acceptq_is_full(parent)) {
BT_DBG("backlog full %d", parent->sk_ack_backlog);
@@ -981,9 +997,11 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
done:
release_sock(parent);
- if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
+ if (defer_setup)
parent->sk_state_change(parent);
+ sock_put(parent);
+
return result;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index f1799c6a6f87..140869e5b2df 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -312,11 +312,21 @@ static int sco_connect(struct sock *sk)
struct sco_conn *conn;
struct hci_conn *hcon;
struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ struct bt_codec codec;
+ __u16 setting;
int err, type;
- BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
+ lock_sock(sk);
+ bacpy(&src, &sco_pi(sk)->src);
+ bacpy(&dst, &sco_pi(sk)->dst);
+ setting = sco_pi(sk)->setting;
+ codec = sco_pi(sk)->codec;
+ release_sock(sk);
+
+ BT_DBG("%pMR -> %pMR", &src, &dst);
- hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
+ hdev = hci_get_route(&dst, &src, BDADDR_BREDR);
if (!hdev)
return -EHOSTUNREACH;
@@ -327,7 +337,7 @@ static int sco_connect(struct sock *sk)
else
type = SCO_LINK;
- switch (sco_pi(sk)->setting & SCO_AIRMODE_MASK) {
+ switch (setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_TRANSP:
if (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)) {
err = -EOPNOTSUPP;
@@ -336,8 +346,8 @@ static int sco_connect(struct sock *sk)
break;
}
- hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
- sco_pi(sk)->setting, &sco_pi(sk)->codec,
+ hcon = hci_connect_sco(hdev, type, &dst,
+ setting, &codec,
READ_ONCE(sk->sk_sndtimeo));
if (IS_ERR(hcon)) {
err = PTR_ERR(hcon);
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 7dfbcdfc30e5..c9e229af0366 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -31,6 +31,9 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par)
const struct arphdr *ap;
struct arphdr _ah;
+ if (skb_ensure_writable(skb, sizeof(_ah) + ETH_ALEN))
+ return EBT_DROP;
+
ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
if (ap == NULL)
return EBT_DROP;
diff --git a/net/core/sock.c b/net/core/sock.c
index b37b664b6eb9..d097025c116a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2676,8 +2676,12 @@ void sock_wfree(struct sk_buff *skb)
int old;
if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+ void (*sk_write_space)(struct sock *sk);
+
+ sk_write_space = READ_ONCE(sk->sk_write_space);
+
if (sock_flag(sk, SOCK_RCU_FREE) &&
- sk->sk_write_space == sock_def_write_space) {
+ sk_write_space == sock_def_write_space) {
rcu_read_lock();
free = __refcount_sub_and_test(len, &sk->sk_wmem_alloc,
&old);
@@ -2693,7 +2697,7 @@ void sock_wfree(struct sk_buff *skb)
* after sk_write_space() call
*/
WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
- sk->sk_write_space(sk);
+ sk_write_space(sk);
len = 1;
}
/*
diff --git a/net/devlink/core.c b/net/devlink/core.c
index eeb6a71f5f56..fe9f6a0a67d5 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -518,6 +518,8 @@ void devlink_free(struct devlink *devlink)
{
ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ devlink_rel_put(devlink);
+
WARN_ON(!list_empty(&devlink->trap_policer_list));
WARN_ON(!list_empty(&devlink->trap_group_list));
WARN_ON(!list_empty(&devlink->trap_list));
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index b514e43766ef..a28dfd8490c5 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -35,10 +35,8 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
rcu_read_lock();
sn = rcu_dereference(hsr->self_node);
- if (!sn) {
- WARN_ONCE(1, "HSR: No self node\n");
+ if (!sn)
goto out;
- }
if (ether_addr_equal(addr, sn->macaddress_A) ||
ether_addr_equal(addr, sn->macaddress_B))
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index 0c07662b44c0..4df76ff50699 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -255,6 +255,11 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
pr_debug("package xmit\n");
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
+
WARN_ON_ONCE(skb->len > IPV6_MIN_MTU);
/* We must take a copy of the skb before we modify/replace the ipv6
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index dbcd37dfdc15..5b934ce8d98a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1148,6 +1148,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req)
/* The timer needs to be setup after a successful insertion. */
req->timeout = tcp_timeout_init((struct sock *)req);
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
+
+ preempt_disable_nested();
+
mod_timer(&req->rsk_timer, jiffies + req->timeout);
/* before letting lookups find us, make sure all req fields
@@ -1155,6 +1158,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req)
*/
smp_wmb();
refcount_set(&req->rsk_refcnt, 2 + 1);
+
+ preempt_enable_nested();
+
return true;
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index be8815ce3ac2..09d745112c15 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -530,6 +530,10 @@ int ip_options_get(struct net *net, struct ip_options_rcu **optp,
kfree(opt);
return -EINVAL;
}
+ if (opt->opt.srr && !ns_capable(net->user_ns, CAP_NET_RAW)) {
+ kfree(opt);
+ return -EPERM;
+ }
kfree(*optp);
*optp = opt;
return 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0ac2bf4f8759..70f6cbd4ef73 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2011,6 +2011,14 @@ try_again:
}
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
+
+ /*
+ * skb->dev still aliases the UDP rx dev_scratch (its charge was freed
+ * on dequeue above); a sockmap verdict program may deref it via
+ * bpf_sk_lookup_*(), so clear it -> bpf_skc_lookup() uses skb->sk
+ */
+ skb->dev = NULL;
+
return recv_actor(sk, skb);
}
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 67a42e01dfc3..be6dac8a8566 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -243,16 +243,16 @@ static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
{
unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
- spin_lock(&acaddr_hash_lock);
+ spin_lock_bh(&acaddr_hash_lock);
hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
- spin_unlock(&acaddr_hash_lock);
+ spin_unlock_bh(&acaddr_hash_lock);
}
static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
{
- spin_lock(&acaddr_hash_lock);
+ spin_lock_bh(&acaddr_hash_lock);
hlist_del_init_rcu(&aca->aca_addr_lst);
- spin_unlock(&acaddr_hash_lock);
+ spin_unlock_bh(&acaddr_hash_lock);
}
static void aca_get(struct ifacaddr6 *aca)
@@ -371,10 +371,10 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
aca->aca_next = idev->ac_list;
rcu_assign_pointer(idev->ac_list, aca);
- write_unlock_bh(&idev->lock);
-
ipv6_add_acaddr_hash(net, aca);
+ write_unlock_bh(&idev->lock);
+
ip6_ins_rt(net, f6i);
addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -649,8 +649,8 @@ void ipv6_anycast_cleanup(void)
{
int i;
- spin_lock(&acaddr_hash_lock);
+ spin_lock_bh(&acaddr_hash_lock);
for (i = 0; i < IN6_ADDR_HSIZE; i++)
WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
- spin_unlock(&acaddr_hash_lock);
+ spin_unlock_bh(&acaddr_hash_lock);
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3330adcf26db..d9b855d5191b 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1424,9 +1424,9 @@ out:
static void __mld_query_work(struct sk_buff *skb)
{
struct mld2_query *mlh2 = NULL;
- const struct in6_addr *group;
unsigned long max_delay;
struct inet6_dev *idev;
+ struct in6_addr group;
struct ifmcaddr6 *ma;
struct mld_msg *mld;
int group_type;
@@ -1458,8 +1458,8 @@ static void __mld_query_work(struct sk_buff *skb)
goto kfree_skb;
mld = (struct mld_msg *)icmp6_hdr(skb);
- group = &mld->mld_mca;
- group_type = ipv6_addr_type(group);
+ group = mld->mld_mca;
+ group_type = ipv6_addr_type(&group);
if (group_type != IPV6_ADDR_ANY &&
!(group_type&IPV6_ADDR_MULTICAST))
@@ -1509,7 +1509,7 @@ static void __mld_query_work(struct sk_buff *skb)
}
} else {
for_each_mc_mclock(idev, ma) {
- if (!ipv6_addr_equal(group, &ma->mca_addr))
+ if (!ipv6_addr_equal(&group, &ma->mca_addr))
continue;
if (ma->mca_flags & MAF_TIMER_RUNNING) {
/* gsquery <- gsquery && mark */
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index c0a0075e2590..2dbe44715df3 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -191,6 +191,9 @@ static bool nft_fib6_info_nh_uses_dev(struct fib6_info *rt,
if (nft_fib6_info_nh_dev_match(nh_dev, dev))
return true;
+
+ if (!READ_ONCE(rt->fib6_nsiblings))
+ return false;
}
return false;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 99d6582f41de..e0b1915be1a6 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1045,64 +1045,76 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
{
struct pppol2tp_ioc_stats stats;
struct l2tp_session *session;
+ int err = 0;
+
+ session = pppol2tp_sock_to_session(sock->sk);
+ /* Validate session presence and magic integrity ONLY for commands
+ * that belong to L2TP and require a valid session.
+ */
switch (cmd) {
case PPPIOCGMRU:
case PPPIOCGFLAGS:
- session = sock->sk->sk_user_data;
+ case PPPIOCSMRU:
+ case PPPIOCSFLAGS:
+ case PPPIOCGL2TPSTATS:
if (!session)
return -ENOTCONN;
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
+ if (session->magic != L2TP_SESSION_MAGIC) {
+ l2tp_session_put(session);
return -EBADF;
+ }
+ break;
+ default:
+ break;
+ }
+ switch (cmd) {
+ case PPPIOCGMRU:
+ case PPPIOCGFLAGS:
/* Not defined for tunnels */
- if (!session->session_id && !session->peer_session_id)
- return -ENOSYS;
+ if (!session->session_id && !session->peer_session_id) {
+ err = -ENOSYS;
+ break;
+ }
- if (put_user(0, (int __user *)arg))
- return -EFAULT;
+ if (put_user(0, (int __user *)arg)) {
+ err = -EFAULT;
+ break;
+ }
break;
case PPPIOCSMRU:
case PPPIOCSFLAGS:
- session = sock->sk->sk_user_data;
- if (!session)
- return -ENOTCONN;
-
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
- return -EBADF;
-
/* Not defined for tunnels */
- if (!session->session_id && !session->peer_session_id)
- return -ENOSYS;
+ if (!session->session_id && !session->peer_session_id) {
+ err = -ENOSYS;
+ break;
+ }
- if (!access_ok((int __user *)arg, sizeof(int)))
- return -EFAULT;
+ if (!access_ok((int __user *)arg, sizeof(int))) {
+ err = -EFAULT;
+ break;
+ }
break;
case PPPIOCGL2TPSTATS:
- session = sock->sk->sk_user_data;
- if (!session)
- return -ENOTCONN;
-
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
- return -EBADF;
-
/* Session 0 represents the parent tunnel */
if (!session->session_id && !session->peer_session_id) {
u32 session_id;
- int err;
if (copy_from_user(&stats, (void __user *)arg,
- sizeof(stats)))
- return -EFAULT;
+ sizeof(stats))) {
+ err = -EFAULT;
+ break;
+ }
session_id = stats.session_id;
err = pppol2tp_tunnel_copy_stats(&stats,
session->tunnel);
if (err < 0)
- return err;
+ break;
stats.session_id = session_id;
} else {
@@ -1112,15 +1124,21 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
stats.tunnel_id = session->tunnel->tunnel_id;
stats.using_ipsec = l2tp_tunnel_uses_xfrm(session->tunnel);
- if (copy_to_user((void __user *)arg, &stats, sizeof(stats)))
- return -EFAULT;
+ if (copy_to_user((void __user *)arg, &stats, sizeof(stats))) {
+ err = -EFAULT;
+ break;
+ }
break;
default:
- return -ENOIOCTLCMD;
+ err = -ENOIOCTLCMD;
+ break;
}
- return 0;
+ if (session)
+ l2tp_session_put(session);
+
+ return err;
}
/*****************************************************************************
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b487d2330f25..ea7f63e1fc17 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2181,7 +2181,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
case IEEE80211_RADIOTAP_ANTENNA:
/* this can appear multiple times, keep a bitmap */
- info->control.antennas |= BIT(*iterator.this_arg);
+ /* control.antennas is only a 2-bit bitmap */
+ if (*iterator.this_arg < 2)
+ info->control.antennas |= BIT(*iterator.this_arg);
break;
case IEEE80211_RADIOTAP_DATA_RETRIES:
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 8a1c5698983c..b3ea7854818f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -566,12 +566,17 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct tcp_sock *tp = tcp_sk(sk);
unsigned int dss_size = 0;
struct mptcp_ext *mpext;
unsigned int ack_size;
bool ret = false;
- u64 ack_seq;
+ /* Zero `use_ack` and `use_map` flags with one shot. */
+ BUILD_BUG_ON(sizeof_field(struct mptcp_ext, flags) != sizeof(u16));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_ext, flags),
+ sizeof(u16)));
+ *(u16 *)&opts->ext_copy.flags = 0;
opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL;
@@ -595,20 +600,16 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
/* passive sockets msk will set the 'can_ack' after accept(), even
* if the first subflow may have the already the remote key handy
*/
- opts->ext_copy.use_ack = 0;
if (!READ_ONCE(msk->can_ack)) {
*size = ALIGN(dss_size, 4);
return ret;
}
- ack_seq = READ_ONCE(msk->ack_seq);
if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
- opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
} else {
ack_size = TCPOLEN_MPTCP_DSS_ACK32;
- opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
@@ -618,6 +619,12 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
if (dss_size == 0)
ack_size += TCPOLEN_MPTCP_DSS_BASE;
+ /* The caller is __tcp_transmit_skb(), and will compute the new rcv
+ * wnd soon: ensure that the window can shrink.
+ */
+ if (skb)
+ tp->rcv_wnd = tp->rcv_nxt - tp->rcv_wup;
+
dss_size += ack_size;
*size = ALIGN(dss_size, 4);
@@ -658,7 +665,6 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- bool drop_other_suboptions = false;
unsigned int opt_size = *size;
struct mptcp_addr_info addr;
bool echo;
@@ -669,36 +675,20 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
*/
if (!mptcp_pm_should_add_signal(msk) ||
(opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
- !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr,
- &echo, &drop_other_suboptions))
+ !skb || !skb_is_tcp_pure_ack(skb) ||
+ !mptcp_pm_add_addr_signal(msk, opt_size, remaining, &addr, &echo))
return false;
- /*
- * Later on, mptcp_write_options() will enforce mutually exclusion with
- * DSS, bail out if such option is set and we can't drop it.
- */
- if (drop_other_suboptions)
- remaining += opt_size;
- else if (opts->suboptions & OPTION_MPTCP_DSS)
- return false;
+ remaining += opt_size;
len = mptcp_add_addr_len(addr.family, echo, !!addr.port);
if (remaining < len)
return false;
*size = len;
- if (drop_other_suboptions) {
- pr_debug("drop other suboptions\n");
- opts->suboptions = 0;
-
- /* note that e.g. DSS could have written into the memory
- * aliased by ahmac, we must reset the field here
- * to avoid appending the hmac even for ADD_ADDR echo
- * options
- */
- opts->ahmac = 0;
- *size -= opt_size;
- }
+ pr_debug("drop other suboptions\n");
+ opts->suboptions = 0;
+ *size -= opt_size;
opts->addr = addr;
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
@@ -708,6 +698,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
&opts->addr);
} else {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
+ opts->ahmac = 0;
}
pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n",
opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
@@ -1297,19 +1288,14 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}
-static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
+static u64 mptcp_set_rwin(struct mptcp_sock *msk, struct tcp_sock *tp,
+ struct tcphdr *th, u64 ack_seq)
{
const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
- u64 ack_seq, rcv_wnd_old, rcv_wnd_new;
- struct mptcp_sock *msk;
+ u64 rcv_wnd_old, rcv_wnd_new;
u32 new_win;
u64 win;
- subflow = mptcp_subflow_ctx(ssk);
- msk = mptcp_sk(subflow->conn);
-
- ack_seq = READ_ONCE(msk->ack_seq);
rcv_wnd_new = ack_seq + tp->rcv_wnd;
rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent);
@@ -1362,7 +1348,7 @@ raise_win:
update_wspace:
WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
- subflow->rcv_wnd_sent = rcv_wnd_new;
+ return rcv_wnd_new;
}
static void mptcp_track_rwin(struct tcp_sock *tp)
@@ -1474,13 +1460,25 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
if (mpext->use_ack) {
+ struct mptcp_sock *msk;
+ u64 ack_seq;
+
+ /* DSS option is set only by mptcp_established_options,
+ * the caller is __tcp_transmit_skb() and ssk is always
+ * not NULL.
+ */
+ subflow = mptcp_subflow_ctx(ssk);
+ msk = mptcp_sk(subflow->conn);
+ ack_seq = READ_ONCE(msk->ack_seq);
if (mpext->ack64) {
- put_unaligned_be64(mpext->data_ack, ptr);
+ put_unaligned_be64(ack_seq, ptr);
ptr += 2;
} else {
- put_unaligned_be32(mpext->data_ack32, ptr);
+ put_unaligned_be32(ack_seq, ptr);
ptr += 1;
}
+ subflow->rcv_wnd_sent = mptcp_set_rwin(msk, tp, th,
+ ack_seq);
}
if (mpext->use_map) {
@@ -1708,9 +1706,6 @@ mp_capable_done:
i += 4;
}
}
-
- if (tp)
- mptcp_set_rwin(tp, th);
}
__be32 mptcp_get_reset_option(const struct sk_buff *skb)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 3e770c7407e1..470501470fe5 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -887,10 +887,9 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
}
}
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
- unsigned int opt_size, unsigned int remaining,
- struct mptcp_addr_info *addr, bool *echo,
- bool *drop_other_suboptions)
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int opt_size,
+ unsigned int remaining,
+ struct mptcp_addr_info *addr, bool *echo)
{
bool skip_add_addr = false;
int ret = false;
@@ -908,10 +907,7 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
* plain dup-ack from TCP perspective. The other MPTCP-relevant info,
* if any, will be carried by the 'original' TCP ack
*/
- if (skb && skb_is_tcp_pure_ack(skb)) {
- remaining += opt_size;
- *drop_other_suboptions = true;
- }
+ remaining += opt_size;
*echo = mptcp_pm_should_add_signal_echo(msk);
if (*echo) {
@@ -929,9 +925,6 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
if (remaining < mptcp_add_addr_len(family, *echo, port)) {
struct net *net = sock_net((struct sock *)msk);
- if (!*drop_other_suboptions)
- goto out_unlock;
-
if (*echo) {
MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP);
} else {
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 8cbc1920afb4..0d3a95e676f1 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -408,19 +408,21 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
local.flags = entry.flags;
local.ifindex = entry.ifindex;
+ spin_lock_bh(&msk->pm.lock);
+ msk->pm.extra_subflows++;
+ spin_unlock_bh(&msk->pm.lock);
+
lock_sock(sk);
err = __mptcp_subflow_connect(sk, &local, &addr_r);
release_sock(sk);
- if (err)
+ if (err) {
GENL_SET_ERR_MSG_FMT(info, "connect error: %d", err);
- spin_lock_bh(&msk->pm.lock);
- if (err)
+ spin_lock_bh(&msk->pm.lock);
mptcp_userspace_pm_delete_local_addr(msk, &entry);
- else
- msk->pm.extra_subflows++;
- spin_unlock_bh(&msk->pm.lock);
+ spin_unlock_bh(&msk->pm.lock);
+ }
create_err:
sock_put(sk);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a72a6ad6ee8b..cb9515f505aa 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2276,6 +2276,10 @@ static bool mptcp_move_skbs(struct sock *sk)
mptcp_backlog_spooled(sk, moved, &skbs);
}
mptcp_data_unlock(sk);
+
+ if (enqueued && mptcp_epollin_ready(sk))
+ sk->sk_data_ready(sk);
+
return enqueued;
}
@@ -2865,6 +2869,10 @@ static void __mptcp_retrans(struct sock *sk)
msk->bytes_retrans += len;
dfrag->already_sent = max(dfrag->already_sent, len);
+ /* With csum enabled retransmission can send new data. */
+ if (after64(dfrag->already_sent + dfrag->data_seq, msk->snd_nxt))
+ WRITE_ONCE(msk->snd_nxt, dfrag->already_sent + dfrag->data_seq);
+
reset_timer:
mptcp_check_and_set_pending(sk);
@@ -4420,6 +4428,8 @@ static int __mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
}
mptcp_eat_recv_skb(sk, skb);
+ if (!desc->count)
+ break;
}
if (noack)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e4f5aba24da7..b93b878478d2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -1229,10 +1229,9 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
}
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
- unsigned int opt_size, unsigned int remaining,
- struct mptcp_addr_info *addr, bool *echo,
- bool *drop_other_suboptions);
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int opt_size,
+ unsigned int remaining,
+ struct mptcp_addr_info *addr, bool *echo);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 87b5796d0135..fcf6feb2a9eb 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -241,15 +241,19 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int err;
lock_sock(ssk);
- sock_set_timestamping(ssk, optname, timestamping);
+ err = sock_set_timestamping(ssk, optname, timestamping);
release_sock(ssk);
+
+ if (err < 0 && ret == 0)
+ ret = err;
}
release_sock(sk);
- return 0;
+ return ret;
}
static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
@@ -813,10 +817,11 @@ static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level,
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int err;
- ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
- if (ret)
- break;
+ err = tcp_setsockopt(ssk, level, optname, optval, optlen);
+ if (err < 0 && ret == 0)
+ ret = err;
}
if (!ret)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index bd9cae44d214..16daba8cac83 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1898,7 +1898,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
if (ret_hooks >= 0)
ip_vs_unregister_hooks(ipvs, u->af);
if (svc != NULL) {
- ip_vs_unbind_scheduler(svc, sched);
+ ip_vs_unbind_scheduler(svc);
ip_vs_service_free(svc);
}
ip_vs_scheduler_put(sched);
@@ -1962,9 +1962,8 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
old_sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched != old_sched) {
if (old_sched) {
- ip_vs_unbind_scheduler(svc, old_sched);
- RCU_INIT_POINTER(svc->scheduler, NULL);
- /* Wait all svc->sched_data users */
+ ip_vs_unbind_scheduler(svc);
+ /* Wait all svc->scheduler/sched_data users */
synchronize_rcu();
}
/* Bind the new scheduler */
@@ -1972,6 +1971,10 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
ret = ip_vs_bind_scheduler(svc, sched);
if (ret) {
ip_vs_scheduler_put(sched);
+ /* Try to restore the old_sched */
+ if (old_sched &&
+ !ip_vs_bind_scheduler(svc, old_sched))
+ old_sched = NULL;
goto out;
}
}
@@ -2027,7 +2030,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
/* Unbind scheduler */
old_sched = rcu_dereference_protected(svc->scheduler, 1);
- ip_vs_unbind_scheduler(svc, old_sched);
+ ip_vs_unbind_scheduler(svc);
ip_vs_scheduler_put(old_sched);
/* Unbind persistence engine, keep svc->pe */
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index c6e421c4e299..24adc38942a0 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -56,19 +56,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
/*
* Unbind a service with its scheduler
*/
-void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
- struct ip_vs_scheduler *sched)
+void ip_vs_unbind_scheduler(struct ip_vs_service *svc)
{
- struct ip_vs_scheduler *cur_sched;
+ struct ip_vs_scheduler *sched;
- cur_sched = rcu_dereference_protected(svc->scheduler, 1);
- /* This check proves that old 'sched' was installed */
- if (!cur_sched)
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (!sched)
return;
+ /* Reset the scheduler before initiating any RCU callbacks */
+ rcu_assign_pointer(svc->scheduler, NULL);
+ smp_wmb(); /* paired with smp_rmb() in ip_vs_schedule() */
if (sched->done_service)
sched->done_service(svc);
- /* svc->scheduler can be set to NULL only by caller */
}
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 522183b9a604..2ebe4cb47cf6 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -203,7 +203,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
if (parse_dcc(data, data_limit, &dcc_ip,
&dcc_port, &addr_beg_p, &addr_end_p)) {
pr_debug("unable to parse dcc command\n");
- continue;
+ goto out;
}
pr_debug("DCC bound ip/port: %pI4:%u\n",
@@ -217,7 +217,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n",
&tuple->src.u3.ip,
&dcc_ip, dcc_port);
- continue;
+ goto out;
}
exp = nf_ct_expect_alloc(ct);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 036c8586f49b..ed00114f65f3 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -22,6 +22,8 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_synproxy.h>
+static DEFINE_MUTEX(synproxy_mutex);
+
unsigned int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);
@@ -769,26 +771,31 @@ static const struct nf_hook_ops ipv4_synproxy_ops[] = {
int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net)
{
- int err;
+ int err = 0;
+ mutex_lock(&synproxy_mutex);
if (snet->hook_ref4 == 0) {
err = nf_register_net_hooks(net, ipv4_synproxy_ops,
ARRAY_SIZE(ipv4_synproxy_ops));
if (err)
- return err;
+ goto out;
}
snet->hook_ref4++;
- return 0;
+out:
+ mutex_unlock(&synproxy_mutex);
+ return err;
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_init);
void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net)
{
+ mutex_lock(&synproxy_mutex);
snet->hook_ref4--;
if (snet->hook_ref4 == 0)
nf_unregister_net_hooks(net, ipv4_synproxy_ops,
ARRAY_SIZE(ipv4_synproxy_ops));
+ mutex_unlock(&synproxy_mutex);
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_fini);
@@ -1193,27 +1200,32 @@ static const struct nf_hook_ops ipv6_synproxy_ops[] = {
int
nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net)
{
- int err;
+ int err = 0;
+ mutex_lock(&synproxy_mutex);
if (snet->hook_ref6 == 0) {
err = nf_register_net_hooks(net, ipv6_synproxy_ops,
ARRAY_SIZE(ipv6_synproxy_ops));
if (err)
- return err;
+ goto out;
}
snet->hook_ref6++;
- return 0;
+out:
+ mutex_unlock(&synproxy_mutex);
+ return err;
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_init);
void
nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net)
{
+ mutex_lock(&synproxy_mutex);
snet->hook_ref6--;
if (snet->hook_ref6 == 0)
nf_unregister_net_hooks(net, ipv6_synproxy_ops,
ARRAY_SIZE(ipv6_synproxy_ops));
+ mutex_unlock(&synproxy_mutex);
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini);
#endif /* CONFIG_IPV6 */
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 2316c77f4228..dfd41fc8d9b8 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -19,7 +19,6 @@ struct nft_byteorder {
u8 sreg;
u8 dreg;
enum nft_byteorder_ops op:8;
- u8 len;
u8 size;
};
@@ -28,13 +27,8 @@ void nft_byteorder_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
const struct nft_byteorder *priv = nft_expr_priv(expr);
- u32 *src = &regs->data[priv->sreg];
+ const u32 *src = &regs->data[priv->sreg];
u32 *dst = &regs->data[priv->dreg];
- u16 *s16, *d16;
- unsigned int i;
-
- s16 = (void *)src;
- d16 = (void *)dst;
switch (priv->size) {
case 8: {
@@ -43,18 +37,14 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
- for (i = 0; i < priv->len / 8; i++) {
- src64 = nft_reg_load64(&src[i]);
- nft_reg_store64(&dst64[i],
- be64_to_cpu((__force __be64)src64));
- }
+ src64 = nft_reg_load64(src);
+
+ nft_reg_store64(dst64, be64_to_cpu((__force __be64)src64));
break;
case NFT_BYTEORDER_HTON:
- for (i = 0; i < priv->len / 8; i++) {
- src64 = (__force __u64)
- cpu_to_be64(nft_reg_load64(&src[i]));
- nft_reg_store64(&dst64[i], src64);
- }
+ src64 = (__force __u64)cpu_to_be64(nft_reg_load64(src));
+
+ nft_reg_store64(dst64, src64);
break;
}
break;
@@ -62,24 +52,20 @@ void nft_byteorder_eval(const struct nft_expr *expr,
case 4:
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
- for (i = 0; i < priv->len / 4; i++)
- dst[i] = ntohl((__force __be32)src[i]);
+ *dst = ntohl((__force __be32)*src);
break;
case NFT_BYTEORDER_HTON:
- for (i = 0; i < priv->len / 4; i++)
- dst[i] = (__force __u32)htonl(src[i]);
+ *dst = (__force __u32)htonl(*src);
break;
}
break;
case 2:
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
- for (i = 0; i < priv->len / 2; i++)
- d16[i] = ntohs((__force __be16)s16[i]);
+ nft_reg_store16(dst, ntohs(nft_reg_load_be16(src)));
break;
case NFT_BYTEORDER_HTON:
- for (i = 0; i < priv->len / 2; i++)
- d16[i] = (__force __u16)htons(s16[i]);
+ nft_reg_store_be16(dst, htons(nft_reg_load16(src)));
break;
}
break;
@@ -137,20 +123,22 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- priv->len = len;
+ /* no longer support multi-reg conversions */
+ if (len != size)
+ return -EOPNOTSUPP;
err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg,
- priv->len);
+ len);
if (err < 0)
return err;
err = nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG],
&priv->dreg, NULL, NFT_DATA_VALUE,
- priv->len);
+ len);
if (err < 0)
return err;
- if (nft_reg_overlap(priv->sreg, priv->dreg, priv->len))
+ if (nft_reg_overlap(priv->sreg, priv->dreg, len))
return -EINVAL;
return 0;
@@ -167,10 +155,11 @@ static int nft_byteorder_dump(struct sk_buff *skb,
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len)))
- goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size)))
goto nla_put_failure;
+ /* compatibility for old userspace which permitted size != len */
+ if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->size)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index fa2cc556331c..357513c6dcea 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -78,7 +78,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
break;
}
- if (ct == NULL)
+ if (!ct || nf_ct_is_template(ct))
goto err;
switch (priv->key) {
@@ -180,12 +180,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
tuple = &ct->tuplehash[priv->dir].tuple;
switch (priv->key) {
case NFT_CT_SRC:
- memcpy(dest, tuple->src.u3.all,
- nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+ memcpy(dest, tuple->src.u3.all, priv->len);
return;
case NFT_CT_DST:
- memcpy(dest, tuple->dst.u3.all,
- nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+ memcpy(dest, tuple->dst.u3.all, priv->len);
return;
case NFT_CT_PROTO_SRC:
nft_reg_store16(dest, (__force u16)tuple->src.u.all);
diff --git a/net/netfilter/nft_ct_fast.c b/net/netfilter/nft_ct_fast.c
index e684c8a91848..ecf7b3a404be 100644
--- a/net/netfilter/nft_ct_fast.c
+++ b/net/netfilter/nft_ct_fast.c
@@ -30,7 +30,7 @@ void nft_ct_get_fast_eval(const struct nft_expr *expr,
break;
}
- if (!ct) {
+ if (!ct || nf_ct_is_template(ct)) {
regs->verdict.code = NFT_BREAK;
return;
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 0b987bc2132a..68f7cfbbee06 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -676,7 +676,7 @@ static void nft_tunnel_obj_destroy(const struct nft_ctx *ctx,
{
struct nft_tunnel_obj *priv = nft_obj_data(obj);
- metadata_dst_free(priv->md);
+ dst_release(&priv->md->dst);
}
static struct nft_object_type nft_tunnel_obj_type;
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 466da23e36ff..b32d153e3a18 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -91,7 +91,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
if (info->queues_total > 1) {
if (info->flags & NFQ_FLAG_CPU_FANOUT) {
- int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
queue = info->queuenum + cpu % info->queues_total;
} else {
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0c64c504f79d..4001de0c4959 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -656,6 +656,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
sends_out:
vfree(ic->i_sends);
+ ic->i_sends = NULL;
ack_dma_out:
rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma,
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 332fd9695e54..04ea11c90e03 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -112,11 +112,6 @@ struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
}
EXPORT_SYMBOL(tcf_action_set_ctrlact);
-/* XXX: For standalone actions, we don't need a RCU grace period either, because
- * actions are always connected to filters and filters are already destroyed in
- * RCU callbacks, so after a RCU grace period actions are already disconnected
- * from filters. Readers later can not find us.
- */
static void free_tcf(struct tc_action *p)
{
struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
@@ -129,7 +124,7 @@ static void free_tcf(struct tc_action *p)
if (chain)
tcf_chain_put_by_act(chain);
- kfree(p);
+ kfree_rcu(p, tcfa_rcu);
}
static void offload_action_hw_count_set(struct tc_action *act,
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index bc20f08a2789..bd3b1da3cd63 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -16,6 +16,8 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
+#include <linux/overflow.h>
+#include <linux/unaligned.h>
#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
@@ -242,7 +244,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_free_ex;
}
- nparms->tcfp_off_max_hint = 0;
nparms->tcfp_flags = parm->flags;
nparms->tcfp_nkeys = parm->nkeys;
@@ -268,14 +269,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
BITS_PER_TYPE(int) - 1,
nparms->tcfp_keys[i].shift);
- /* The AT option can read a single byte, we can bound the actual
- * value with uchar max.
- */
- cur += (0xff & offmask) >> nparms->tcfp_keys[i].shift;
-
- /* Each key touches 4 bytes starting from the computed offset */
- nparms->tcfp_off_max_hint =
- max(nparms->tcfp_off_max_hint, cur + 4);
}
p = to_pedit(*a);
@@ -318,15 +311,12 @@ static void tcf_pedit_cleanup(struct tc_action *a)
call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu);
}
-static bool offset_valid(struct sk_buff *skb, int offset)
+static bool offset_valid(struct sk_buff *skb, int offset, int len)
{
- if (offset > 0 && offset > skb->len)
- return false;
-
- if (offset < 0 && -offset > skb_headroom(skb))
+ if (offset < -(int)skb_headroom(skb))
return false;
- return true;
+ return offset <= (int)skb->len - len;
}
static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type)
@@ -393,18 +383,10 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
struct tcf_pedit_key_ex *tkey_ex;
struct tcf_pedit_parms *parms;
struct tc_pedit_key *tkey;
- u32 max_offset;
int i;
parms = rcu_dereference_bh(p->parms);
- max_offset = (skb_transport_header_was_set(skb) ?
- skb_transport_offset(skb) :
- skb_network_offset(skb)) +
- parms->tcfp_off_max_hint;
- if (skb_ensure_writable(skb, min(skb->len, max_offset)))
- goto done;
-
tcf_lastuse_update(&p->tcf_tm);
tcf_action_update_bstats(&p->common, skb);
@@ -412,10 +394,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
tkey_ex = parms->tcfp_keys_ex;
for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
+ int write_offset, write_len;
int offset = tkey->off;
int hoffset = 0;
- u32 *ptr, hdata;
- u32 val;
+ u32 cur_val, val;
+ u32 *ptr;
int rc;
if (tkey_ex) {
@@ -433,13 +416,15 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
if (tkey->offmask) {
u8 *d, _d;
+ int at_offset;
- if (!offset_valid(skb, hoffset + tkey->at)) {
+ if (check_add_overflow(hoffset, (int)tkey->at, &at_offset) ||
+ !offset_valid(skb, at_offset, sizeof(_d))) {
pr_info_ratelimited("tc action pedit 'at' offset %d out of bounds\n",
hoffset + tkey->at);
goto bad;
}
- d = skb_header_pointer(skb, hoffset + tkey->at,
+ d = skb_header_pointer(skb, at_offset,
sizeof(_d), &_d);
if (!d)
goto bad;
@@ -451,31 +436,51 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
}
}
- if (!offset_valid(skb, hoffset + offset)) {
- pr_info_ratelimited("tc action pedit offset %d out of bounds\n", hoffset + offset);
+ if (check_add_overflow(hoffset, offset, &write_offset)) {
+ pr_info_ratelimited("tc action pedit offset overflow\n");
goto bad;
}
- ptr = skb_header_pointer(skb, hoffset + offset,
- sizeof(hdata), &hdata);
- if (!ptr)
+ if (!offset_valid(skb, write_offset, sizeof(*ptr))) {
+ pr_info_ratelimited("tc action pedit offset %d out of bounds\n",
+ write_offset);
goto bad;
+ }
+
+ if (write_offset < 0) {
+ if (skb_cow(skb, -write_offset))
+ goto bad;
+ if (write_offset + (int)sizeof(*ptr) > 0) {
+ if (skb_ensure_writable(skb,
+ min_t(int, skb->len,
+ write_offset + (int)sizeof(*ptr))))
+ goto bad;
+ }
+ } else {
+ if (check_add_overflow(write_offset, (int)sizeof(*ptr),
+ &write_len))
+ goto bad;
+ if (skb_ensure_writable(skb, min_t(int, skb->len,
+ write_len)))
+ goto bad;
+ }
+
+ ptr = (u32 *)(skb->data + write_offset);
+ cur_val = get_unaligned(ptr);
/* just do it, baby */
switch (cmd) {
case TCA_PEDIT_KEY_EX_CMD_SET:
val = tkey->val;
break;
case TCA_PEDIT_KEY_EX_CMD_ADD:
- val = (*ptr + tkey->val) & ~tkey->mask;
+ val = (cur_val + tkey->val) & ~tkey->mask;
break;
default:
pr_info_ratelimited("tc action pedit bad command (%d)\n", cmd);
goto bad;
}
- *ptr = ((*ptr & tkey->mask) ^ val);
- if (ptr == &hdata)
- skb_store_bits(skb, hoffset + offset, ptr, 4);
+ put_unaligned((cur_val & tkey->mask) ^ val, ptr);
}
goto done;
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 2afb376299fe..d758f5c3e06e 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -266,15 +266,15 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t
lock_sock(sk);
- rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL);
- if (!rep) {
- release_sock(sk);
- return -ENOMEM;
+ if (ep != assoc->ep || assoc->base.dead) {
+ err = -ESTALE;
+ goto out_unlock;
}
- if (ep != assoc->ep) {
- err = -EAGAIN;
- goto out;
+ rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL);
+ if (!rep) {
+ err = -ENOMEM;
+ goto out_unlock;
}
err = inet_sctp_diag_fill(sk, assoc, rep, req, sk_user_ns(NETLINK_CB(skb).sk),
@@ -289,8 +289,9 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t
return nlmsg_unicast(sock_net(skb->sk)->diag_nlsk, rep, NETLINK_CB(skb).portid);
out:
- release_sock(sk);
kfree_skb(rep);
+out_unlock:
+ release_sock(sk);
return err;
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index de86ac088289..85264862fb6b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1730,6 +1730,7 @@ struct sctp_association *sctp_unpack_cookie(
struct sctp_signed_cookie *cookie;
struct sk_buff *skb = chunk->skb;
struct sctp_cookie *bear_cookie;
+ struct sctp_chunkhdr *ch;
enum sctp_scope scope;
unsigned int len;
ktime_t kt;
@@ -1759,6 +1760,10 @@ struct sctp_association *sctp_unpack_cookie(
cookie = chunk->subh.cookie_hdr;
bear_cookie = &cookie->c;
+ ch = (struct sctp_chunkhdr *)(bear_cookie + 1);
+ if (ntohs(ch->length) > len - fixed_size)
+ goto malformed;
+
/* Verify the cookie's MAC, if cookie authentication is enabled. */
if (sctp_sk(ep->base.sk)->cookie_auth_enable) {
u8 mac[SHA256_DIGEST_SIZE];
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8e89a870780c..9b23c11cbb9e 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2598,11 +2598,7 @@ static enum sctp_disposition sctp_sf_do_5_2_6_stale(
*/
sctp_add_cmd_sf(commands, SCTP_CMD_DEL_NON_PRIMARY, SCTP_NULL());
- /* If we've sent any data bundled with COOKIE-ECHO we will need to
- * resend
- */
- sctp_add_cmd_sf(commands, SCTP_CMD_T1_RETRAN,
- SCTP_TRANSPORT(asoc->peer.primary_path));
+ sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL());
/* Cast away the const modifier, as we want to just
* rerun it through as a sideffect.
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dc71ed79be4a..0d9cd977c7b7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2886,7 +2886,7 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
return -EAGAIN;
}
- WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
+ WRITE_ONCE(u->inq_len, u->inq_len - unix_skb_len(skb));
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (skb == u->oob_skb) {
@@ -3063,11 +3063,12 @@ unlock:
unix_detach_fds(&scm, skb);
}
- if (unix_skb_len(skb))
- break;
-
spin_lock(&sk->sk_receive_queue.lock);
- WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
+ WRITE_ONCE(u->inq_len, u->inq_len - chunk);
+ if (unix_skb_len(skb)) {
+ spin_unlock(&sk->sk_receive_queue.lock);
+ break;
+ }
__skb_unlink(skb, &sk->sk_receive_queue);
spin_unlock(&sk->sk_receive_queue.lock);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 5c1ecd5bfdbc..91516488a742 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -980,8 +980,10 @@ static int vmci_transport_recv_listen(struct sock *sk,
err = -EINVAL;
}
- if (err < 0)
+ if (err < 0) {
vsock_remove_pending(sk, pending);
+ sk_acceptq_removed(sk);
+ }
release_sock(pending);
vmci_transport_release_pending(pending);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7db9cd433801..76c537a6e8b5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6366,6 +6366,9 @@ nl80211_parse_rnr_elems(struct wiphy *wiphy, struct nlattr *attrs,
if (ret)
return ERR_PTR(ret);
+ if (num_elems >= 255)
+ return ERR_PTR(-EINVAL);
+
num_elems++;
}
@@ -6711,6 +6714,12 @@ static int nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
return -EINVAL;
}
+ if (!!params->he_cap != !!params->he_oper)
+ return -EINVAL;
+
+ if (!!params->eht_cap != !!params->eht_oper)
+ return -EINVAL;
+
return 0;
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 358cbc9e43d8..27a56ee2e8f0 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1071,6 +1071,7 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev)
struct cfg80211_scan_request_int *request;
struct cfg80211_scan_request_int *rdev_req = rdev->scan_req;
u32 n_channels = 0, idx, i;
+ int err;
if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ)) {
rdev_req->req.first_part = true;
@@ -1100,8 +1101,14 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev)
rdev_req->req.scan_6ghz = false;
rdev_req->req.first_part = true;
+ err = rdev_scan(rdev, request);
+ if (err) {
+ kfree(request);
+ return err;
+ }
+
rdev->int_scan_req = request;
- return rdev_scan(rdev, request);
+ return 0;
}
void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5e5786cd9af5..f8c8a8c9dfba 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -802,6 +802,7 @@ static int xsk_skb_metadata(struct sk_buff *skb, void *buffer,
u32 hr)
{
struct xsk_tx_metadata *meta = NULL;
+ u16 csum_start, csum_offset;
if (unlikely(pool->tx_metadata_len == 0))
return -EINVAL;
@@ -811,13 +812,15 @@ static int xsk_skb_metadata(struct sk_buff *skb, void *buffer,
return -EINVAL;
if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) {
- if (unlikely(meta->request.csum_start +
- meta->request.csum_offset +
+ csum_start = READ_ONCE(meta->request.csum_start);
+ csum_offset = READ_ONCE(meta->request.csum_offset);
+
+ if (unlikely(csum_start + csum_offset +
sizeof(__sum16) > desc->len))
return -EINVAL;
- skb->csum_start = hr + meta->request.csum_start;
- skb->csum_offset = meta->request.csum_offset;
+ skb->csum_start = hr + csum_start;
+ skb->csum_offset = csum_offset;
skb->ip_summed = CHECKSUM_PARTIAL;
if (unlikely(pool->tx_sw_csum)) {
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f3da38c54d27..2ed7d803eb54 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -109,6 +109,7 @@ TEST_PROGS := \
test_vxlan_nh.sh \
test_vxlan_nolocalbypass.sh \
test_vxlan_under_vrf.sh \
+ test_vxlan_vnifilter_notify.sh \
test_vxlan_vnifiltering.sh \
tfo_passive.sh \
traceroute.sh \
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
index 3a86be9bda17..6268b5bf50be 100644
--- a/tools/testing/selftests/net/af_unix/scm_inq.c
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -8,8 +8,9 @@
#include "kselftest_harness.h"
-#define NR_CHUNKS 100
-#define MSG_LEN 256
+#define NR_CHUNKS 100
+#define MSG_LEN 256
+#define NR_PARTIAL_READS 3
FIXTURE(scm_inq)
{
@@ -120,4 +121,53 @@ TEST_F(scm_inq, basic)
recv_chunks(_metadata, self);
}
+TEST_F(scm_inq, partial_read)
+{
+ char buf[MSG_LEN * NR_PARTIAL_READS] = {};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+ struct msghdr msg = {};
+ struct iovec iov = {};
+ struct cmsghdr *cmsg;
+ int err, inq, ret, i;
+ int remain;
+
+ err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int));
+ if (variant->type != SOCK_STREAM) {
+ ASSERT_EQ(-ENOPROTOOPT, -errno);
+ return;
+ }
+ ASSERT_EQ(0, err);
+
+ ret = send(self->fd[0], buf, sizeof(buf), 0);
+ ASSERT_EQ(sizeof(buf), ret);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ iov.iov_base = buf;
+ iov.iov_len = MSG_LEN;
+
+ for (i = 0; i < NR_PARTIAL_READS; i++) {
+ remain = MSG_LEN * (NR_PARTIAL_READS - 1 - i);
+
+ memset(buf, 0, MSG_LEN);
+ memset(cmsg_buf, 0, sizeof(cmsg_buf));
+ ret = recvmsg(self->fd[1], &msg, 0);
+ ASSERT_EQ(MSG_LEN, ret);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(NULL, cmsg);
+ ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len);
+ ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level);
+ ASSERT_EQ(SCM_INQ, cmsg->cmsg_type);
+ ASSERT_EQ(remain, *(int *)CMSG_DATA(cmsg));
+
+ ret = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, ret);
+ ASSERT_EQ(remain, inq);
+ }
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 5acd12021e6e..4b3f71e66609 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -4100,6 +4100,10 @@ userspace_tests()
chk_rm_nr 0 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
+ # check counters are not affected by errors at creation time
+ userspace_pm_add_sf $ns2 10.0.12.2 10 2>/dev/null
+ chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
kill_events_pids
mptcp_lib_kill_group_wait $tests_pid
fi
diff --git a/tools/testing/selftests/net/test_vxlan_vnifilter_notify.sh b/tools/testing/selftests/net/test_vxlan_vnifilter_notify.sh
new file mode 100755
index 000000000000..9d51a9e02ae0
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_vnifilter_notify.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2034,SC2154,SC2317,SC2329
+#
+# Test for VXLAN vnifilter netlink notifications (RTM_NEWTUNNEL /
+# RTM_DELTUNNEL).
+#
+# Verifies that:
+# - Adding a new VNI sends a notification
+# - Adding a new VNI with a remote sends a notification
+# - Deleting a VNI sends a notification
+# - Re-adding an existing VNI with the same attributes does not send
+# a spurious notification
+# - Updating an existing VNI's remote sends a notification
+# - Deleting a non-existent VNI does not send a notification
+
+source lib.sh
+
+require_command bridge
+
+VXLAN_DEV=vxlan100
+
+ALL_TESTS="
+ test_vni_add_notify
+ test_vni_add_remote_notify
+ test_vni_del_notify
+ test_vni_readd_no_notify
+ test_vni_update_remote_notify
+ test_vni_del_nonexistent_no_notify
+"
+
+setup_prepare()
+{
+ setup_ns NS1
+ defer cleanup_all_ns
+
+ ip -n "$NS1" link add $VXLAN_DEV type vxlan dstport 4789 \
+ local 10.0.0.1 nolearning external vnifilter
+ ip -n "$NS1" link set $VXLAN_DEV up
+}
+
+# Run bridge monitor in the background, execute a command, then count
+# the notification lines.
+# Usage: vni_notify_check <command> [args...]
+# Sets: NOTIFY_COUNT with the number of notifications observed.
+vni_notify_check()
+{
+ local tmpf cmd_ret monitor_pid
+
+ tmpf=$(mktemp)
+ defer rm "$tmpf"
+
+ defer_scope_push
+ ip netns exec "$NS1" bridge monitor vni > "$tmpf" 2>/dev/null &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+
+ sleep 0.5
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "iproute2 'bridge monitor vni' not supported"
+ return "$RET"
+ fi
+
+ "$@"
+ cmd_ret=$?
+ sleep 0.2
+ defer_scope_pop
+
+ NOTIFY_COUNT=$(grep -c "$VXLAN_DEV" "$tmpf")
+ NOTIFY_COUNT=${NOTIFY_COUNT:-0}
+ return "$cmd_ret"
+}
+
+# Adding a brand new VNI should produce a notification.
+test_vni_add_notify()
+{
+ RET=0
+
+ vni_notify_check \
+ bridge -n "$NS1" vni add vni 1000 dev "$VXLAN_DEV"
+ check_err $? "Failed to add VNI"
+
+ [ "$NOTIFY_COUNT" -eq 1 ]
+ check_err $? "Expected 1 notification for VNI add, got $NOTIFY_COUNT"
+
+ bridge -n "$NS1" vni delete vni 1000 dev "$VXLAN_DEV" 2>/dev/null
+
+ log_test "VNI add sends notification"
+}
+
+# Adding a VNI with a remote should produce a notification.
+test_vni_add_remote_notify()
+{
+ RET=0
+
+ vni_notify_check \
+ bridge -n "$NS1" vni add vni 4000 remote 10.0.0.2 dev "$VXLAN_DEV"
+ check_err $? "Failed to add VNI with remote"
+
+ [ "$NOTIFY_COUNT" -eq 1 ]
+ check_err $? "Expected 1 notification for VNI add with remote, got $NOTIFY_COUNT"
+
+ bridge -n "$NS1" vni delete vni 4000 dev "$VXLAN_DEV"
+
+ log_test "VNI add with remote sends notification"
+}
+
+# Deleting a VNI should produce a notification.
+test_vni_del_notify()
+{
+ RET=0
+
+ bridge -n "$NS1" vni add vni 2000 dev "$VXLAN_DEV"
+
+ vni_notify_check \
+ bridge -n "$NS1" vni delete vni 2000 dev "$VXLAN_DEV"
+ check_err $? "Failed to delete VNI"
+
+ [ "$NOTIFY_COUNT" -eq 1 ]
+ check_err $? "Expected 1 notification for VNI del, got $NOTIFY_COUNT"
+
+ log_test "VNI delete sends notification"
+}
+
+# Re-adding an existing VNI with the same attributes should not produce
+# a notification.
+test_vni_readd_no_notify()
+{
+ RET=0
+
+ bridge -n "$NS1" vni add vni 3000 dev "$VXLAN_DEV"
+
+ vni_notify_check \
+ bridge -n "$NS1" vni add vni 3000 dev "$VXLAN_DEV"
+ check_err $? "Failed to re-add VNI"
+
+ [ "$NOTIFY_COUNT" -eq 0 ]
+ check_err $? "Expected 0 notifications for VNI re-add, got $NOTIFY_COUNT"
+
+ bridge -n "$NS1" vni delete vni 3000 dev "$VXLAN_DEV"
+
+ log_test "VNI re-add does not send spurious notification"
+}
+
+# Updating an existing VNI's remote should produce a notification.
+test_vni_update_remote_notify()
+{
+ RET=0
+
+ bridge -n "$NS1" vni add vni 5000 remote 10.0.0.2 dev "$VXLAN_DEV"
+
+ vni_notify_check \
+ bridge -n "$NS1" vni add vni 5000 remote 10.0.0.3 dev "$VXLAN_DEV"
+ check_err $? "Failed to update VNI remote"
+
+ [ "$NOTIFY_COUNT" -eq 1 ]
+ check_err $? "Expected 1 notification for VNI remote update, got $NOTIFY_COUNT"
+
+ bridge -n "$NS1" vni delete vni 5000 dev "$VXLAN_DEV"
+
+ log_test "VNI remote update sends notification"
+}
+
+# Deleting a non-existent VNI should not produce a notification.
+test_vni_del_nonexistent_no_notify()
+{
+ RET=0
+
+ vni_notify_check \
+ bridge -n "$NS1" vni delete vni 9999 dev "$VXLAN_DEV" 2>/dev/null
+
+ [ "$NOTIFY_COUNT" -eq 0 ]
+ check_err $? "Expected 0 notifications for non-existent VNI del, got $NOTIFY_COUNT"
+
+ log_test "Non-existent VNI delete does not send notification"
+}
+
+trap defer_scopes_cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit "$EXIT_STATUS"