summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- MAINTAINERS 6
-rw-r--r-- drivers/bluetooth/btintel_pcie.c 13
-rw-r--r-- drivers/bluetooth/btintel_pcie.h 2
-rw-r--r-- drivers/bluetooth/btmtk.c 15
-rw-r--r-- drivers/bluetooth/hci_ath.c 3
-rw-r--r-- drivers/bluetooth/hci_bcsp.c 3
-rw-r--r-- drivers/bluetooth/hci_h4.c 3
-rw-r--r-- drivers/bluetooth/hci_h5.c 3
-rw-r--r-- drivers/bluetooth/virtio_bt.c 39
-rw-r--r-- drivers/net/dsa/mt7530.c 75
-rw-r--r-- drivers/net/dsa/mt7530.h 8
-rw-r--r-- drivers/net/ethernet/airoha/airoha_eth.c 6
-rw-r--r-- drivers/net/ethernet/amd/xgbe/xgbe.h 4
-rw-r--r-- drivers/net/ethernet/broadcom/bnxt/bnxt.c 16
-rw-r--r-- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c 29
-rw-r--r-- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c 10
-rw-r--r-- drivers/net/ethernet/cortina/gemini.c 5
-rw-r--r-- drivers/net/ethernet/freescale/enetc/enetc.h 1
-rw-r--r-- drivers/net/ethernet/freescale/enetc/enetc_vf.c 42
-rw-r--r-- drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c 33
-rw-r--r-- drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c 374
-rw-r--r-- drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h 24
-rw-r--r-- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c 3
-rw-r--r-- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c 231
-rw-r--r-- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c 30
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c 36
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c 30
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c 114
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h 2
-rw-r--r-- drivers/net/ethernet/meta/fbnic/fbnic_netdev.c 3
-rw-r--r-- drivers/net/ethernet/microchip/sparx5/sparx5_main.h 10
-rw-r--r-- drivers/net/ethernet/microchip/sparx5/sparx5_port.c 3
-rw-r--r-- drivers/net/ethernet/microsoft/mana/gdma_main.c 40
-rw-r--r-- drivers/net/ethernet/microsoft/mana/mana_en.c 10
-rw-r--r-- drivers/net/ethernet/microsoft/mana/shm_channel.c 5
-rw-r--r-- drivers/net/ethernet/renesas/rtsn.c 6
-rw-r--r-- drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c 2
-rw-r--r-- drivers/net/ethernet/wangxun/libwx/wx_hw.c 7
-rw-r--r-- drivers/net/ethernet/wangxun/libwx/wx_vf_common.c 4
-rw-r--r-- drivers/net/netdevsim/netdev.c 3
-rw-r--r-- drivers/net/netdevsim/netdevsim.h 4
-rw-r--r-- drivers/net/netdevsim/psp.c 65
-rw-r--r-- drivers/net/ovpn/io.c 7
-rw-r--r-- drivers/net/phy/bcm-phy-lib.c 9
-rw-r--r-- drivers/net/phy/bcm-phy-lib.h 1
-rw-r--r-- drivers/net/phy/bcm7xxx.c 14
-rw-r--r-- drivers/net/phy/broadcom.c 5
-rw-r--r-- drivers/net/phy/micrel.c 15
-rw-r--r-- drivers/net/usb/asix_devices.c 2
-rw-r--r-- drivers/net/usb/cdc_ncm.c 8
-rw-r--r-- drivers/net/usb/r8152.c 1
-rw-r--r-- drivers/net/veth.c 3
-rw-r--r-- drivers/net/wan/fsl_ucc_hdlc.c 9
-rw-r--r-- drivers/net/wireless/ath/ath10k/Kconfig 1
-rw-r--r-- drivers/net/wireless/ath/ath12k/core.c 77
-rw-r--r-- drivers/net/wireless/ath/ath12k/dp_rx.c 5
-rw-r--r-- drivers/net/wireless/ath/ath12k/mac.c 2
-rw-r--r-- drivers/net/wireless/ath/ath12k/p2p.c 2
-rw-r--r-- drivers/net/wireless/ath/ath12k/wmi.c 105
-rw-r--r-- drivers/net/wireless/ath/ath5k/base.c 3
-rw-r--r-- drivers/net/wireless/broadcom/b43/xmit.c 3
-rw-r--r-- drivers/net/wireless/broadcom/b43legacy/xmit.c 3
-rw-r--r-- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c 6
-rw-r--r-- drivers/net/wireless/marvell/libertas/if_usb.c 6
-rw-r--r-- drivers/net/wireless/rsi/rsi_common.h 5
-rw-r--r-- drivers/net/wireless/st/cw1200/pm.c 2
-rw-r--r-- drivers/net/wwan/t7xx/t7xx_modem_ops.c 20
-rw-r--r-- drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c 18
-rw-r--r-- drivers/net/wwan/t7xx/t7xx_port_proxy.h 2
-rw-r--r-- include/linux/netfilter/x_tables.h 8
-rw-r--r-- include/linux/sched/isolation.h 6
-rw-r--r-- include/net/bluetooth/hci_core.h 2
-rw-r--r-- include/net/dropreason-core.h 6
-rw-r--r-- include/net/ip_vs.h 31
-rw-r--r-- include/net/ipv6.h 3
-rw-r--r-- include/net/mana/shm_channel.h 6
-rw-r--r-- include/net/netfilter/nf_dup_netdev.h 13
-rw-r--r-- include/net/netfilter/nf_flow_table.h 4
-rw-r--r-- include/net/netns/ipv4.h 2
-rw-r--r-- net/bluetooth/bnep/core.c 13
-rw-r--r-- net/bluetooth/hci_conn.c 124
-rw-r--r-- net/bluetooth/hci_event.c 29
-rw-r--r-- net/bluetooth/hidp/core.c 27
-rw-r--r-- net/bluetooth/iso.c 56
-rw-r--r-- net/bluetooth/l2cap_core.c 14
-rw-r--r-- net/bluetooth/l2cap_sock.c 9
-rw-r--r-- net/bluetooth/rfcomm/core.c 7
-rw-r--r-- net/bluetooth/sco.c 60
-rw-r--r-- net/core/dev.c 2
-rw-r--r-- net/core/netpoll.c 23
-rw-r--r-- net/core/rtnetlink.c 1
-rw-r--r-- net/ipv4/ah4.c 14
-rw-r--r-- net/ipv4/esp4.c 3
-rw-r--r-- net/ipv4/igmp.c 58
-rw-r--r-- net/ipv4/inetpeer.c 3
-rw-r--r-- net/ipv4/ip_output.c 2
-rw-r--r-- net/ipv4/ipmr.c 10
-rw-r--r-- net/ipv4/netfilter/nf_socket_ipv4.c 3
-rw-r--r-- net/ipv4/tcp_ipv4.c 14
-rw-r--r-- net/ipv4/tcp_minisocks.c 2
-rw-r--r-- net/ipv6/Kconfig 4
-rw-r--r-- net/ipv6/ah6.c 14
-rw-r--r-- net/ipv6/esp6.c 3
-rw-r--r-- net/ipv6/exthdrs_core.c 7
-rw-r--r-- net/ipv6/ip6_gre.c 5
-rw-r--r-- net/ipv6/ip6_input.c 5
-rw-r--r-- net/ipv6/ip6_output.c 5
-rw-r--r-- net/ipv6/ip6_tunnel.c 4
-rw-r--r-- net/ipv6/netfilter/nf_socket_ipv6.c 5
-rw-r--r-- net/ipv6/route.c 5
-rw-r--r-- net/ipv6/tcp_ipv6.c 17
-rw-r--r-- net/ipv6/xfrm6_protocol.c 4
-rw-r--r-- net/mac80211/mlme.c 18
-rw-r--r-- net/mac80211/rx.c 6
-rw-r--r-- net/mac80211/tests/chan-mode.c 1
-rw-r--r-- net/mac80211/util.c 4
-rw-r--r-- net/mctp/test/route-test.c 2
-rw-r--r-- net/mctp/test/utils.c 2
-rw-r--r-- net/mptcp/fastopen.c 4
-rw-r--r-- net/mptcp/pm.c 62
-rw-r--r-- net/mptcp/pm_kernel.c 13
-rw-r--r-- net/mptcp/sockopt.c 4
-rw-r--r-- net/mptcp/subflow.c 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c 76
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 164
-rw-r--r-- net/netfilter/ipvs/ip_vs_est.c 83
-rw-r--r-- net/netfilter/nf_dup_netdev.c 16
-rw-r--r-- net/netfilter/nf_flow_table_core.c 1
-rw-r--r-- net/netfilter/nf_flow_table_ip.c 151
-rw-r--r-- net/netfilter/nf_flow_table_path.c 7
-rw-r--r-- net/netfilter/nf_tables_api.c 35
-rw-r--r-- net/netfilter/nf_tables_core.c 2
-rw-r--r-- net/netfilter/nft_compat.c 45
-rw-r--r-- net/netfilter/nft_exthdr.c 2
-rw-r--r-- net/netfilter/nft_fwd_netdev.c 29
-rw-r--r-- net/netfilter/nft_osf.c 2
-rw-r--r-- net/netfilter/nft_tproxy.c 8
-rw-r--r-- net/netfilter/x_tables.c 79
-rw-r--r-- net/netfilter/xt_CT.c 8
-rw-r--r-- net/netfilter/xt_TCPMSS.c 33
-rw-r--r-- net/netfilter/xt_TPROXY.c 11
-rw-r--r-- net/netfilter/xt_addrtype.c 25
-rw-r--r-- net/netfilter/xt_devgroup.c 18
-rw-r--r-- net/netfilter/xt_ecn.c 4
-rw-r--r-- net/netfilter/xt_hashlimit.c 4
-rw-r--r-- net/netfilter/xt_osf.c 3
-rw-r--r-- net/netfilter/xt_physdev.c 20
-rw-r--r-- net/netfilter/xt_policy.c 24
-rw-r--r-- net/netfilter/xt_set.c 39
-rw-r--r-- net/netfilter/xt_tcpmss.c 4
-rw-r--r-- net/openvswitch/vport-geneve.c 5
-rw-r--r-- net/openvswitch/vport-gre.c 5
-rw-r--r-- net/openvswitch/vport-netdev.c 64
-rw-r--r-- net/openvswitch/vport-netdev.h 2
-rw-r--r-- net/openvswitch/vport-vxlan.c 5
-rw-r--r-- net/psp/psp_main.c 42
-rw-r--r-- net/rds/message.c 20
-rw-r--r-- net/sched/sch_cake.c 155
-rw-r--r-- net/sched/sch_fq_codel.c 39
-rw-r--r-- net/sched/sch_pie.c 14
-rw-r--r-- net/sched/sch_red.c 2
-rw-r--r-- net/sched/sch_sfb.c 2
-rw-r--r-- net/sched/sch_sfq.c 48
-rw-r--r-- net/smc/af_smc.c 8
-rw-r--r-- net/tls/tls_sw.c 6
-rw-r--r-- net/unix/af_unix.c 3
-rw-r--r-- net/unix/garbage.c 6
-rw-r--r-- net/vmw_vsock/virtio_transport_common.c 4
-rw-r--r-- net/wireless/nl80211.c 27
-rw-r--r-- net/wireless/pmsr.c 2
-rw-r--r-- net/xdp/xsk.c 115
-rw-r--r-- net/xdp/xsk_buff_pool.c 3
-rw-r--r-- net/xfrm/xfrm_output.c 20
-rw-r--r-- net/xfrm/xfrm_state.c 12
-rw-r--r-- net/xfrm/xfrm_user.c 1
-rw-r--r-- tools/testing/selftests/drivers/net/hw/Makefile 1
-rw-r--r-- tools/testing/selftests/drivers/net/hw/config 5
-rwxr-xr-x tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py 204
-rw-r--r-- tools/testing/selftests/drivers/net/lib/py/load.py 5
-rw-r--r-- tools/testing/selftests/net/Makefile 1
-rw-r--r-- tools/testing/selftests/net/mptcp/mptcp_lib.sh 16
-rwxr-xr-x tools/testing/selftests/net/mptcp/pm_netlink.sh 20
-rwxr-xr-x tools/testing/selftests/net/openvswitch/openvswitch.sh 37
-rw-r--r-- tools/testing/selftests/net/openvswitch/ovs-dpctl.py 19
-rwxr-xr-x tools/testing/selftests/net/ovpn/test.sh 4
-rwxr-xr-x tools/testing/selftests/net/tcp_ecmp_failover.sh 216
-rw-r--r-- tools/testing/selftests/net/tls.c 43
-rw-r--r-- tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json 148
189 files changed, 3479 insertions, 1154 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 37b105a443dd..0650fa014f24 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7077,6 +7077,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/debugobjec
F: include/linux/debugobjects.h
F: lib/debugobjects.c
+DEC LANCE NETWORK DRIVER
+M: "Maciej W. Rozycki" <macro@orcam.me.uk>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/ethernet/amd/declance.c
+
DECSTATION PLATFORM SUPPORT
M: "Maciej W. Rozycki" <macro@orcam.me.uk>
L: linux-mips@vger.kernel.org
diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c
index 2f59c0d6f9ec..a3643e67b33f 100644
--- a/drivers/bluetooth/btintel_pcie.c
+++ b/drivers/bluetooth/btintel_pcie.c
@@ -289,6 +289,9 @@ static inline void btintel_pcie_dump_debug_registers(struct hci_dev *hdev)
skb_put_data(skb, buf, strlen(buf));
data->boot_stage_cache = reg;
+ if (reg & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING)
+ bt_dev_warn(hdev, "Controller device warning (boot_stage: 0x%8.8x)", reg);
+
reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_IPC_STATUS_REG);
snprintf(buf, sizeof(buf), "ipc status: 0x%8.8x", reg);
skb_put_data(skb, buf, strlen(buf));
@@ -880,8 +883,11 @@ static inline bool btintel_pcie_in_lockdown(struct btintel_pcie_data *data)
static inline bool btintel_pcie_in_error(struct btintel_pcie_data *data)
{
- return (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_ERR) ||
- (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER);
+ if (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING)
+ bt_dev_warn(data->hdev, "Controller device warning (boot_stage: 0x%8.8x)",
+ data->boot_stage_cache);
+
+ return data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER;
}
static void btintel_pcie_msix_gp1_handler(struct btintel_pcie_data *data)
@@ -914,7 +920,8 @@ static void btintel_pcie_msix_gp0_handler(struct btintel_pcie_data *data)
data->img_resp_cache = reg;
if (btintel_pcie_in_error(data)) {
- bt_dev_err(data->hdev, "Controller in error state");
+ bt_dev_err(data->hdev, "Controller in error state (boot_stage: 0x%8.8x)",
+ data->boot_stage_cache);
btintel_pcie_dump_debug_registers(data->hdev);
return;
}
diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h
index 3c7bb708362d..f922abd1e7d8 100644
--- a/drivers/bluetooth/btintel_pcie.h
+++ b/drivers/bluetooth/btintel_pcie.h
@@ -48,7 +48,7 @@
#define BTINTEL_PCIE_CSR_BOOT_STAGE_OPFW (BIT(2))
#define BTINTEL_PCIE_CSR_BOOT_STAGE_ROM_LOCKDOWN (BIT(10))
#define BTINTEL_PCIE_CSR_BOOT_STAGE_IML_LOCKDOWN (BIT(11))
-#define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_ERR (BIT(12))
+#define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING (BIT(12))
#define BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER (BIT(13))
#define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_HALTED (BIT(14))
#define BTINTEL_PCIE_CSR_BOOT_STAGE_MAC_ACCESS_ON (BIT(16))
diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c
index 6fb6ca274808..f70c1b0f8990 100644
--- a/drivers/bluetooth/btmtk.c
+++ b/drivers/bluetooth/btmtk.c
@@ -695,8 +695,13 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev,
if (data->evt_skb == NULL)
goto err_free_wc;
- /* Parse and handle the return WMT event */
- wmt_evt = (struct btmtk_hci_wmt_evt *)data->evt_skb->data;
+ wmt_evt = skb_pull_data(data->evt_skb, sizeof(*wmt_evt));
+ if (!wmt_evt) {
+ bt_dev_err(hdev, "WMT event too short (%u bytes)",
+ data->evt_skb->len);
+ err = -EINVAL;
+ goto err_free_skb;
+ }
if (wmt_evt->whdr.op != hdr->op) {
bt_dev_err(hdev, "Wrong op received %d expected %d",
wmt_evt->whdr.op, hdr->op);
@@ -712,6 +717,12 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev,
status = BTMTK_WMT_PATCH_DONE;
break;
case BTMTK_WMT_FUNC_CTRL:
+ if (!skb_pull_data(data->evt_skb,
+ sizeof(wmt_evt_funcc->status))) {
+ err = -EINVAL;
+ goto err_free_skb;
+ }
+
wmt_evt_funcc = (struct btmtk_hci_wmt_evt_funcc *)wmt_evt;
if (be16_to_cpu(wmt_evt_funcc->status) == 0x404)
status = BTMTK_WMT_ON_DONE;
diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c
index fa679ad0acdf..8201fa7f61e8 100644
--- a/drivers/bluetooth/hci_ath.c
+++ b/drivers/bluetooth/hci_ath.c
@@ -191,6 +191,9 @@ static int ath_recv(struct hci_uart *hu, const void *data, int count)
{
struct ath_struct *ath = hu->priv;
+ if (!ath)
+ return -ENODEV;
+
ath->rx_skb = h4_recv_buf(hu, ath->rx_skb, data, count,
ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts));
if (IS_ERR(ath->rx_skb)) {
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index b386f91d8b46..db56eead27ce 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -585,6 +585,9 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count)
if (!test_bit(HCI_UART_REGISTERED, &hu->flags))
return -EUNATCH;
+ if (!bcsp)
+ return -ENODEV;
+
BT_DBG("hu %p count %d rx_state %d rx_count %ld",
hu, count, bcsp->rx_state, bcsp->rx_count);
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index a889a66a326f..767372707498 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -109,6 +109,9 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count)
{
struct h4_struct *h4 = hu->priv;
+ if (!h4)
+ return -ENODEV;
+
h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count,
h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts));
if (IS_ERR(h4->rx_skb)) {
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index cfdf75dc2847..d35383718212 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -587,6 +587,9 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count)
struct h5 *h5 = hu->priv;
const unsigned char *ptr = data;
+ if (!h5)
+ return -ENODEV;
+
BT_DBG("%s pending %zu count %d", hu->hdev->name, h5->rx_pending,
count);
diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c
index 76d61af8a275..140ab55c9fc5 100644
--- a/drivers/bluetooth/virtio_bt.c
+++ b/drivers/bluetooth/virtio_bt.c
@@ -12,6 +12,7 @@
#include <net/bluetooth/hci_core.h>
#define VERSION "0.1"
+#define VIRTBT_RX_BUF_SIZE 1000
enum {
VIRTBT_VQ_TX,
@@ -33,11 +34,11 @@ static int virtbt_add_inbuf(struct virtio_bluetooth *vbt)
struct sk_buff *skb;
int err;
- skb = alloc_skb(1000, GFP_KERNEL);
+ skb = alloc_skb(VIRTBT_RX_BUF_SIZE, GFP_KERNEL);
if (!skb)
return -ENOMEM;
- sg_init_one(sg, skb->data, 1000);
+ sg_init_one(sg, skb->data, VIRTBT_RX_BUF_SIZE);
err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL);
if (err < 0) {
@@ -197,6 +198,7 @@ static int virtbt_shutdown_generic(struct hci_dev *hdev)
static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb)
{
+ size_t min_hdr;
__u8 pkt_type;
pkt_type = *((__u8 *) skb->data);
@@ -204,16 +206,32 @@ static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb)
switch (pkt_type) {
case HCI_EVENT_PKT:
+ min_hdr = sizeof(struct hci_event_hdr);
+ break;
case HCI_ACLDATA_PKT:
+ min_hdr = sizeof(struct hci_acl_hdr);
+ break;
case HCI_SCODATA_PKT:
+ min_hdr = sizeof(struct hci_sco_hdr);
+ break;
case HCI_ISODATA_PKT:
- hci_skb_pkt_type(skb) = pkt_type;
- hci_recv_frame(vbt->hdev, skb);
+ min_hdr = sizeof(struct hci_iso_hdr);
break;
default:
kfree_skb(skb);
- break;
+ return;
+ }
+
+ if (skb->len < min_hdr) {
+ bt_dev_err_ratelimited(vbt->hdev,
+ "rx pkt_type 0x%02x payload %u < hdr %zu\n",
+ pkt_type, skb->len, min_hdr);
+ kfree_skb(skb);
+ return;
}
+
+ hci_skb_pkt_type(skb) = pkt_type;
+ hci_recv_frame(vbt->hdev, skb);
}
static void virtbt_rx_work(struct work_struct *work)
@@ -227,8 +245,15 @@ static void virtbt_rx_work(struct work_struct *work)
if (!skb)
return;
- skb_put(skb, len);
- virtbt_rx_handle(vbt, skb);
+ if (!len || len > VIRTBT_RX_BUF_SIZE) {
+ bt_dev_err_ratelimited(vbt->hdev,
+ "rx reply len %u outside [1, %u]\n",
+ len, VIRTBT_RX_BUF_SIZE);
+ kfree_skb(skb);
+ } else {
+ skb_put(skb, len);
+ virtbt_rx_handle(vbt, skb);
+ }
if (virtbt_add_inbuf(vbt) < 0)
return;
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index b9423389c2ef..44d670904ad8 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -25,6 +25,9 @@
#include "mt7530.h"
+#define MT7530_STATS_POLL_INTERVAL (1 * HZ)
+#define MT7530_STATS_RATE_LIMIT (HZ / 10)
+
static struct mt753x_pcs *pcs_to_mt753x_pcs(struct phylink_pcs *pcs)
{
return container_of(pcs, struct mt753x_pcs, pcs);
@@ -906,10 +909,9 @@ static void mt7530_get_rmon_stats(struct dsa_switch *ds, int port,
*ranges = mt7530_rmon_ranges;
}
-static void mt7530_get_stats64(struct dsa_switch *ds, int port,
- struct rtnl_link_stats64 *storage)
+static void mt7530_read_port_stats64(struct mt7530_priv *priv, int port,
+ struct rtnl_link_stats64 *storage)
{
- struct mt7530_priv *priv = ds->priv;
uint64_t data;
/* MIB counter doesn't provide a FramesTransmittedOK but instead
@@ -951,6 +953,54 @@ static void mt7530_get_stats64(struct dsa_switch *ds, int port,
&storage->rx_crc_errors);
}
+static void mt7530_stats_refresh(struct mt7530_priv *priv)
+{
+ struct rtnl_link_stats64 stats = {};
+ struct dsa_port *dp;
+ int port;
+
+ dsa_switch_for_each_user_port(dp, priv->ds) {
+ port = dp->index;
+
+ mt7530_read_port_stats64(priv, port, &stats);
+
+ spin_lock_bh(&priv->stats_lock);
+ priv->ports[port].stats = stats;
+ priv->stats_last = jiffies;
+ spin_unlock_bh(&priv->stats_lock);
+ }
+}
+
+static void mt7530_stats_poll(struct work_struct *work)
+{
+ struct mt7530_priv *priv = container_of(work, struct mt7530_priv,
+ stats_work.work);
+
+ mt7530_stats_refresh(priv);
+ schedule_delayed_work(&priv->stats_work,
+ MT7530_STATS_POLL_INTERVAL);
+}
+
+static void mt7530_get_stats64(struct dsa_switch *ds, int port,
+ struct rtnl_link_stats64 *storage)
+{
+ struct mt7530_priv *priv = ds->priv;
+ bool refresh;
+
+ if (priv->bus) {
+ spin_lock_bh(&priv->stats_lock);
+ *storage = priv->ports[port].stats;
+ refresh = time_after(jiffies, priv->stats_last +
+ MT7530_STATS_RATE_LIMIT);
+ spin_unlock_bh(&priv->stats_lock);
+ if (refresh)
+ mod_delayed_work(system_percpu_wq,
+ &priv->stats_work, 0);
+ } else {
+ mt7530_read_port_stats64(priv, port, storage);
+ }
+}
+
static void mt7530_get_eth_ctrl_stats(struct dsa_switch *ds, int port,
struct ethtool_eth_ctrl_stats *ctrl_stats)
{
@@ -3137,9 +3187,24 @@ mt753x_setup(struct dsa_switch *ds)
if (ret && priv->irq_domain)
mt7530_free_mdio_irq(priv);
+ if (!ret && priv->bus) {
+ mt7530_stats_refresh(priv);
+ schedule_delayed_work(&priv->stats_work,
+ MT7530_STATS_POLL_INTERVAL);
+ }
+
return ret;
}
+static void
+mt753x_teardown(struct dsa_switch *ds)
+{
+ struct mt7530_priv *priv = ds->priv;
+
+ if (priv->bus)
+ cancel_delayed_work_sync(&priv->stats_work);
+}
+
static int mt753x_set_mac_eee(struct dsa_switch *ds, int port,
struct ethtool_keee *e)
{
@@ -3257,6 +3322,7 @@ static int mt7988_setup(struct dsa_switch *ds)
static const struct dsa_switch_ops mt7530_switch_ops = {
.get_tag_protocol = mtk_get_tag_protocol,
.setup = mt753x_setup,
+ .teardown = mt753x_teardown,
.preferred_default_local_cpu_port = mt753x_preferred_default_local_cpu_port,
.get_strings = mt7530_get_strings,
.get_ethtool_stats = mt7530_get_ethtool_stats,
@@ -3395,6 +3461,9 @@ mt7530_probe_common(struct mt7530_priv *priv)
priv->ds->ops = &mt7530_switch_ops;
priv->ds->phylink_mac_ops = &mt753x_phylink_mac_ops;
mutex_init(&priv->reg_mutex);
+ spin_lock_init(&priv->stats_lock);
+ INIT_DELAYED_WORK(&priv->stats_work, mt7530_stats_poll);
+
dev_set_drvdata(dev, priv);
return 0;
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 3e0090bed298..dd33b0df3419 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -796,6 +796,7 @@ struct mt7530_fdb {
* @pvid: The VLAN specified is to be considered a PVID at ingress. Any
* untagged frames will be assigned to the related VLAN.
* @sgmii_pcs: Pointer to PCS instance for SerDes ports
+ * @stats: Cached port statistics for MDIO-connected switches
*/
struct mt7530_port {
bool enable;
@@ -803,6 +804,7 @@ struct mt7530_port {
u32 pm;
u16 pvid;
struct phylink_pcs *sgmii_pcs;
+ struct rtnl_link_stats64 stats;
};
/* Port 5 mode definitions of the MT7530 switch */
@@ -875,6 +877,9 @@ struct mt753x_info {
* @create_sgmii: Pointer to function creating SGMII PCS instance(s)
* @active_cpu_ports: Holding the active CPU ports
* @mdiodev: The pointer to the MDIO device structure
+ * @stats_lock: Protects cached per-port stats from concurrent access
+ * @stats_work: Delayed work for polling MIB counters on MDIO switches
+ * @stats_last: Jiffies timestamp of last MIB counter poll
*/
struct mt7530_priv {
struct device *dev;
@@ -900,6 +905,9 @@ struct mt7530_priv {
int (*create_sgmii)(struct mt7530_priv *priv);
u8 active_cpu_ports;
struct mdio_device *mdiodev;
+ spinlock_t stats_lock; /* protects cached stats counters */
+ struct delayed_work stats_work;
+ unsigned long stats_last;
};
struct mt7530_hw_vlan_entry {
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index f8b3d53bccad..d0c0c0ec8a80 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -2120,14 +2120,12 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
error_unmap:
- while (!list_empty(&tx_list)) {
- e = list_first_entry(&tx_list, struct airoha_queue_entry,
- list);
+ list_for_each_entry(e, &tx_list, list) {
dma_unmap_single(dev->dev.parent, e->dma_addr, e->dma_len,
DMA_TO_DEVICE);
e->dma_addr = 0;
- list_move_tail(&e->list, &q->tx_list);
}
+ list_splice(&tx_list, &q->tx_list);
spin_unlock_bh(&q->lock);
error:
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 60b7e53206d1..3d3b09010d48 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -135,11 +135,11 @@
*/
#define XGBE_TSTAMP_SSINC 20
#define XGBE_TSTAMP_SNSINC 0
-#define XGBE_PTP_ACT_CLK_FREQ 500000000
+#define XGBE_PTP_ACT_CLK_FREQ (NSEC_PER_SEC / XGBE_TSTAMP_SSINC)
#define XGBE_V2_TSTAMP_SSINC 0xA
#define XGBE_V2_TSTAMP_SNSINC 0
-#define XGBE_V2_PTP_ACT_CLK_FREQ 1000000000
+#define XGBE_V2_PTP_ACT_CLK_FREQ (NSEC_PER_SEC / XGBE_V2_TSTAMP_SSINC)
/* Define maximum supported values */
#define XGBE_MAX_PPS_OUT 4
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 8c55874f44ca..008c34cff7b4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3825,7 +3825,10 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp)
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
if (!bp->max_tpa_v2)
return 0;
- bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
+ bp->max_tpa = min_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
+ /* Older P5 FW sets max_tpa_v2 low by mistake except NPAR */
+ if (bp->max_tpa <= 32 && BNXT_CHIP_P5(bp) && !BNXT_NPAR(bp))
+ bp->max_tpa = MAX_TPA_P5;
}
for (i = 0; i < bp->rx_nr_rings; i++) {
@@ -17360,9 +17363,14 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
netdev_info(bp->dev, "PCI Slot Reset\n");
- if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) &&
- test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state))
- msleep(900);
+ if (test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) {
+ /* After DPC, the chip should return CRS when the vendor ID
+ * config register is read until it is ready. On all chips,
+ * this is not happening reliably so add a 5-second delay as a
+ * workaround.
+ */
+ msleep(5000);
+ }
netdev_lock(netdev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index 53f336db4fcc..5d41dc1bc782 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -419,31 +419,13 @@ void bnxt_ptp_reapply_pps(struct bnxt *bp)
}
}
-static int bnxt_get_target_cycles(struct bnxt_ptp_cfg *ptp, u64 target_ns,
- u64 *cycles_delta)
-{
- u64 cycles_now;
- u64 nsec_now, nsec_delta;
- int rc;
-
- rc = bnxt_refclk_read(ptp->bp, NULL, &cycles_now);
- if (rc)
- return rc;
-
- nsec_now = bnxt_timecounter_cyc2time(ptp, cycles_now);
-
- nsec_delta = target_ns - nsec_now;
- *cycles_delta = div64_u64(nsec_delta << ptp->cc.shift, ptp->cc.mult);
- return 0;
-}
-
static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp,
struct ptp_clock_request *rq)
{
struct hwrm_func_ptp_cfg_input *req;
struct bnxt *bp = ptp->bp;
struct timespec64 ts;
- u64 target_ns, delta;
+ u64 target_ns;
u16 enables;
int rc;
@@ -451,10 +433,6 @@ static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp,
ts.tv_nsec = rq->perout.start.nsec;
target_ns = timespec64_to_ns(&ts);
- rc = bnxt_get_target_cycles(ptp, target_ns, &delta);
- if (rc)
- return rc;
-
rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
if (rc)
return rc;
@@ -468,7 +446,10 @@ static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp,
req->ptp_freq_adj_dll_phase = 0;
req->ptp_freq_adj_ext_period = cpu_to_le32(NSEC_PER_SEC);
req->ptp_freq_adj_ext_up = 0;
- req->ptp_freq_adj_ext_phase_lower = cpu_to_le32(delta);
+ req->ptp_freq_adj_ext_phase_lower =
+ cpu_to_le32(lower_32_bits(target_ns));
+ req->ptp_freq_adj_ext_phase_upper =
+ cpu_to_le32(upper_32_bits(target_ns));
return hwrm_req_send(bp, req);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 052bf69cfa4c..5c751933da6a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -175,8 +175,14 @@ int bnxt_register_dev(struct bnxt_en_dev *edev,
ulp->handle = handle;
rcu_assign_pointer(ulp->ulp_ops, ulp_ops);
- if (test_bit(BNXT_STATE_OPEN, &bp->state))
- bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]);
+ if (test_bit(BNXT_STATE_OPEN, &bp->state)) {
+ rc = bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]);
+ if (rc) {
+ netdev_err(dev, "Failed to configure dual VNIC mode\n");
+ RCU_INIT_POINTER(ulp->ulp_ops, NULL);
+ goto exit;
+ }
+ }
edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp);
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index 4824232f4890..065cbbf52686 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -1491,6 +1491,11 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget)
gpage = gmac_get_queue_page(geth, port, mapping + PAGE_SIZE);
if (!gpage) {
dev_err(geth->dev, "could not find mapping\n");
+ if (skb) {
+ napi_free_frags(&port->napi);
+ port->stats.rx_dropped++;
+ skb = NULL;
+ }
continue;
}
page = gpage->page;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index e663bb5e614e..e691144e8756 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -330,6 +330,7 @@ struct enetc_si {
struct workqueue_struct *workqueue;
struct work_struct rx_mode_task;
struct dentry *debugfs_root;
+ struct enetc_msg_swbd msg; /* Only valid for VSI */
};
#define ENETC_SI_ALIGN 32
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
index 6c4b374bcb0e..df8e95cc47d0 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -17,11 +17,36 @@ static void enetc_msg_vsi_write_msg(struct enetc_hw *hw,
enetc_wr(hw, ENETC_VSIMSGSNDAR0, val);
}
+static void enetc_msg_dma_free(struct device *dev, struct enetc_msg_swbd *msg)
+{
+ if (msg->vaddr) {
+ dma_free_coherent(dev, msg->size, msg->vaddr, msg->dma);
+ msg->vaddr = NULL;
+ }
+}
+
static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg)
{
+ struct device *dev = &si->pdev->dev;
int timeout = 100;
u32 vsimsgsr;
+ /* The VSI mailbox may be busy if last message was not yet processed
+ * by PSI. So need to check the mailbox status before sending.
+ */
+ vsimsgsr = enetc_rd(&si->hw, ENETC_VSIMSGSR);
+ if (vsimsgsr & ENETC_VSIMSGSR_MB) {
+ /* It is safe to free the DMA buffer here, the caller does
+ * not access the DMA buffer if enetc_msg_vsi_send() fails.
+ */
+ enetc_msg_dma_free(dev, msg);
+ dev_err(dev, "VSI mailbox is busy\n");
+ return -EIO;
+ }
+
+ /* Free the DMA buffer of the last message */
+ enetc_msg_dma_free(dev, &si->msg);
+ si->msg = *msg;
enetc_msg_vsi_write_msg(&si->hw, msg);
do {
@@ -32,12 +57,15 @@ static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg)
usleep_range(1000, 2000);
} while (--timeout);
- if (!timeout)
+ if (!timeout) {
+ dev_err(dev, "VSI mailbox timeout\n");
+
return -ETIMEDOUT;
+ }
/* check for message delivery error */
if (vsimsgsr & ENETC_VSIMSGSR_MS) {
- dev_err(&si->pdev->dev, "VSI command execute error: %d\n",
+ dev_err(dev, "VSI command execute error: %d\n",
ENETC_SIMSGSR_GET_MC(vsimsgsr));
return -EIO;
}
@@ -50,7 +78,6 @@ static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv,
{
struct enetc_msg_cmd_set_primary_mac *cmd;
struct enetc_msg_swbd msg;
- int err;
msg.size = ALIGN(sizeof(struct enetc_msg_cmd_set_primary_mac), 64);
msg.vaddr = dma_alloc_coherent(priv->dev, msg.size, &msg.dma,
@@ -67,11 +94,7 @@ static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv,
memcpy(&cmd->mac, saddr, sizeof(struct sockaddr));
/* send the command and wait */
- err = enetc_msg_vsi_send(priv->si, &msg);
-
- dma_free_coherent(priv->dev, msg.size, msg.vaddr, msg.dma);
-
- return err;
+ return enetc_msg_vsi_send(priv->si, &msg);
}
static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr)
@@ -259,6 +282,7 @@ static void enetc_vf_remove(struct pci_dev *pdev)
{
struct enetc_si *si = pci_get_drvdata(pdev);
struct enetc_ndev_priv *priv;
+ struct enetc_msg_swbd msg;
priv = netdev_priv(si->ndev);
unregister_netdev(si->ndev);
@@ -270,7 +294,9 @@ static void enetc_vf_remove(struct pci_dev *pdev)
free_netdev(si->ndev);
+ msg = si->msg;
enetc_pci_remove(pdev);
+ enetc_msg_dma_free(&pdev->dev, &msg);
}
static const struct pci_device_id enetc_vf_id_table[] = {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c
index 3debf2fae1a4..6f13296303cb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c
@@ -249,34 +249,21 @@ DEFINE_SHOW_ATTRIBUTE(npc_defrag);
int npc_cn20k_debugfs_init(struct rvu *rvu)
{
struct npc_priv_t *npc_priv = npc_priv_get();
- struct dentry *npc_dentry;
- npc_dentry = debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc,
- npc_priv, &npc_mcam_layout_fops);
+ debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc,
+ npc_priv, &npc_mcam_layout_fops);
- if (!npc_dentry)
- return -EFAULT;
+ debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc,
+ rvu, &npc_mcam_default_fops);
- npc_dentry = debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc,
- rvu, &npc_mcam_default_fops);
+ debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc,
+ npc_priv, &npc_vidx2idx_map_fops);
- if (!npc_dentry)
- return -EFAULT;
+ debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc,
+ npc_priv, &npc_idx2vidx_map_fops);
- npc_dentry = debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc,
- npc_priv, &npc_vidx2idx_map_fops);
- if (!npc_dentry)
- return -EFAULT;
-
- npc_dentry = debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc,
- npc_priv, &npc_idx2vidx_map_fops);
- if (!npc_dentry)
- return -EFAULT;
-
- npc_dentry = debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc,
- npc_priv, &npc_defrag_fops);
- if (!npc_dentry)
- return -EFAULT;
+ debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc,
+ npc_priv, &npc_defrag_fops);
return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
index 7291fdb89b03..6b3f453fd500 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
@@ -798,7 +798,7 @@ program_mkex_extr:
iounmap(mkex_prfl_addr);
}
-void
+int
npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr,
int index, bool enable)
{
@@ -808,7 +808,12 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr,
u64 cfg, hw_prio;
u8 kw_type;
- npc_mcam_idx_2_key_type(rvu, index, &kw_type);
+ if (index < 0 || index >= mcam->total_entries)
+ return -EINVAL;
+
+ if (npc_mcam_idx_2_key_type(rvu, index, &kw_type))
+ return -EINVAL;
+
if (kw_type == NPC_MCAM_KEY_X2) {
cfg = rvu_read64(rvu, blkaddr,
NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx,
@@ -819,7 +824,7 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr,
rvu_write64(rvu, blkaddr,
NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank),
cfg);
- return;
+ return 0;
}
/* For NPC_CN20K_MCAM_KEY_X4 keys, both the banks
@@ -836,10 +841,12 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr,
NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank),
cfg);
}
+
+ return 0;
}
-void
-npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index)
+static void
+npc_clear_x2_entry(struct rvu *rvu, int blkaddr, int bank, int index)
{
rvu_write64(rvu, blkaddr,
NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 1),
@@ -873,6 +880,33 @@ npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index)
NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(index, bank), 0);
}
+int
+npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int mcam_idx)
+{
+ struct npc_mcam *mcam = &rvu->hw->mcam;
+ int bank = npc_get_bank(mcam, mcam_idx);
+ u8 kw_type;
+ int index;
+
+ if (npc_mcam_idx_2_key_type(rvu, mcam_idx, &kw_type))
+ return -EINVAL;
+
+ index = mcam_idx & (mcam->banksize - 1);
+
+ if (kw_type == NPC_MCAM_KEY_X2) {
+ npc_clear_x2_entry(rvu, blkaddr, bank, index);
+ return 0;
+ }
+
+ /* For NPC_MCAM_KEY_X4 keys, both the banks
+ * need to be programmed with the same value.
+ */
+ for (bank = 0; bank < mcam->banks_per_entry; bank++)
+ npc_clear_x2_entry(rvu, blkaddr, bank, index);
+
+ return 0;
+}
+
static void npc_cn20k_get_keyword(struct cn20k_mcam_entry *entry, int idx,
u64 *cam0, u64 *cam1)
{
@@ -1014,48 +1048,27 @@ static void npc_cn20k_config_kw_x4(struct rvu *rvu, struct npc_mcam *mcam,
kw, req_kw_type);
}
-static void
-npc_cn20k_set_mcam_bank_cfg(struct rvu *rvu, int blkaddr, int mcam_idx,
- int bank, u8 kw_type, bool enable, u8 hw_prio)
-{
- struct npc_mcam *mcam = &rvu->hw->mcam;
- u64 bank_cfg;
-
- bank_cfg = (u64)hw_prio << 24;
- if (enable)
- bank_cfg |= 0x1;
-
- if (kw_type == NPC_MCAM_KEY_X2) {
- rvu_write64(rvu, blkaddr,
- NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank),
- bank_cfg);
- return;
- }
-
- /* For NPC_MCAM_KEY_X4 keys, both the banks
- * need to be programmed with the same value.
- */
- for (bank = 0; bank < mcam->banks_per_entry; bank++) {
- rvu_write64(rvu, blkaddr,
- NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank),
- bank_cfg);
- }
-}
-
-void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index,
- u8 intf, struct cn20k_mcam_entry *entry,
- bool enable, u8 hw_prio, u8 req_kw_type)
+int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index,
+ u8 intf, struct cn20k_mcam_entry *entry,
+ bool enable, u8 hw_prio, u8 req_kw_type)
{
struct npc_mcam *mcam = &rvu->hw->mcam;
int mcam_idx = index % mcam->banksize;
int bank = index / mcam->banksize;
+ u64 bank_cfg = (u64)hw_prio << 24;
int kw = 0;
u8 kw_type;
+ if (index < 0 || index >= mcam->total_entries)
+ return -EINVAL;
+
+ if (npc_mcam_idx_2_key_type(rvu, index, &kw_type))
+ return -EINVAL;
+
/* Disable before mcam entry update */
- npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false);
+ if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false))
+ return -EINVAL;
- npc_mcam_idx_2_key_type(rvu, index, &kw_type);
/* CAM1 takes the comparison value and
* CAM0 specifies match for a bit in key being '0' or '1' or 'dontcare'.
* CAM1<n> = 0 & CAM0<n> = 1 => match if key<n> = 0
@@ -1064,7 +1077,7 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index,
*/
if (kw_type == NPC_MCAM_KEY_X2) {
/* Clear mcam entry to avoid writes being suppressed by NPC */
- npc_cn20k_clear_mcam_entry(rvu, blkaddr, bank, mcam_idx);
+ npc_clear_x2_entry(rvu, blkaddr, bank, mcam_idx);
npc_cn20k_config_kw_x2(rvu, mcam, blkaddr,
mcam_idx, intf, entry,
bank, kw_type, kw, req_kw_type);
@@ -1085,44 +1098,55 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index,
NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx,
bank, 1),
entry->vtag_action);
- goto set_cfg;
- }
-
- /* Clear mcam entry to avoid writes being suppressed by NPC */
- npc_cn20k_clear_mcam_entry(rvu, blkaddr, 0, mcam_idx);
- npc_cn20k_clear_mcam_entry(rvu, blkaddr, 1, mcam_idx);
- npc_cn20k_config_kw_x4(rvu, mcam, blkaddr,
- mcam_idx, intf, entry,
- kw_type, req_kw_type);
- for (bank = 0; bank < mcam->banks_per_entry; bank++) {
- /* Set 'action' */
+ /* Set HW priority */
rvu_write64(rvu, blkaddr,
- NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx,
- bank, 0),
- entry->action);
+ NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank),
+ bank_cfg);
- /* Set TAG 'action' */
- rvu_write64(rvu, blkaddr,
- NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx,
- bank, 1),
- entry->vtag_action);
+ } else {
+ /* Clear mcam entry to avoid writes being suppressed by NPC */
+ npc_clear_x2_entry(rvu, blkaddr, 0, mcam_idx);
+ npc_clear_x2_entry(rvu, blkaddr, 1, mcam_idx);
- /* Set 'action2' for inline receive */
- rvu_write64(rvu, blkaddr,
- NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx,
- bank, 2),
- entry->action2);
+ npc_cn20k_config_kw_x4(rvu, mcam, blkaddr,
+ mcam_idx, intf, entry,
+ kw_type, req_kw_type);
+ for (bank = 0; bank < mcam->banks_per_entry; bank++) {
+ /* Set 'action' */
+ rvu_write64(rvu, blkaddr,
+ NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx,
+ bank, 0),
+ entry->action);
+
+ /* Set TAG 'action' */
+ rvu_write64(rvu, blkaddr,
+ NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx,
+ bank, 1),
+ entry->vtag_action);
+
+ /* Set 'action2' for inline receive */
+ rvu_write64(rvu, blkaddr,
+ NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx,
+ bank, 2),
+ entry->action2);
+
+ /* Set HW priority */
+ rvu_write64(rvu, blkaddr,
+ NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank),
+ bank_cfg);
+ }
}
-set_cfg:
/* TODO: */
/* PF installing VF rule */
- npc_cn20k_set_mcam_bank_cfg(rvu, blkaddr, mcam_idx, bank,
- kw_type, enable, hw_prio);
+ if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable))
+ return -EINVAL;
+
+ return 0;
}
-void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest)
+int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest)
{
struct npc_mcam *mcam = &rvu->hw->mcam;
u64 cfg, sreg, dreg, soff, doff;
@@ -1130,12 +1154,20 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest)
int bank, i, sb, db;
int dbank, sbank;
+ if (src >= mcam->total_entries || dest >= mcam->total_entries)
+ return -EINVAL;
+
dbank = npc_get_bank(mcam, dest);
sbank = npc_get_bank(mcam, src);
- npc_mcam_idx_2_key_type(rvu, src, &src_kwtype);
- npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype);
+
+ if (npc_mcam_idx_2_key_type(rvu, src, &src_kwtype))
+ return -EINVAL;
+
+ if (npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype))
+ return -EINVAL;
+
if (src_kwtype != dest_kwtype)
- return;
+ return -EINVAL;
src &= (mcam->banksize - 1);
dest &= (mcam->banksize - 1);
@@ -1170,6 +1202,8 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest)
if (src_kwtype == NPC_MCAM_KEY_X2)
break;
}
+
+ return 0;
}
static void npc_cn20k_fill_entryword(struct cn20k_mcam_entry *entry, int idx,
@@ -1179,21 +1213,37 @@ static void npc_cn20k_fill_entryword(struct cn20k_mcam_entry *entry, int idx,
entry->kw_mask[idx] = cam1 ^ cam0;
}
-void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index,
- struct cn20k_mcam_entry *entry,
- u8 *intf, u8 *ena, u8 *hw_prio)
+int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index,
+ struct cn20k_mcam_entry *entry,
+ u8 *intf, u8 *ena, u8 *hw_prio)
{
struct npc_mcam *mcam = &rvu->hw->mcam;
u64 cam0, cam1, bank_cfg, cfg;
int kw = 0, bank;
u8 kw_type;
- npc_mcam_idx_2_key_type(rvu, index, &kw_type);
+ if (index >= mcam->total_entries)
+ return -EINVAL;
+
+ if (npc_mcam_idx_2_key_type(rvu, index, &kw_type))
+ return -EINVAL;
bank = npc_get_bank(mcam, index);
index &= (mcam->banksize - 1);
cfg = rvu_read64(rvu, blkaddr,
+ NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 0));
+ entry->action = cfg;
+
+ cfg = rvu_read64(rvu, blkaddr,
+ NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 1));
+ entry->vtag_action = cfg;
+
+ cfg = rvu_read64(rvu, blkaddr,
+ NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 2));
+ entry->action2 = cfg;
+
+ cfg = rvu_read64(rvu, blkaddr,
NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index,
bank, 1)) & 3;
*intf = cfg;
@@ -1242,7 +1292,7 @@ void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index,
bank,
0));
npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1);
- goto read_action;
+ return 0;
}
for (bank = 0; bank < mcam->banks_per_entry; bank++, kw = kw + 4) {
@@ -1287,17 +1337,7 @@ void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index,
npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1);
}
-read_action:
- /* 'action' is set to same value for both bank '0' and '1'.
- * Hence, reading bank '0' should be enough.
- */
- cfg = rvu_read64(rvu, blkaddr,
- NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 0));
- entry->action = cfg;
-
- cfg = rvu_read64(rvu, blkaddr,
- NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 1));
- entry->vtag_action = cfg;
+ return 0;
}
int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu,
@@ -1335,11 +1375,10 @@ int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu,
if (is_pffunc_af(req->hdr.pcifunc))
nix_intf = req->intf;
- npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf,
- &req->entry_data, req->enable_entry,
- req->hw_prio, req->req_kw_type);
+ rc = npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf,
+ &req->entry_data, req->enable_entry,
+ req->hw_prio, req->req_kw_type);
- rc = 0;
exit:
mutex_unlock(&mcam->lock);
return rc;
@@ -1361,11 +1400,13 @@ int rvu_mbox_handler_npc_cn20k_mcam_read_entry(struct rvu *rvu,
mutex_lock(&mcam->lock);
rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry);
- if (!rc)
- npc_cn20k_read_mcam_entry(rvu, blkaddr, req->entry,
- &rsp->entry_data, &rsp->intf,
- &rsp->enable, &rsp->hw_prio);
+ if (rc)
+ goto fail;
+ rc = npc_cn20k_read_mcam_entry(rvu, blkaddr, req->entry,
+ &rsp->entry_data, &rsp->intf,
+ &rsp->enable, &rsp->hw_prio);
+fail:
mutex_unlock(&mcam->lock);
return rc;
}
@@ -1375,11 +1416,13 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu,
struct npc_mcam_alloc_and_write_entry_rsp *rsp)
{
struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+ struct npc_mcam_free_entry_req free_req = { 0 };
struct npc_mcam_alloc_entry_req entry_req;
struct npc_mcam_alloc_entry_rsp entry_rsp;
struct npc_mcam *mcam = &rvu->hw->mcam;
u16 entry = NPC_MCAM_ENTRY_INVALID;
- int blkaddr, rc;
+ struct msg_rsp free_rsp;
+ int blkaddr, rc, err;
u8 nix_intf;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -1415,12 +1458,23 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu,
else
nix_intf = pfvf->nix_rx_intf;
- npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf,
- &req->entry_data, req->enable_entry,
- req->hw_prio, req->req_kw_type);
+ rc = npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf,
+ &req->entry_data, req->enable_entry,
+ req->hw_prio, req->req_kw_type);
mutex_unlock(&mcam->lock);
+ if (rc) {
+ free_req.hdr.pcifunc = req->hdr.pcifunc;
+ free_req.entry = entry_rsp.entry;
+ err = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &free_rsp);
+ if (err)
+ dev_err(rvu->dev,
+ "%s: Error to free mcam idx %u\n",
+ __func__, entry_rsp.entry);
+ return rc;
+ }
+
rsp->entry = entry_rsp.entry;
return 0;
}
@@ -1480,9 +1534,9 @@ int rvu_mbox_handler_npc_cn20k_read_base_steer_rule(struct rvu *rvu,
read_entry:
/* Read the mcam entry */
- npc_cn20k_read_mcam_entry(rvu, blkaddr, index,
- &rsp->entry, &intf,
- &enable, &hw_prio);
+ rc = npc_cn20k_read_mcam_entry(rvu, blkaddr, index,
+ &rsp->entry, &intf,
+ &enable, &hw_prio);
mutex_unlock(&mcam->lock);
out:
return rc;
@@ -2305,6 +2359,7 @@ err2:
__npc_subbank_mark_free(rvu, sb);
err1:
kfree(save);
+ *alloc_cnt = 0;
return rc;
}
@@ -3482,7 +3537,7 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu,
{
int alloc_cnt1, alloc_cnt2;
struct npc_subbank *sb;
- int rc, sb_off, i;
+ int rc, sb_off, i, err;
bool deleted;
sb = &npc_priv.sb[f->idx];
@@ -3496,6 +3551,7 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu,
NPC_MCAM_LOWER_PRIO,
false, cnt, save, cnt, true,
&alloc_cnt1);
+
if (alloc_cnt1 < cnt) {
rc = __npc_subbank_alloc(rvu, sb,
NPC_MCAM_KEY_X2, sb->b1b,
@@ -3511,15 +3567,17 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu,
dev_err(rvu->dev,
"%s: Failed to alloc cnt=%u alloc_cnt1=%u alloc_cnt2=%u\n",
__func__, cnt, alloc_cnt1, alloc_cnt2);
+ rc = -ENOSPC;
goto fail_free_alloc;
}
+
return 0;
fail_free_alloc:
for (i = 0; i < alloc_cnt1 + alloc_cnt2; i++) {
- rc = npc_mcam_idx_2_subbank_idx(rvu, save[i],
- &sb, &sb_off);
- if (rc) {
+ err = npc_mcam_idx_2_subbank_idx(rvu, save[i],
+ &sb, &sb_off);
+ if (err) {
dev_err(rvu->dev,
"%s: Error to find subbank for mcam idx=%u\n",
__func__, save[i]);
@@ -3565,9 +3623,10 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu,
struct npc_defrag_node *v,
int cnt, u16 *save)
{
+ u16 new_midx, old_midx, vidx, target_pf;
struct npc_mcam *mcam = &rvu->hw->mcam;
+ struct rvu_npc_mcam_rule *rule, *tmp;
int i, vidx_cnt, rc, sb_off;
- u16 new_midx, old_midx, vidx;
struct npc_subbank *sb;
bool deleted;
u16 pcifunc;
@@ -3607,9 +3666,30 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu,
NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(midx,
bank));
- npc_cn20k_enable_mcam_entry(rvu, blkaddr, old_midx, false);
- npc_cn20k_copy_mcam_entry(rvu, blkaddr, old_midx, new_midx);
- npc_cn20k_enable_mcam_entry(rvu, blkaddr, new_midx, true);
+ /* If bug happened during copy/enable mcam, then there is a bug in allocation
+ * algorithm itself. There is no point in rewinding and returning, as it
+ * will face further issue. Return error after printing error
+ */
+ if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, old_midx, false)) {
+ dev_err(rvu->dev,
+ "%s: Error happened while disabling old_mid=%u\n",
+ __func__, old_midx);
+ return -EFAULT;
+ }
+
+ if (npc_cn20k_copy_mcam_entry(rvu, blkaddr, old_midx, new_midx)) {
+ dev_err(rvu->dev,
+ "%s: Error happened while copying old_midx=%u new_midx=%u\n",
+ __func__, old_midx, new_midx);
+ return -EFAULT;
+ }
+
+ if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, new_midx, true)) {
+ dev_err(rvu->dev,
+ "%s: Error happened while enabling new_mid=%u\n",
+ __func__, new_midx);
+ return -EFAULT;
+ }
midx = new_midx % mcam->banksize;
bank = new_midx / mcam->banksize;
@@ -3665,8 +3745,21 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu,
mcam->entry2pfvf_map[new_midx] = pcifunc;
/* Counter is not preserved */
mcam->entry2cntr_map[new_midx] = new_midx;
+ target_pf = mcam->entry2target_pffunc[old_midx];
+ mcam->entry2target_pffunc[new_midx] = target_pf;
+ mcam->entry2target_pffunc[old_midx] = NPC_MCAM_INVALID_MAP;
+
npc_mcam_set_bit(mcam, new_midx);
+ /* Note: list order is not functionally required for mcam_rules */
+ list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) {
+ if (rule->entry != old_midx)
+ continue;
+
+ rule->entry = new_midx;
+ break;
+ }
+
/* Mark as invalid */
v->vidx[vidx_cnt - i - 1] = -1;
save[cnt - i - 1] = -1;
@@ -3935,6 +4028,13 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast,
void *val;
int i, j;
+ for (i = 0; i < ARRAY_SIZE(ptr); i++) {
+ if (!ptr[i])
+ continue;
+
+ *ptr[i] = USHRT_MAX;
+ }
+
if (!npc_priv.init_done)
return 0;
@@ -3950,7 +4050,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast,
npc_dft_rule_name[NPC_DFT_RULE_PROMISC_ID],
pcifunc);
- *ptr[0] = USHRT_MAX;
return -ESRCH;
}
@@ -3970,7 +4069,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast,
npc_dft_rule_name[NPC_DFT_RULE_UCAST_ID],
pcifunc);
- *ptr[3] = USHRT_MAX;
return -ESRCH;
}
@@ -3990,7 +4088,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast,
__func__,
npc_dft_rule_name[i], pcifunc);
- *ptr[j] = USHRT_MAX;
continue;
}
@@ -4085,7 +4182,7 @@ int rvu_mbox_handler_npc_get_dft_rl_idxs(struct rvu *rvu, struct msg_req *req,
return 0;
}
-static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc)
+bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc)
{
return is_pf_cgxmapped(rvu, rvu_get_pf(rvu->pdev, pcifunc)) ||
is_lbk_vf(rvu, pcifunc);
@@ -4093,11 +4190,11 @@ static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc)
void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc)
{
- struct npc_mcam_free_entry_req free_req = { 0 };
+ struct npc_mcam *mcam = &rvu->hw->mcam;
+ u16 ptr[4] = {[0 ... 3] = USHRT_MAX};
+ struct rvu_npc_mcam_rule *rule, *tmp;
unsigned long index;
- struct msg_rsp rsp;
- u16 ptr[4];
- int rc, i;
+ int blkaddr, rc, i;
void *map;
if (!npc_priv.init_done)
@@ -4155,14 +4252,43 @@ void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc)
}
free_rules:
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+ if (blkaddr < 0)
+ return;
+ for (int i = 0; i < 4; i++) {
+ if (ptr[i] == USHRT_MAX)
+ continue;
- free_req.hdr.pcifunc = pcifunc;
- free_req.all = 1;
- rc = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp);
- if (rc)
- dev_err(rvu->dev,
- "%s: Error deleting default entries (pcifunc=%#x\n",
- __func__, pcifunc);
+ mutex_lock(&mcam->lock);
+ npc_mcam_clear_bit(mcam, ptr[i]);
+ mcam->entry2pfvf_map[ptr[i]] = NPC_MCAM_INVALID_MAP;
+ npc_cn20k_enable_mcam_entry(rvu, blkaddr, ptr[i], false);
+ mcam->entry2target_pffunc[ptr[i]] = 0x0;
+ mutex_unlock(&mcam->lock);
+
+ rc = npc_cn20k_idx_free(rvu, &ptr[i], 1);
+ if (rc) {
+ /* Non recoverable error. Let us WARN and return. Keep system alive to
+ * enable debugging
+ */
+ WARN(1, "%s Error deleting default entries (pcifunc=%#x) mcam_idx=%u\n",
+ __func__, pcifunc, ptr[i]);
+ return;
+ }
+ }
+
+ mutex_lock(&mcam->lock);
+ list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) {
+ for (int i = 0; i < 4; i++) {
+ if (ptr[i] != rule->entry)
+ continue;
+
+ list_del(&rule->list);
+ kfree(rule);
+ break;
+ }
+ }
+ mutex_unlock(&mcam->lock);
}
int npc_cn20k_dft_rules_alloc(struct rvu *rvu, u16 pcifunc)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h
index 815d0b257a7e..3d5eb952cc07 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h
@@ -320,21 +320,21 @@ void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc);
int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast,
u16 *mcast, u16 *promisc, u16 *ucast);
-void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index,
- u8 intf, struct cn20k_mcam_entry *entry,
- bool enable, u8 hw_prio, u8 req_kw_type);
-void npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr,
- int index, bool enable);
-void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr,
- u16 src, u16 dest);
-void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index,
- struct cn20k_mcam_entry *entry, u8 *intf,
- u8 *ena, u8 *hw_prio);
-void npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr,
- int bank, int index);
+int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index,
+ u8 intf, struct cn20k_mcam_entry *entry,
+ bool enable, u8 hw_prio, u8 req_kw_type);
+int npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr,
+ int index, bool enable);
+int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr,
+ u16 src, u16 dest);
+int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index,
+ struct cn20k_mcam_entry *entry, u8 *intf,
+ u8 *ena, u8 *hw_prio);
+int npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int index);
int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type);
u16 npc_cn20k_vidx2idx(u16 index);
u16 npc_cn20k_idx2vidx(u16 idx);
int npc_cn20k_defrag(struct rvu *rvu);
+bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc);
#endif /* NPC_CN20K_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index ef5b081162eb..f977734ae712 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -3577,6 +3577,9 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc,
mcam_index = npc_get_nixlf_mcam_index(mcam,
pcifunc & ~RVU_PFVF_FUNC_MASK,
nixlf, type);
+ if (mcam_index < 0)
+ return -EINVAL;
+
err = nix_update_mce_list(rvu, pcifunc, mce_list,
mce_idx, mcam_index, add);
return err;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index c2ca5ed1d028..3c814d157ab9 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -163,14 +163,35 @@ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam,
if (rc)
return -EFAULT;
+ if (is_lbk_vf(rvu, pcifunc)) {
+ if (promisc == USHRT_MAX)
+ return -EINVAL;
+ return promisc;
+ }
+
+ if (is_cgx_vf(rvu, pcifunc)) {
+ if (ucast == USHRT_MAX)
+ return -EINVAL;
+
+ return ucast;
+ }
+
switch (type) {
case NIXLF_BCAST_ENTRY:
+ if (bcast == USHRT_MAX)
+ return -EINVAL;
return bcast;
case NIXLF_ALLMULTI_ENTRY:
+ if (mcast == USHRT_MAX)
+ return -EINVAL;
return mcast;
case NIXLF_PROMISC_ENTRY:
+ if (promisc == USHRT_MAX)
+ return -EINVAL;
return promisc;
case NIXLF_UCAST_ENTRY:
+ if (ucast == USHRT_MAX)
+ return -EINVAL;
return ucast;
default:
return -EINVAL;
@@ -238,10 +259,10 @@ void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
int actbank = bank;
if (is_cn20k(rvu->pdev)) {
- if (index < 0 || index >= mcam->banksize * mcam->banks)
- return;
-
- return npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable);
+ if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable))
+ dev_err(rvu->dev, "Error to %s mcam %u entry\n",
+ enable ? "enable" : "disable", index);
+ return;
}
index &= (mcam->banksize - 1);
@@ -258,6 +279,13 @@ static void npc_clear_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
int bank = npc_get_bank(mcam, index);
int actbank = bank;
+ if (is_cn20k(rvu->pdev)) {
+ if (npc_cn20k_clear_mcam_entry(rvu, blkaddr, index))
+ dev_err(rvu->dev, "%s Failed to clear mcam %u\n",
+ __func__, index);
+ return;
+ }
+
index &= (mcam->banksize - 1);
for (; bank < (actbank + mcam->banks_per_entry); bank++) {
rvu_write64(rvu, blkaddr,
@@ -424,6 +452,15 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam,
index = npc_get_nixlf_mcam_index(mcam, pf_func, nixlf,
NIXLF_UCAST_ENTRY);
+
+ if (index < 0) {
+ dev_err(rvu->dev,
+ "%s: failed to get ucast entry pcifunc:0x%x\n",
+ __func__, pf_func);
+ /* Action 0 is drop */
+ return 0;
+ }
+
bank = npc_get_bank(mcam, index);
index &= (mcam->banksize - 1);
@@ -589,8 +626,8 @@ void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
NPC_AF_MCAMEX_BANKX_CFG(src, sbank)) & 1;
}
-static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
- int blkaddr, u16 src, u16 dest)
+static int npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ int blkaddr, u16 src, u16 dest)
{
int dbank = npc_get_bank(mcam, dest);
int sbank = npc_get_bank(mcam, src);
@@ -630,6 +667,7 @@ static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
NPC_AF_MCAMEX_BANKX_CFG(src, sbank));
rvu_write64(rvu, blkaddr,
NPC_AF_MCAMEX_BANKX_CFG(dest, dbank), cfg);
+ return 0;
}
u64 npc_get_mcam_action(struct rvu *rvu, struct npc_mcam *mcam,
@@ -689,6 +727,12 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
index = npc_get_nixlf_mcam_index(mcam, pcifunc,
nixlf, NIXLF_UCAST_ENTRY);
+ if (index < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get ucast entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
/* Don't change the action if entry is already enabled
* Otherwise RSS action may get overwritten.
@@ -744,16 +788,38 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
index = npc_get_nixlf_mcam_index(mcam, pcifunc,
nixlf, NIXLF_PROMISC_ENTRY);
+ /* In cn20k, default indexes are installed only for CGX mapped
+ * and lbk interfaces
+ */
if (is_cgx_vf(rvu, pcifunc))
index = npc_get_nixlf_mcam_index(mcam,
pcifunc & ~RVU_PFVF_FUNC_MASK,
nixlf, NIXLF_PROMISC_ENTRY);
+ if (index < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get promisc entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
+
/* If the corresponding PF's ucast action is RSS,
* use the same action for promisc also
+ * Please note that for lbk(s) "index" and "ucast_idx"
+ * will be same.
*/
- ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
- nixlf, NIXLF_UCAST_ENTRY);
+ if (is_lbk_vf(rvu, pcifunc))
+ ucast_idx = index;
+ else
+ ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
+ nixlf, NIXLF_UCAST_ENTRY);
+ if (ucast_idx < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get ucast/promisc entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
+
if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx))
*(u64 *)&action = npc_get_mcam_action(rvu, mcam,
blkaddr, ucast_idx);
@@ -827,6 +893,14 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc,
index = npc_get_nixlf_mcam_index(mcam, pcifunc,
nixlf, NIXLF_PROMISC_ENTRY);
+
+ if (index < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get promisc entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
+
npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
}
@@ -867,6 +941,12 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
index = npc_get_nixlf_mcam_index(mcam, pcifunc,
nixlf, NIXLF_BCAST_ENTRY);
+ if (index < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get bcast entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
if (!hw->cap.nix_rx_multicast) {
/* Early silicon doesn't support pkt replication,
@@ -931,12 +1011,25 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
index = npc_get_nixlf_mcam_index(mcam, pcifunc,
nixlf, NIXLF_ALLMULTI_ENTRY);
+ if (index < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get mcast entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
/* If the corresponding PF's ucast action is RSS,
* use the same action for multicast entry also
*/
ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
nixlf, NIXLF_UCAST_ENTRY);
+ if (ucast_idx < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get ucast entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
+
if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx))
*(u64 *)&action = npc_get_mcam_action(rvu, mcam,
blkaddr, ucast_idx);
@@ -1001,6 +1094,13 @@ void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf,
NIXLF_ALLMULTI_ENTRY);
+ if (index < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get mcast entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
+
npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
}
@@ -1113,8 +1213,12 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
index = mcam_index;
}
- if (index >= mcam->total_entries)
+ if (index < 0 || index >= mcam->total_entries) {
+ dev_err(rvu->dev,
+ "%s: Invalid mcam index, pcifunc=%#x\n",
+ __func__, pcifunc);
return;
+ }
bank = npc_get_bank(mcam, index);
index &= (mcam->banksize - 1);
@@ -1158,16 +1262,18 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
/* If PF's promiscuous entry is enabled,
* Set RSS action for that entry as well
*/
- npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index,
- blkaddr, alg_idx);
+ if (index >= 0)
+ npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index,
+ blkaddr, alg_idx);
index = npc_get_nixlf_mcam_index(mcam, pcifunc,
nixlf, NIXLF_ALLMULTI_ENTRY);
/* If PF's allmulti entry is enabled,
* Set RSS action for that entry as well
*/
- npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index,
- blkaddr, alg_idx);
+ if (index >= 0)
+ npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index,
+ blkaddr, alg_idx);
}
}
@@ -1180,12 +1286,22 @@ void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc,
int index, blkaddr, mce_idx;
struct rvu_pfvf *pfvf;
+ /* multicast pkt replication is not enabled for AF's VFs & SDP links */
+ if (is_lbk_vf(rvu, pcifunc) || is_sdp_pfvf(rvu, pcifunc))
+ return;
+
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
if (blkaddr < 0)
return;
index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK,
nixlf, type);
+ if (index < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get entry for pcifunc=%#x, type=%u\n",
+ __func__, pcifunc, type);
+ return;
+ }
/* disable MCAM entry when packet replication is not supported by hw */
if (!hw->cap.nix_rx_multicast && !is_vf(pcifunc)) {
@@ -1214,6 +1330,10 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc,
struct npc_mcam *mcam = &rvu->hw->mcam;
int index, blkaddr;
+ /* only CGX or LBK interfaces have default entries */
+ if (is_cn20k(rvu->pdev) && !npc_is_cgx_or_lbk(rvu, pcifunc))
+ return;
+
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
if (blkaddr < 0)
return;
@@ -1223,6 +1343,12 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc,
pfvf->nix_rx_intf)) {
index = npc_get_nixlf_mcam_index(mcam, pcifunc,
nixlf, NIXLF_UCAST_ENTRY);
+ if (index < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get ucast entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
}
@@ -2504,33 +2630,58 @@ void npc_mcam_clear_bit(struct npc_mcam *mcam, u16 index)
static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam,
int blkaddr, u16 pcifunc)
{
+ u16 dft_idxs[NPC_DFT_RULE_MAX_ID] = {[0 ... NPC_DFT_RULE_MAX_ID - 1] = USHRT_MAX};
+ bool cn20k_dft_rl;
u16 index, cntr;
int rc;
+ npc_cn20k_dft_rules_idx_get(rvu, pcifunc,
+ &dft_idxs[NPC_DFT_RULE_BCAST_ID],
+ &dft_idxs[NPC_DFT_RULE_MCAST_ID],
+ &dft_idxs[NPC_DFT_RULE_PROMISC_ID],
+ &dft_idxs[NPC_DFT_RULE_UCAST_ID]);
+
/* Scan all MCAM entries and free the ones mapped to 'pcifunc' */
for (index = 0; index < mcam->bmap_entries; index++) {
- if (mcam->entry2pfvf_map[index] == pcifunc) {
+ if (mcam->entry2pfvf_map[index] != pcifunc)
+ continue;
+
+ cn20k_dft_rl = false;
+
+ if (is_cn20k(rvu->pdev)) {
+ if (dft_idxs[NPC_DFT_RULE_BCAST_ID] == index ||
+ dft_idxs[NPC_DFT_RULE_MCAST_ID] == index ||
+ dft_idxs[NPC_DFT_RULE_PROMISC_ID] == index ||
+ dft_idxs[NPC_DFT_RULE_UCAST_ID] == index) {
+ cn20k_dft_rl = true;
+ }
+ }
+
+ /* Disable the entry */
+ npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false);
+
+ if (!cn20k_dft_rl) {
mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP;
/* Free the entry in bitmap */
npc_mcam_clear_bit(mcam, index);
- /* Disable the entry */
- npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false);
-
- /* Update entry2counter mapping */
- cntr = mcam->entry2cntr_map[index];
- if (cntr != NPC_MCAM_INVALID_MAP)
- npc_unmap_mcam_entry_and_cntr(rvu, mcam,
- blkaddr, index,
- cntr);
mcam->entry2target_pffunc[index] = 0x0;
- if (is_cn20k(rvu->pdev)) {
- rc = npc_cn20k_idx_free(rvu, &index, 1);
- if (rc)
- dev_err(rvu->dev,
- "Failed to free mcam idx=%u pcifunc=%#x\n",
- index, pcifunc);
- }
}
+
+ /* Update entry2counter mapping */
+ cntr = mcam->entry2cntr_map[index];
+ if (cntr != NPC_MCAM_INVALID_MAP)
+ npc_unmap_mcam_entry_and_cntr(rvu, mcam,
+ blkaddr, index,
+ cntr);
+
+ if (!is_cn20k(rvu->pdev) || cn20k_dft_rl)
+ continue;
+
+ rc = npc_cn20k_idx_free(rvu, &index, 1);
+ if (rc)
+ dev_err(rvu->dev,
+ "Failed to free mcam idx=%u pcifunc=%#x\n",
+ index, pcifunc);
}
}
@@ -3266,7 +3417,10 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu,
npc_enable_mcam_entry(rvu, mcam, blkaddr, new_entry, false);
/* Copy rule from old entry to new entry */
- npc_copy_mcam_entry(rvu, mcam, blkaddr, old_entry, new_entry);
+ if (npc_copy_mcam_entry(rvu, mcam, blkaddr, old_entry, new_entry)) {
+ rc = NPC_MCAM_INVALID_REQ;
+ break;
+ }
/* Copy counter mapping, if any */
cntr = mcam->entry2cntr_map[old_entry];
@@ -3284,7 +3438,8 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu,
/* If shift has failed then report the failed index */
if (index != req->shift_count) {
- rc = NPC_MCAM_PERM_DENIED;
+ if (!rc)
+ rc = NPC_MCAM_PERM_DENIED;
rsp->failed_entry_idx = index;
}
@@ -3851,6 +4006,12 @@ int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu,
/* Read the default ucast entry if there is no pkt steering rule */
index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf,
NIXLF_UCAST_ENTRY);
+ if (index < 0) {
+ mutex_unlock(&mcam->lock);
+ rc = NIX_AF_ERR_AF_LF_INVALID;
+ goto out;
+ }
+
read_entry:
/* Read the mcam entry */
npc_read_mcam_entry(rvu, mcam, blkaddr, index, &rsp->entry, &intf,
@@ -3924,6 +4085,12 @@ void rvu_npc_clear_ucast_entry(struct rvu *rvu, int pcifunc, int nixlf)
ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
nixlf, NIXLF_UCAST_ENTRY);
+ if (ucast_idx < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get ucast entry for pcifunc=%#x\n",
+ __func__, pcifunc);
+ return;
+ }
npc_enable_mcam_entry(rvu, mcam, blkaddr, ucast_idx, false);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index b45798d9fdab..6ae9cdcb608b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1444,7 +1444,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target,
struct msg_rsp write_rsp;
struct mcam_entry *entry;
bool new = false;
- u16 entry_index;
+ int entry_index;
int err;
installed_features = req->features;
@@ -1477,6 +1477,14 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target,
if (req->default_rule) {
entry_index = npc_get_nixlf_mcam_index(mcam, target, nixlf,
NIXLF_UCAST_ENTRY);
+
+ if (entry_index < 0) {
+ dev_err(rvu->dev,
+ "%s: Error to get ucast entry for target=%#x\n",
+ __func__, target);
+ return -EINVAL;
+ }
+
enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, entry_index);
}
@@ -1980,13 +1988,15 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr,
ether_addr_copy(rule->packet.dmac, pfvf->mac_addr);
- if (is_cn20k(rvu->pdev))
- npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry,
- cn20k_entry, &intf,
- &enable, &hw_prio);
- else
+ if (is_cn20k(rvu->pdev)) {
+ if (npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry,
+ cn20k_entry, &intf,
+ &enable, &hw_prio))
+ return -EINVAL;
+ } else {
npc_read_mcam_entry(rvu, mcam, npcblkaddr, rule->entry,
entry, &intf, &enable);
+ }
npc_update_entry(rvu, NPC_DMAC, &mdata,
ether_addr_to_u64(pfvf->mac_addr), 0,
@@ -2038,8 +2048,12 @@ void npc_mcam_enable_flows(struct rvu *rvu, u16 target)
continue;
}
- if (rule->vfvlan_cfg)
- npc_update_dmac_value(rvu, blkaddr, rule, pfvf);
+ if (rule->vfvlan_cfg) {
+ if (npc_update_dmac_value(rvu, blkaddr, rule, pfvf))
+ dev_err(rvu->dev,
+ "Update dmac failed for %u, target=%#x\n",
+ rule->entry, target);
+ }
if (rule->rx_action.op == NIX_RX_ACTION_DEFAULT) {
if (!def_ucast_rule)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c
index 6a50b6dec0fa..d9adb993e64d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c
@@ -1070,29 +1070,37 @@ static struct psp_dev_ops mlx5_psp_ops = {
void mlx5e_psp_unregister(struct mlx5e_priv *priv)
{
- if (!priv->psp || !priv->psp->psp)
+ struct mlx5e_psp *psp = priv->psp;
+
+ if (!psp || !psp->psp)
return;
- psp_dev_unregister(priv->psp->psp);
+ psp_dev_unregister(psp->psp);
+ psp->psp = NULL;
}
void mlx5e_psp_register(struct mlx5e_priv *priv)
{
+ struct mlx5e_psp *psp = priv->psp;
+ struct psp_dev *psd;
+
/* FW Caps missing */
if (!priv->psp)
return;
- priv->psp->caps.assoc_drv_spc = sizeof(u32);
- priv->psp->caps.versions = 1 << PSP_VERSION_HDR0_AES_GCM_128;
+ psp->caps.assoc_drv_spc = sizeof(u32);
+ psp->caps.versions = 1 << PSP_VERSION_HDR0_AES_GCM_128;
if (MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_encrypt) &&
MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_decrypt))
- priv->psp->caps.versions |= 1 << PSP_VERSION_HDR0_AES_GCM_256;
+ psp->caps.versions |= 1 << PSP_VERSION_HDR0_AES_GCM_256;
- priv->psp->psp = psp_dev_create(priv->netdev, &mlx5_psp_ops,
- &priv->psp->caps, NULL);
- if (IS_ERR(priv->psp->psp))
+ psd = psp_dev_create(priv->netdev, &mlx5_psp_ops, &psp->caps, NULL);
+ if (IS_ERR(psd)) {
mlx5_core_err(priv->mdev, "PSP failed to register due to %pe\n",
- priv->psp->psp);
+ psd);
+ return;
+ }
+ psp->psp = psd;
}
int mlx5e_psp_init(struct mlx5e_priv *priv)
@@ -1131,22 +1139,18 @@ int mlx5e_psp_init(struct mlx5e_priv *priv)
if (!psp)
return -ENOMEM;
- priv->psp = psp;
fs = mlx5e_accel_psp_fs_init(priv);
if (IS_ERR(fs)) {
err = PTR_ERR(fs);
- goto out_err;
+ kfree(psp);
+ return err;
}
psp->fs = fs;
+ priv->psp = psp;
mlx5_core_dbg(priv->mdev, "PSP attached to netdevice\n");
return 0;
-
-out_err:
- priv->psp = NULL;
- kfree(psp);
- return err;
}
void mlx5e_psp_cleanup(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5a46870c4b74..8f2b3abe0092 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -6023,7 +6023,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
if (take_rtnl)
rtnl_lock();
- mlx5e_psp_register(priv);
/* update XDP supported features */
mlx5e_set_xdp_feature(priv);
@@ -6036,7 +6035,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{
mlx5e_health_destroy_reporters(priv);
- mlx5e_psp_unregister(priv);
mlx5e_ktls_cleanup(priv);
mlx5e_psp_cleanup(priv);
mlx5e_fs_cleanup(priv->fs);
@@ -6160,6 +6158,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5e_fs_init_l2_addr(priv->fs, netdev);
mlx5e_ipsec_init(priv);
+ mlx5e_psp_register(priv);
err = mlx5e_macsec_init(priv);
if (err)
@@ -6230,6 +6229,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
mlx5_lag_remove_netdev(mdev, priv->netdev);
mlx5_vxlan_reset_to_default(mdev->vxlan);
mlx5e_macsec_cleanup(priv);
+ mlx5e_psp_unregister(priv);
mlx5e_ipsec_cleanup(priv);
}
@@ -6774,9 +6774,11 @@ static int mlx5e_resume(struct auxiliary_device *adev)
return err;
actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
- if (actual_adev)
- return _mlx5e_resume(actual_adev);
- return 0;
+ if (actual_adev) {
+ err = _mlx5e_resume(actual_adev);
+ mlx5_sd_put_adev(actual_adev, adev);
+ }
+ return err;
}
static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg)
@@ -6815,6 +6817,8 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
err = _mlx5e_suspend(actual_adev, false);
mlx5_sd_cleanup(mdev);
+ if (actual_adev)
+ mlx5_sd_put_adev(actual_adev, adev);
return err;
}
@@ -6912,9 +6916,19 @@ static int mlx5e_probe(struct auxiliary_device *adev,
return err;
actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
- if (actual_adev)
- return _mlx5e_probe(actual_adev);
+ if (actual_adev) {
+ err = _mlx5e_probe(actual_adev);
+ if (err)
+ goto sd_cleanup;
+ mlx5_sd_put_adev(actual_adev, adev);
+ }
return 0;
+
+sd_cleanup:
+ mlx5_sd_cleanup(mdev);
+ if (actual_adev)
+ mlx5_sd_put_adev(actual_adev, adev);
+ return err;
}
static void _mlx5e_remove(struct auxiliary_device *adev)
@@ -6966,6 +6980,8 @@ static void mlx5e_remove(struct auxiliary_device *adev)
_mlx5e_remove(actual_adev);
mlx5_sd_cleanup(mdev);
+ if (actual_adev)
+ mlx5_sd_put_adev(actual_adev, adev);
}
static const struct auxiliary_device_id mlx5e_id_table[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 762c783156b4..6e199161b008 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -18,6 +18,7 @@ struct mlx5_sd {
u8 host_buses;
struct mlx5_devcom_comp_dev *devcom;
struct dentry *dfs;
+ u8 state;
bool primary;
union {
struct { /* primary */
@@ -31,6 +32,11 @@ struct mlx5_sd {
};
};
+enum mlx5_sd_state {
+ MLX5_SD_STATE_DOWN = 0,
+ MLX5_SD_STATE_UP,
+};
+
static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
@@ -270,9 +276,6 @@ static void sd_unregister(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
- mlx5_devcom_comp_lock(sd->devcom);
- mlx5_devcom_comp_set_ready(sd->devcom, false);
- mlx5_devcom_comp_unlock(sd->devcom);
mlx5_devcom_unregister_component(sd->devcom);
}
@@ -426,6 +429,7 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
struct mlx5_core_dev *primary, *pos, *to;
struct mlx5_sd *sd = mlx5_get_sd(dev);
u8 alias_key[ACCESS_KEY_LEN];
+ struct mlx5_sd *primary_sd;
int err, i;
err = sd_init(dev);
@@ -440,10 +444,17 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
if (err)
goto err_sd_cleanup;
+ mlx5_devcom_comp_lock(sd->devcom);
if (!mlx5_devcom_comp_is_ready(sd->devcom))
- return 0;
+ goto out;
primary = mlx5_sd_get_primary(dev);
+ if (!primary)
+ goto out;
+
+ primary_sd = mlx5_get_sd(primary);
+ if (primary_sd->state != MLX5_SD_STATE_DOWN)
+ goto out;
for (i = 0; i < ACCESS_KEY_LEN; i++)
alias_key[i] = get_random_u8();
@@ -452,9 +463,13 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
if (err)
goto err_sd_unregister;
- sd->dfs = debugfs_create_dir("multi-pf", mlx5_debugfs_get_dev_root(primary));
- debugfs_create_x32("group_id", 0400, sd->dfs, &sd->group_id);
- debugfs_create_file("primary", 0400, sd->dfs, primary, &dev_fops);
+ primary_sd->dfs =
+ debugfs_create_dir("multi-pf",
+ mlx5_debugfs_get_dev_root(primary));
+ debugfs_create_x32("group_id", 0400, primary_sd->dfs,
+ &primary_sd->group_id);
+ debugfs_create_file("primary", 0400, primary_sd->dfs, primary,
+ &dev_fops);
mlx5_sd_for_each_secondary(i, primary, pos) {
char name[32];
@@ -464,7 +479,8 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
goto err_unset_secondaries;
snprintf(name, sizeof(name), "secondary_%d", i - 1);
- debugfs_create_file(name, 0400, sd->dfs, pos, &dev_fops);
+ debugfs_create_file(name, 0400, primary_sd->dfs, pos,
+ &dev_fops);
}
@@ -472,6 +488,9 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
sd->group_id, mlx5_devcom_comp_get_size(sd->devcom));
sd_print_group(primary);
+ primary_sd->state = MLX5_SD_STATE_UP;
+out:
+ mlx5_devcom_comp_unlock(sd->devcom);
return 0;
err_unset_secondaries:
@@ -479,8 +498,18 @@ err_unset_secondaries:
mlx5_sd_for_each_secondary_to(i, primary, to, pos)
sd_cmd_unset_secondary(pos);
sd_cmd_unset_primary(primary);
- debugfs_remove_recursive(sd->dfs);
+ debugfs_remove_recursive(primary_sd->dfs);
+ primary_sd->dfs = NULL;
err_sd_unregister:
+ mlx5_sd_for_each_secondary(i, primary, pos) {
+ struct mlx5_sd *peer_sd = mlx5_get_sd(pos);
+
+ primary_sd->secondaries[i - 1] = NULL;
+ peer_sd->primary_dev = NULL;
+ }
+ primary_sd->primary = false;
+ mlx5_devcom_comp_set_ready(sd->devcom, false);
+ mlx5_devcom_comp_unlock(sd->devcom);
sd_unregister(dev);
err_sd_cleanup:
sd_cleanup(dev);
@@ -491,42 +520,97 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
struct mlx5_core_dev *primary, *pos;
+ struct mlx5_sd *primary_sd;
int i;
if (!sd)
return;
+ mlx5_devcom_comp_lock(sd->devcom);
if (!mlx5_devcom_comp_is_ready(sd->devcom))
- goto out;
+ goto out_unlock;
primary = mlx5_sd_get_primary(dev);
+ if (!primary)
+ goto out_ready_false;
+
+ primary_sd = mlx5_get_sd(primary);
+ if (primary_sd->state != MLX5_SD_STATE_UP)
+ goto out_clear_peers;
+
mlx5_sd_for_each_secondary(i, primary, pos)
sd_cmd_unset_secondary(pos);
sd_cmd_unset_primary(primary);
- debugfs_remove_recursive(sd->dfs);
+ debugfs_remove_recursive(primary_sd->dfs);
+ primary_sd->dfs = NULL;
sd_info(primary, "group id %#x, uncombined\n", sd->group_id);
-out:
+ primary_sd->state = MLX5_SD_STATE_DOWN;
+out_clear_peers:
+ mlx5_sd_for_each_secondary(i, primary, pos) {
+ struct mlx5_sd *peer_sd = mlx5_get_sd(pos);
+
+ primary_sd->secondaries[i - 1] = NULL;
+ peer_sd->primary_dev = NULL;
+ }
+ primary_sd->primary = false;
+out_ready_false:
+ mlx5_devcom_comp_set_ready(sd->devcom, false);
+out_unlock:
+ mlx5_devcom_comp_unlock(sd->devcom);
sd_unregister(dev);
sd_cleanup(dev);
}
+/* Lock order:
+ * primary: actual_adev_lock -> SD devcom comp lock
+ * secondary: SD devcom comp lock -> (drop) -> actual_adev_lock
+ * The two locks are never held together, so no ABBA.
+ */
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
struct auxiliary_device *adev,
int idx)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
struct mlx5_core_dev *primary;
+ struct mlx5_adev *primary_adev;
if (!sd)
return adev;
- if (!mlx5_devcom_comp_is_ready(sd->devcom))
+ mlx5_devcom_comp_lock(sd->devcom);
+ if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
+ mlx5_devcom_comp_unlock(sd->devcom);
return NULL;
+ }
primary = mlx5_sd_get_primary(dev);
- if (dev == primary)
+ if (!primary || dev == primary) {
+ mlx5_devcom_comp_unlock(sd->devcom);
return adev;
+ }
+
+ primary_adev = primary->priv.adev[idx];
+ get_device(&primary_adev->adev.dev);
+ mlx5_devcom_comp_unlock(sd->devcom);
+
+ device_lock(&primary_adev->adev.dev);
+ /* Primary may have completed remove between dropping devcom and
+ * acquiring device_lock; recheck.
+ */
+ if (!mlx5_devcom_comp_is_ready(sd->devcom)) {
+ device_unlock(&primary_adev->adev.dev);
+ put_device(&primary_adev->adev.dev);
+ return NULL;
+ }
+ return &primary_adev->adev;
+}
- return &primary->priv.adev[idx]->adev;
+void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
+ struct auxiliary_device *adev)
+{
+ if (actual_adev != adev) {
+ device_unlock(&actual_adev->dev);
+ put_device(&actual_adev->dev);
+ }
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
index 137efaf9aabc..9bfd5b9756b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
@@ -15,6 +15,8 @@ struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int c
struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
struct auxiliary_device *adev,
int idx);
+void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
+ struct auxiliary_device *adev);
int mlx5_sd_init(struct mlx5_core_dev *dev);
void mlx5_sd_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index c406a3b56b37..4dea2bb58d2f 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -826,7 +826,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd)
netif_tx_stop_all_queues(netdev);
if (fbnic_phylink_create(netdev)) {
- fbnic_netdev_free(fbd);
+ free_netdev(netdev);
+ fbd->netdev = NULL;
return NULL;
}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index 6a745bb71b5c..eb57b86fbe22 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -31,11 +31,11 @@ enum spx5_target_chiptype {
SPX5_TARGET_CT_7552 = 0x7552, /* SparX-5-128 Enterprise */
SPX5_TARGET_CT_7556 = 0x7556, /* SparX-5-160 Enterprise */
SPX5_TARGET_CT_7558 = 0x7558, /* SparX-5-200 Enterprise */
- SPX5_TARGET_CT_7546TSN = 0x47546, /* SparX-5-64i Industrial */
- SPX5_TARGET_CT_7549TSN = 0x47549, /* SparX-5-90i Industrial */
- SPX5_TARGET_CT_7552TSN = 0x47552, /* SparX-5-128i Industrial */
- SPX5_TARGET_CT_7556TSN = 0x47556, /* SparX-5-160i Industrial */
- SPX5_TARGET_CT_7558TSN = 0x47558, /* SparX-5-200i Industrial */
+ SPX5_TARGET_CT_7546TSN = 0x0546, /* SparX-5-64i Industrial */
+ SPX5_TARGET_CT_7549TSN = 0x0549, /* SparX-5-90i Industrial */
+ SPX5_TARGET_CT_7552TSN = 0x0552, /* SparX-5-128i Industrial */
+ SPX5_TARGET_CT_7556TSN = 0x0556, /* SparX-5-160i Industrial */
+ SPX5_TARGET_CT_7558TSN = 0x0558, /* SparX-5-200i Industrial */
SPX5_TARGET_CT_LAN9694 = 0x9694, /* lan969x-40 */
SPX5_TARGET_CT_LAN9691VAO = 0x9691, /* lan969x-40-VAO */
SPX5_TARGET_CT_LAN9694TSN = 0x9695, /* lan969x-40-TSN */
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
index 04bc8fffaf96..62c49893de3c 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
@@ -1128,7 +1128,8 @@ int sparx5_port_init(struct sparx5 *sparx5,
DEV2G5_PCS1G_SD_CFG(port->portno));
if (conf->portmode == PHY_INTERFACE_MODE_QSGMII ||
- conf->portmode == PHY_INTERFACE_MODE_SGMII) {
+ conf->portmode == PHY_INTERFACE_MODE_SGMII ||
+ conf->portmode == PHY_INTERFACE_MODE_1000BASEX) {
err = sparx5_serdes_set(sparx5, port, conf);
if (err)
return err;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 098fbda0d128..d8e816882f02 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -43,8 +43,9 @@ static u64 mana_gd_r64(struct gdma_context *g, u64 offset)
static int mana_gd_init_pf_regs(struct pci_dev *pdev)
{
struct gdma_context *gc = pci_get_drvdata(pdev);
- void __iomem *sriov_base_va;
+ u64 remaining_barsize;
u64 sriov_base_off;
+ u64 sriov_shm_off;
gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF;
@@ -73,10 +74,28 @@ static int mana_gd_init_pf_regs(struct pci_dev *pdev)
gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off;
sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF);
+ if (sriov_base_off >= gc->bar0_size ||
+ gc->bar0_size - sriov_base_off <
+ GDMA_PF_REG_SHM_OFF + sizeof(u64) ||
+ !IS_ALIGNED(sriov_base_off, sizeof(u64))) {
+ dev_err(gc->dev,
+ "SRIOV base offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n",
+ sriov_base_off, (u64)gc->bar0_size);
+ return -EPROTO;
+ }
- sriov_base_va = gc->bar0_va + sriov_base_off;
- gc->shm_base = sriov_base_va +
- mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF);
+ remaining_barsize = gc->bar0_size - sriov_base_off;
+ sriov_shm_off = mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF);
+ if (sriov_shm_off >= remaining_barsize ||
+ remaining_barsize - sriov_shm_off < SMC_APERTURE_SIZE ||
+ !IS_ALIGNED(sriov_shm_off, sizeof(u32))) {
+ dev_err(gc->dev,
+ "SRIOV SHM offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n",
+ sriov_shm_off, (u64)gc->bar0_size);
+ return -EPROTO;
+ }
+
+ gc->shm_base = gc->bar0_va + sriov_base_off + sriov_shm_off;
return 0;
}
@@ -84,6 +103,7 @@ static int mana_gd_init_pf_regs(struct pci_dev *pdev)
static int mana_gd_init_vf_regs(struct pci_dev *pdev)
{
struct gdma_context *gc = pci_get_drvdata(pdev);
+ u64 shm_off;
gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF;
@@ -111,7 +131,17 @@ static int mana_gd_init_vf_regs(struct pci_dev *pdev)
gc->db_page_base = gc->bar0_va + gc->db_page_off;
gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off;
- gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET);
+ shm_off = mana_gd_r64(gc, GDMA_REG_SHM_OFFSET);
+ if (shm_off >= gc->bar0_size ||
+ gc->bar0_size - shm_off < SMC_APERTURE_SIZE ||
+ !IS_ALIGNED(shm_off, sizeof(u32))) {
+ dev_err(gc->dev,
+ "SHM offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n",
+ shm_off, (u64)gc->bar0_size);
+ return -EPROTO;
+ }
+
+ gc->shm_base = gc->bar0_va + shm_off;
return 0;
}
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a654b3699c4c..9afc786b297a 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2520,9 +2520,12 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
napi_disable_locked(napi);
netif_napi_del_locked(napi);
}
- xdp_rxq_info_unreg(&rxq->xdp_rxq);
- mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
+ if (xdp_rxq_info_is_reg(&rxq->xdp_rxq))
+ xdp_rxq_info_unreg(&rxq->xdp_rxq);
+
+ if (rxq->rxobj != INVALID_MANA_HANDLE)
+ mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
mana_deinit_cq(apc, &rxq->rx_cq);
@@ -2796,9 +2799,6 @@ out:
mana_destroy_rxq(apc, rxq, false);
- if (cq)
- mana_deinit_cq(apc, cq);
-
return NULL;
}
diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c
index 0f1679ebad96..d21b5db06e50 100644
--- a/drivers/net/ethernet/microsoft/mana/shm_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c
@@ -61,11 +61,6 @@ union smc_proto_hdr {
};
}; /* HW DATA */
-#define SMC_APERTURE_BITS 256
-#define SMC_BASIC_UNIT (sizeof(u32))
-#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8))
-#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1)
-
static int mana_smc_poll_register(void __iomem *base, bool reset)
{
void __iomem *ptr = base + SMC_LAST_DWORD * SMC_BASIC_UNIT;
diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c
index 03a2669f0518..ee8381b60b8d 100644
--- a/drivers/net/ethernet/renesas/rtsn.c
+++ b/drivers/net/ethernet/renesas/rtsn.c
@@ -797,11 +797,11 @@ static int rtsn_mdio_alloc(struct rtsn_private *priv)
/* Enter config mode before registering the MDIO bus */
ret = rtsn_reset(priv);
if (ret)
- goto out_free_bus;
+ goto out_put_node;
ret = rtsn_change_mode(priv, OCR_OPC_CONFIG);
if (ret)
- goto out_free_bus;
+ goto out_put_node;
rtsn_modify(priv, MPIC, MPIC_PSMCS_MASK | MPIC_PSMHT_MASK,
MPIC_PSMCS_DEFAULT | MPIC_PSMHT_DEFAULT);
@@ -824,6 +824,8 @@ static int rtsn_mdio_alloc(struct rtsn_private *priv)
return 0;
+out_put_node:
+ of_node_put(mdio_node);
out_free_bus:
mdiobus_free(mii);
return ret;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c
index e2240b68ad98..2ab6ecac6422 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c
@@ -100,6 +100,8 @@ static int nvt_gmac_probe(struct platform_device *pdev)
if (!priv)
return dev_err_probe(dev, -ENOMEM, "Failed to allocate private data\n");
+ priv->dev = dev;
+
priv->regmap = syscon_regmap_lookup_by_phandle_args(dev->of_node, "nuvoton,sys",
1, &priv->macid);
if (IS_ERR(priv->regmap))
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index d3772d01e00b..2451f6b20b11 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -2480,8 +2480,11 @@ int wx_sw_init(struct wx *wx)
wx->oem_svid = pdev->subsystem_vendor;
wx->oem_ssid = pdev->subsystem_device;
wx->bus.device = PCI_SLOT(pdev->devfn);
- wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID,
- rd32(wx, WX_CFG_PORT_ST));
+ if (pdev->is_virtfn)
+ wx->bus.func = PCI_FUNC(pdev->devfn);
+ else
+ wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID,
+ rd32(wx, WX_CFG_PORT_ST));
if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN ||
pdev->is_virtfn) {
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c
index 29cdbed2e5ec..94ff8f5f0b4c 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c
@@ -99,8 +99,8 @@ int wx_request_msix_irqs_vf(struct wx *wx)
}
}
- err = request_threaded_irq(wx->msix_entry->vector, wx_msix_misc_vf,
- NULL, IRQF_ONESHOT, netdev->name, wx);
+ err = request_irq(wx->msix_entry->vector, wx_msix_misc_vf,
+ 0, netdev->name, wx);
if (err) {
wx_err(wx, "request_irq for msix_other failed: %d\n", err);
goto free_queue_irqs;
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index a05af192caf3..a750768912b5 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -1182,7 +1182,8 @@ void nsim_destroy(struct netdevsim *ns)
unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
&ns->nn);
- nsim_psp_uninit(ns);
+ if (nsim_dev_port_is_pf(ns->nsim_dev_port))
+ nsim_psp_uninit(ns);
rtnl_lock();
peer = rtnl_dereference(ns->peer);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 7e129dddbbe7..d909c4160ea1 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -120,7 +120,9 @@ struct netdevsim {
u64_stats_t tx_packets;
u64_stats_t tx_bytes;
struct u64_stats_sync syncp;
- struct psp_dev *dev;
+ struct psp_dev __rcu *dev;
+ struct dentry *rereg;
+ struct mutex rereg_lock;
u32 spi;
u32 assoc_cnt;
} psp;
diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c
index 0b4d717253b0..6936ecb8173e 100644
--- a/drivers/net/netdevsim/psp.c
+++ b/drivers/net/netdevsim/psp.c
@@ -19,6 +19,7 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns,
struct netdevsim *peer_ns, struct skb_ext **psp_ext)
{
enum skb_drop_reason rc = 0;
+ struct psp_dev *peer_psd;
struct psp_assoc *pas;
struct net *net;
void **ptr;
@@ -48,7 +49,8 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns,
}
/* Now pretend we just received this frame */
- if (peer_ns->psp.dev->config.versions & (1 << pas->version)) {
+ peer_psd = rcu_dereference(peer_ns->psp.dev);
+ if (peer_psd && peer_psd->config.versions & (1 << pas->version)) {
bool strip_icv = false;
u8 generation;
@@ -61,8 +63,7 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns,
skb_ext_reset(skb);
skb->mac_len = ETH_HLEN;
- if (psp_dev_rcv(skb, peer_ns->psp.dev->id, generation,
- strip_icv)) {
+ if (psp_dev_rcv(skb, peer_psd->id, generation, strip_icv)) {
rc = SKB_DROP_REASON_PSP_OUTPUT;
goto out_unlock;
}
@@ -209,26 +210,50 @@ static struct psp_dev_caps nsim_psp_caps = {
.assoc_drv_spc = sizeof(void *),
};
-void nsim_psp_uninit(struct netdevsim *ns)
+static void __nsim_psp_uninit(struct netdevsim *ns, bool teardown)
{
- if (!IS_ERR(ns->psp.dev))
- psp_dev_unregister(ns->psp.dev);
+ struct psp_dev *psd;
+
+ psd = rcu_dereference_protected(ns->psp.dev,
+ teardown ||
+ lockdep_is_held(&ns->psp.rereg_lock));
+ if (psd) {
+ rcu_assign_pointer(ns->psp.dev, NULL);
+ synchronize_rcu();
+ psp_dev_unregister(psd);
+ }
WARN_ON(ns->psp.assoc_cnt);
}
+void nsim_psp_uninit(struct netdevsim *ns)
+{
+ debugfs_remove(ns->psp.rereg);
+ mutex_destroy(&ns->psp.rereg_lock);
+ __nsim_psp_uninit(ns, true);
+}
+
static ssize_t
nsim_psp_rereg_write(struct file *file, const char __user *data, size_t count,
loff_t *ppos)
{
struct netdevsim *ns = file->private_data;
- int err;
+ struct psp_dev *psd;
+ ssize_t ret;
+
+ mutex_lock(&ns->psp.rereg_lock);
+ __nsim_psp_uninit(ns, false);
- nsim_psp_uninit(ns);
+ psd = psp_dev_create(ns->netdev, &nsim_psp_ops, &nsim_psp_caps, ns);
+ if (IS_ERR(psd)) {
+ ret = PTR_ERR(psd);
+ goto out;
+ }
- ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops,
- &nsim_psp_caps, ns);
- err = PTR_ERR_OR_ZERO(ns->psp.dev);
- return err ?: count;
+ rcu_assign_pointer(ns->psp.dev, psd);
+ ret = count;
+out:
+ mutex_unlock(&ns->psp.rereg_lock);
+ return ret;
}
static const struct file_operations nsim_psp_rereg_fops = {
@@ -241,14 +266,16 @@ static const struct file_operations nsim_psp_rereg_fops = {
int nsim_psp_init(struct netdevsim *ns)
{
struct dentry *ddir = ns->nsim_dev_port->ddir;
- int err;
+ struct psp_dev *psd;
+
+ psd = psp_dev_create(ns->netdev, &nsim_psp_ops, &nsim_psp_caps, ns);
+ if (IS_ERR(psd))
+ return PTR_ERR(psd);
- ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops,
- &nsim_psp_caps, ns);
- err = PTR_ERR_OR_ZERO(ns->psp.dev);
- if (err)
- return err;
+ rcu_assign_pointer(ns->psp.dev, psd);
- debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops);
+ mutex_init(&ns->psp.rereg_lock);
+ ns->psp.rereg = debugfs_create_file("psp_rereg", 0200, ddir, ns,
+ &nsim_psp_rereg_fops);
return 0;
}
diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
index db43a1f8a07a..22c555dd962e 100644
--- a/drivers/net/ovpn/io.c
+++ b/drivers/net/ovpn/io.c
@@ -85,17 +85,24 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb)
skb_scrub_packet(skb, true);
/* network header reset in ovpn_decrypt_post() */
+ skb_reset_mac_header(skb);
skb_reset_transport_header(skb);
skb_reset_inner_headers(skb);
/* cause packet to be "received" by the interface */
pkt_len = skb->len;
+ /* we may get here in process context in case of TCP connections,
+ * therefore we have to disable BHs to ensure gro_cells_receive()
+ * and dev_dstats_rx_add() do not get corrupted or enter deadlock
+ */
+ local_bh_disable();
ret = gro_cells_receive(&peer->ovpn->gro_cells, skb);
if (likely(ret == NET_RX_SUCCESS)) {
/* update RX stats with the size of decrypted packet */
ovpn_peer_stats_increment_rx(&peer->vpn_stats, pkt_len);
dev_dstats_rx_add(peer->ovpn->dev, pkt_len);
}
+ local_bh_enable();
}
void ovpn_decrypt_post(void *data, int ret)
diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index 5198d66dbbc0..b64beade8dd9 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -563,6 +563,15 @@ void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow,
}
EXPORT_SYMBOL_GPL(bcm_phy_get_stats);
+void bcm_phy_update_stats_shadow(struct phy_device *phydev, u64 *shadow)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++)
+ bcm_phy_get_stat(phydev, shadow, i);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_update_stats_shadow);
+
void bcm_phy_r_rc_cal_reset(struct phy_device *phydev)
{
/* Reset R_CAL/RC_CAL Engine */
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index bceddbc860eb..bba94ce96195 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -85,6 +85,7 @@ int bcm_phy_get_sset_count(struct phy_device *phydev);
void bcm_phy_get_strings(struct phy_device *phydev, u8 *data);
void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow,
struct ethtool_stats *stats, u64 *data);
+void bcm_phy_update_stats_shadow(struct phy_device *phydev, u64 *shadow);
void bcm_phy_r_rc_cal_reset(struct phy_device *phydev);
int bcm_phy_28nm_a0b0_afe_config_init(struct phy_device *phydev);
int bcm_phy_enable_jumbo(struct phy_device *phydev);
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 00e8fa14aa77..71a163f62c0e 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -807,6 +807,17 @@ static void bcm7xxx_28nm_get_phy_stats(struct phy_device *phydev,
bcm_phy_get_stats(phydev, priv->stats, stats, data);
}
+static int bcm7xxx_28nm_suspend(struct phy_device *phydev)
+{
+ struct bcm7xxx_phy_priv *priv = phydev->priv;
+
+ mutex_lock(&phydev->lock);
+ bcm_phy_update_stats_shadow(phydev, priv->stats);
+ mutex_unlock(&phydev->lock);
+
+ return genphy_suspend(phydev);
+}
+
static int bcm7xxx_28nm_probe(struct phy_device *phydev)
{
struct bcm7xxx_phy_priv *priv;
@@ -849,6 +860,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
.flags = PHY_IS_INTERNAL, \
.config_init = bcm7xxx_28nm_config_init, \
.resume = bcm7xxx_28nm_resume, \
+ .suspend = bcm7xxx_28nm_suspend, \
.get_tunable = bcm7xxx_28nm_get_tunable, \
.set_tunable = bcm7xxx_28nm_set_tunable, \
.get_sset_count = bcm_phy_get_sset_count, \
@@ -866,6 +878,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
.flags = PHY_IS_INTERNAL, \
.config_init = bcm7xxx_28nm_ephy_config_init, \
.resume = bcm7xxx_28nm_ephy_resume, \
+ .suspend = bcm7xxx_28nm_suspend, \
.get_sset_count = bcm_phy_get_sset_count, \
.get_strings = bcm_phy_get_strings, \
.get_stats = bcm7xxx_28nm_get_phy_stats, \
@@ -902,6 +915,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
.config_aneg = genphy_config_aneg, \
.read_status = genphy_read_status, \
.resume = bcm7xxx_16nm_ephy_resume, \
+ .suspend = bcm7xxx_28nm_suspend, \
}
static struct phy_driver bcm7xxx_driver[] = {
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index bf0c6a04481e..d1a4edb34ad2 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -592,8 +592,13 @@ static int bcm54xx_set_wakeup_irq(struct phy_device *phydev, bool state)
static int bcm54xx_suspend(struct phy_device *phydev)
{
+ struct bcm54xx_phy_priv *priv = phydev->priv;
int ret = 0;
+ mutex_lock(&phydev->lock);
+ bcm_phy_update_stats_shadow(phydev, priv->stats);
+ mutex_unlock(&phydev->lock);
+
bcm54xx_ptp_stop(phydev);
/* Acknowledge any Wake-on-LAN interrupt prior to suspend */
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 2aa1dedd21b8..e211a523c258 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -4548,6 +4548,13 @@ static int lan8814_config_init(struct phy_device *phydev)
struct kszphy_priv *lan8814 = phydev->priv;
int ret;
+ if (phy_package_init_once(phydev))
+ /* Reset the PHY */
+ lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
+ LAN8814_QSGMII_SOFT_RESET,
+ LAN8814_QSGMII_SOFT_RESET_BIT,
+ LAN8814_QSGMII_SOFT_RESET_BIT);
+
/* Based on the interface type select how the advertise ability is
* encoded, to set as SGMII or as USGMII.
*/
@@ -4655,13 +4662,7 @@ static int lan8814_probe(struct phy_device *phydev)
priv->is_ptp_available = err == LAN8814_REV_LAN8814 ||
err == LAN8814_REV_LAN8818;
- if (phy_package_init_once(phydev)) {
- /* Reset the PHY */
- lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS,
- LAN8814_QSGMII_SOFT_RESET,
- LAN8814_QSGMII_SOFT_RESET_BIT,
- LAN8814_QSGMII_SOFT_RESET_BIT);
-
+ if (phy_package_probe_once(phydev)) {
err = lan8814_release_coma_mode(phydev);
if (err)
return err;
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index df0bcfedddbc..293ef80c4e30 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -756,6 +756,7 @@ static void ax88772_mac_link_down(struct phylink_config *config,
struct usbnet *dev = netdev_priv(to_net_dev(config->dev));
asix_write_medium_mode(dev, 0, 0);
+ usbnet_link_change(dev, false, false);
}
static void ax88772_mac_link_up(struct phylink_config *config,
@@ -786,6 +787,7 @@ static void ax88772_mac_link_up(struct phylink_config *config,
m |= AX_MEDIUM_RFC;
asix_write_medium_mode(dev, m, 0);
+ usbnet_link_change(dev, true, false);
}
static const struct phylink_mac_ops ax88772_phylink_mac_ops = {
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index bb9929727eb9..0223a172851e 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -2012,6 +2012,14 @@ static const struct usb_device_id cdc_devs[] = {
.driver_info = (unsigned long)&apple_private_interface_info,
},
+ /* Mac */
+ { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x1905, 0),
+ .driver_info = (unsigned long)&apple_private_interface_info,
+ },
+ { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x1905, 2),
+ .driver_info = (unsigned long)&apple_private_interface_info,
+ },
+
/* Ericsson MBM devices like F5521gw */
{ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_VENDOR,
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 7337bf1b7d6a..1ace1d2398c9 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -10138,6 +10138,7 @@ static const struct usb_device_id rtl8152_table[] = {
{ USB_DEVICE(VENDOR_ID_DELL, 0xb097) },
{ USB_DEVICE(VENDOR_ID_ASUS, 0x1976) },
{ USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02b) },
+ { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02c) },
{}
};
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index e35df717e65e..0cfb19b760dd 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -972,7 +972,8 @@ static int veth_poll(struct napi_struct *napi, int budget)
/* NAPI functions as RCU section */
peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held());
- peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL;
+ peer_txq = (peer_dev && queue_idx < peer_dev->real_num_tx_queues) ?
+ netdev_get_tx_queue(peer_dev, queue_idx) : NULL;
xdp_set_return_frame_no_direct();
done = veth_xdp_rcv(rq, budget, &bq, &stats);
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 3bd57527b1be..15bfb78381d4 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -773,11 +773,6 @@ static void uhdlc_memclean(struct ucc_hdlc_private *priv)
kfree(priv->tx_skbuff);
priv->tx_skbuff = NULL;
- if (priv->uf_regs) {
- iounmap(priv->uf_regs);
- priv->uf_regs = NULL;
- }
-
if (priv->uccf) {
ucc_fast_free(priv->uccf);
priv->uccf = NULL;
@@ -1255,12 +1250,12 @@ static void ucc_hdlc_remove(struct platform_device *pdev)
uhdlc_memclean(priv);
- if (priv->utdm->si_regs) {
+ if (priv->utdm && priv->utdm->si_regs) {
iounmap(priv->utdm->si_regs);
priv->utdm->si_regs = NULL;
}
- if (priv->utdm->siram) {
+ if (priv->utdm && priv->utdm->siram) {
iounmap(priv->utdm->siram);
priv->utdm->siram = NULL;
}
diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig
index 876aed765833..efb9f022d8c6 100644
--- a/drivers/net/wireless/ath/ath10k/Kconfig
+++ b/drivers/net/wireless/ath/ath10k/Kconfig
@@ -46,6 +46,7 @@ config ATH10K_SNOC
depends on ARCH_QCOM || COMPILE_TEST
depends on QCOM_SMEM
depends on QCOM_RPROC_COMMON || QCOM_RPROC_COMMON=n
+ select POWER_SEQUENCING
select QCOM_SCM
select QCOM_QMI_HELPERS
help
diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c
index 2519e2400d58..980a12fb2c6e 100644
--- a/drivers/net/wireless/ath/ath12k/core.c
+++ b/drivers/net/wireless/ath/ath12k/core.c
@@ -1838,10 +1838,22 @@ static struct ath12k_hw_group *ath12k_core_hw_group_alloc(struct ath12k_base *ab
return ag;
}
+static void ath12k_core_free_wsi_info(struct ath12k_hw_group *ag)
+{
+ int i;
+
+ for (i = 0; i < ag->num_devices; i++) {
+ of_node_put(ag->wsi_node[i]);
+ ag->wsi_node[i] = NULL;
+ }
+ ag->num_devices = 0;
+}
+
static void ath12k_core_hw_group_free(struct ath12k_hw_group *ag)
{
mutex_lock(&ath12k_hw_group_mutex);
+ ath12k_core_free_wsi_info(ag);
list_del(&ag->list);
kfree(ag);
@@ -1867,52 +1879,59 @@ static struct ath12k_hw_group *ath12k_core_hw_group_find_by_dt(struct ath12k_bas
static int ath12k_core_get_wsi_info(struct ath12k_hw_group *ag,
struct ath12k_base *ab)
{
- struct device_node *wsi_dev = ab->dev->of_node, *next_wsi_dev;
- struct device_node *tx_endpoint, *next_rx_endpoint;
- int device_count = 0;
-
- next_wsi_dev = wsi_dev;
+ struct device_node *next_wsi_dev;
+ int device_count = 0, ret = 0;
+ struct device_node *wsi_dev;
- if (!next_wsi_dev)
+ wsi_dev = of_node_get(ab->dev->of_node);
+ if (!wsi_dev)
return -ENODEV;
do {
- ag->wsi_node[device_count] = next_wsi_dev;
+ if (device_count >= ATH12K_MAX_DEVICES) {
+ ath12k_warn(ab, "device count in DT %d is more than limit %d\n",
+ device_count, ATH12K_MAX_DEVICES);
+ ret = -EINVAL;
+ break;
+ }
+
+ ag->wsi_node[device_count++] = of_node_get(wsi_dev);
- tx_endpoint = of_graph_get_endpoint_by_regs(next_wsi_dev, 0, -1);
+ struct device_node *tx_endpoint __free(device_node) =
+ of_graph_get_endpoint_by_regs(wsi_dev, 0, -1);
if (!tx_endpoint) {
- of_node_put(next_wsi_dev);
- return -ENODEV;
+ ret = -ENODEV;
+ break;
}
- next_rx_endpoint = of_graph_get_remote_endpoint(tx_endpoint);
+ struct device_node *next_rx_endpoint __free(device_node) =
+ of_graph_get_remote_endpoint(tx_endpoint);
if (!next_rx_endpoint) {
- of_node_put(next_wsi_dev);
- of_node_put(tx_endpoint);
- return -ENODEV;
+ ret = -ENODEV;
+ break;
}
- of_node_put(tx_endpoint);
- of_node_put(next_wsi_dev);
-
next_wsi_dev = of_graph_get_port_parent(next_rx_endpoint);
if (!next_wsi_dev) {
- of_node_put(next_rx_endpoint);
- return -ENODEV;
+ ret = -ENODEV;
+ break;
}
- of_node_put(next_rx_endpoint);
+ of_node_put(wsi_dev);
+ wsi_dev = next_wsi_dev;
+ } while (ab->dev->of_node != wsi_dev);
- device_count++;
- if (device_count > ATH12K_MAX_DEVICES) {
- ath12k_warn(ab, "device count in DT %d is more than limit %d\n",
- device_count, ATH12K_MAX_DEVICES);
- of_node_put(next_wsi_dev);
- return -EINVAL;
+ if (ret) {
+ while (--device_count >= 0) {
+ of_node_put(ag->wsi_node[device_count]);
+ ag->wsi_node[device_count] = NULL;
}
- } while (wsi_dev != next_wsi_dev);
- of_node_put(next_wsi_dev);
+ of_node_put(wsi_dev);
+ return ret;
+ }
+
+ of_node_put(wsi_dev);
ag->num_devices = device_count;
return 0;
@@ -1983,9 +2002,9 @@ static struct ath12k_hw_group *ath12k_core_hw_group_assign(struct ath12k_base *a
ath12k_core_get_wsi_index(ag, ab)) {
ath12k_dbg(ab, ATH12K_DBG_BOOT,
"unable to get wsi info from dt, grouping single device");
+ ath12k_core_free_wsi_info(ag);
ag->id = ATH12K_INVALID_GROUP_ID;
ag->num_devices = 1;
- memset(ag->wsi_node, 0, sizeof(ag->wsi_node));
wsi->index = 0;
}
diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c
index 250459facff3..b108ccd0f637 100644
--- a/drivers/net/wireless/ath/ath12k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath12k/dp_rx.c
@@ -565,6 +565,9 @@ static int ath12k_dp_prepare_reo_update_elem(struct ath12k_dp *dp,
lockdep_assert_held(&dp->dp_lock);
+ if (!peer->primary_link)
+ return 0;
+
elem = kzalloc_obj(*elem, GFP_ATOMIC);
if (!elem)
return -ENOMEM;
@@ -1337,7 +1340,7 @@ void ath12k_dp_rx_deliver_msdu(struct ath12k_pdev_dp *dp_pdev, struct napi_struc
bool is_mcbc = rxcb->is_mcbc;
bool is_eapol = rxcb->is_eapol;
- peer = ath12k_dp_peer_find_by_peerid(dp_pdev, rx_info->peer_id);
+ peer = ath12k_dp_peer_find_by_peerid(dp_pdev, rxcb->peer_id);
pubsta = peer ? peer->sta : NULL;
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c
index fbdfe6424fd7..df2334f3bad6 100644
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -788,7 +788,7 @@ struct ath12k_link_vif *ath12k_mac_get_arvif(struct ath12k *ar, u32 vdev_id)
/* To use the arvif returned, caller must have held rcu read lock.
*/
- WARN_ON(!rcu_read_lock_any_held());
+ lockdep_assert_in_rcu_read_lock();
arvif_iter.vdev_id = vdev_id;
arvif_iter.ar = ar;
diff --git a/drivers/net/wireless/ath/ath12k/p2p.c b/drivers/net/wireless/ath/ath12k/p2p.c
index 59589748f1a8..19ebcd1d8eb2 100644
--- a/drivers/net/wireless/ath/ath12k/p2p.c
+++ b/drivers/net/wireless/ath/ath12k/p2p.c
@@ -123,7 +123,7 @@ static void ath12k_p2p_noa_update_vdev_iter(void *data, u8 *mac,
struct ath12k_p2p_noa_arg *arg = data;
struct ath12k_link_vif *arvif;
- WARN_ON(!rcu_read_lock_any_held());
+ lockdep_assert_in_rcu_read_lock();
arvif = &ahvif->deflink;
if (!arvif->is_created || arvif->ar != arg->ar || arvif->vdev_id != arg->vdev_id)
return;
diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c
index 65a05a9520ff..b5e904a55aea 100644
--- a/drivers/net/wireless/ath/ath12k/wmi.c
+++ b/drivers/net/wireless/ath/ath12k/wmi.c
@@ -9778,7 +9778,7 @@ static void
ath12k_wmi_rssi_dbm_conversion_params_info_event(struct ath12k_base *ab,
struct sk_buff *skb)
{
- struct ath12k_wmi_rssi_dbm_conv_info_arg rssi_info;
+ struct ath12k_wmi_rssi_dbm_conv_info_arg rssi_info = {};
struct ath12k *ar;
s32 noise_floor;
u32 pdev_id;
@@ -10251,7 +10251,7 @@ int ath12k_wmi_hw_data_filter_cmd(struct ath12k *ar, struct wmi_hw_data_filter_a
{
struct wmi_hw_data_filter_cmd *cmd;
struct sk_buff *skb;
- int len;
+ int ret, len;
len = sizeof(*cmd);
skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -10275,7 +10275,13 @@ int ath12k_wmi_hw_data_filter_cmd(struct ath12k *ar, struct wmi_hw_data_filter_a
"wmi hw data filter enable %d filter_bitmap 0x%x\n",
arg->enable, arg->hw_filter_bitmap);
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_HW_DATA_FILTER_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar)
@@ -10283,6 +10289,7 @@ int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar)
struct wmi_wow_host_wakeup_cmd *cmd;
struct sk_buff *skb;
size_t len;
+ int ret;
len = sizeof(*cmd);
skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -10295,14 +10302,20 @@ int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar)
ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow host wakeup ind\n");
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath12k_wmi_wow_enable(struct ath12k *ar)
{
struct wmi_wow_enable_cmd *cmd;
struct sk_buff *skb;
- int len;
+ int ret, len;
len = sizeof(*cmd);
skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -10317,7 +10330,13 @@ int ath12k_wmi_wow_enable(struct ath12k *ar)
cmd->pause_iface_config = cpu_to_le32(WOW_IFACE_PAUSE_ENABLED);
ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow enable\n");
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id,
@@ -10327,6 +10346,7 @@ int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id,
struct wmi_wow_add_del_event_cmd *cmd;
struct sk_buff *skb;
size_t len;
+ int ret;
len = sizeof(*cmd);
skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -10343,7 +10363,13 @@ int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id,
ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow add wakeup event %s enable %d vdev_id %d\n",
wow_wakeup_event(event), enable, vdev_id);
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id,
@@ -10356,6 +10382,7 @@ int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id,
struct sk_buff *skb;
void *ptr;
size_t len;
+ int ret;
len = sizeof(*cmd) +
sizeof(*tlv) + /* array struct */
@@ -10435,7 +10462,13 @@ int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id,
ath12k_dbg_dump(ar->ab, ATH12K_DBG_WMI, NULL, "wow bitmask: ",
bitmap->bitmaskbuf, pattern_len);
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_WOW_ADD_WAKE_PATTERN_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id)
@@ -10443,6 +10476,7 @@ int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id)
struct wmi_wow_del_pattern_cmd *cmd;
struct sk_buff *skb;
size_t len;
+ int ret;
len = sizeof(*cmd);
skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -10459,7 +10493,13 @@ int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id)
ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow del pattern vdev_id %d pattern_id %d\n",
vdev_id, pattern_id);
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_WOW_DEL_WAKE_PATTERN_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
static struct sk_buff *
@@ -10595,6 +10635,7 @@ int ath12k_wmi_wow_config_pno(struct ath12k *ar, u32 vdev_id,
struct wmi_pno_scan_req_arg *pno_scan)
{
struct sk_buff *skb;
+ int ret;
if (pno_scan->enable)
skb = ath12k_wmi_op_gen_config_pno_start(ar, vdev_id, pno_scan);
@@ -10604,7 +10645,13 @@ int ath12k_wmi_wow_config_pno(struct ath12k *ar, u32 vdev_id,
if (IS_ERR_OR_NULL(skb))
return -ENOMEM;
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
static void ath12k_wmi_fill_ns_offload(struct ath12k *ar,
@@ -10717,6 +10764,7 @@ int ath12k_wmi_arp_ns_offload(struct ath12k *ar,
void *buf_ptr;
size_t len;
u8 ns_cnt, ns_ext_tuples = 0;
+ int ret;
ns_cnt = offload->ipv6_count;
@@ -10752,7 +10800,13 @@ int ath12k_wmi_arp_ns_offload(struct ath12k *ar,
if (ns_ext_tuples)
ath12k_wmi_fill_ns_offload(ar, offload, &buf_ptr, enable, 1);
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_SET_ARP_NS_OFFLOAD_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar,
@@ -10762,7 +10816,7 @@ int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar,
struct wmi_gtk_rekey_offload_cmd *cmd;
struct sk_buff *skb;
__le64 replay_ctr;
- int len;
+ int ret, len;
len = sizeof(*cmd);
skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -10789,7 +10843,13 @@ int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar,
ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "offload gtk rekey vdev: %d %d\n",
arvif->vdev_id, enable);
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID offload\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar,
@@ -10797,7 +10857,7 @@ int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar,
{
struct wmi_gtk_rekey_offload_cmd *cmd;
struct sk_buff *skb;
- int len;
+ int ret, len;
len = sizeof(*cmd);
skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -10811,7 +10871,13 @@ int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar,
ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "get gtk rekey vdev_id: %d\n",
arvif->vdev_id);
- return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+ ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID getinfo\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath12k_wmi_sta_keepalive(struct ath12k *ar,
@@ -10822,6 +10888,7 @@ int ath12k_wmi_sta_keepalive(struct ath12k *ar,
struct wmi_sta_keepalive_cmd *cmd;
struct sk_buff *skb;
size_t len;
+ int ret;
len = sizeof(*cmd) + sizeof(*arp);
skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len);
@@ -10849,7 +10916,13 @@ int ath12k_wmi_sta_keepalive(struct ath12k *ar,
"wmi sta keepalive vdev %d enabled %d method %d interval %d\n",
arg->vdev_id, arg->enabled, arg->method, arg->interval);
- return ath12k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID);
+ ret = ath12k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to send WMI_STA_KEEPALIVE_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath12k_wmi_mlo_setup(struct ath12k *ar, struct wmi_mlo_setup_arg *mlo_params)
diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
index 05c9c07591fc..6ca31d4ea437 100644
--- a/drivers/net/wireless/ath/ath5k/base.c
+++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -1738,7 +1738,8 @@ ath5k_tx_frame_completed(struct ath5k_hw *ah, struct sk_buff *skb,
}
info->status.rates[ts->ts_final_idx].count = ts->ts_final_retry;
- info->status.rates[ts->ts_final_idx + 1].idx = -1;
+ if (ts->ts_final_idx + 1 < IEEE80211_TX_MAX_RATES)
+ info->status.rates[ts->ts_final_idx + 1].idx = -1;
if (unlikely(ts->ts_status)) {
ah->stats.ack_fail++;
diff --git a/drivers/net/wireless/broadcom/b43/xmit.c b/drivers/net/wireless/broadcom/b43/xmit.c
index 7651b1bdb592..f0b082596637 100644
--- a/drivers/net/wireless/broadcom/b43/xmit.c
+++ b/drivers/net/wireless/broadcom/b43/xmit.c
@@ -702,7 +702,8 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr)
* key index, but the ucode passed it slightly different.
*/
keyidx = b43_kidx_to_raw(dev, keyidx);
- B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key));
+ if (B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key)))
+ goto drop;
if (dev->key[keyidx].algorithm != B43_SEC_ALGO_NONE) {
wlhdr_len = ieee80211_hdrlen(fctl);
diff --git a/drivers/net/wireless/broadcom/b43legacy/xmit.c b/drivers/net/wireless/broadcom/b43legacy/xmit.c
index efd63f4ce74f..ee199d4eaf03 100644
--- a/drivers/net/wireless/broadcom/b43legacy/xmit.c
+++ b/drivers/net/wireless/broadcom/b43legacy/xmit.c
@@ -476,7 +476,8 @@ void b43legacy_rx(struct b43legacy_wldev *dev,
* key index, but the ucode passed it slightly different.
*/
keyidx = b43legacy_kidx_to_raw(dev, keyidx);
- B43legacy_WARN_ON(keyidx >= dev->max_nr_keys);
+ if (B43legacy_WARN_ON(keyidx >= dev->max_nr_keys))
+ goto drop;
if (dev->key[keyidx].algorithm != B43legacy_SEC_ALGO_NONE) {
/* Remove PROTECTED flag to mark it as decrypted. */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 30f6fcb68632..8fb595733b9c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -2476,8 +2476,9 @@ static void brcmf_sdio_bus_stop(struct device *dev)
brcmf_dbg(TRACE, "Enter\n");
if (bus->watchdog_tsk) {
+ get_task_struct(bus->watchdog_tsk);
send_sig(SIGTERM, bus->watchdog_tsk, 1);
- kthread_stop(bus->watchdog_tsk);
+ kthread_stop_put(bus->watchdog_tsk);
bus->watchdog_tsk = NULL;
}
@@ -4567,8 +4568,9 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus)
if (bus) {
/* Stop watchdog task */
if (bus->watchdog_tsk) {
+ get_task_struct(bus->watchdog_tsk);
send_sig(SIGTERM, bus->watchdog_tsk, 1);
- kthread_stop(bus->watchdog_tsk);
+ kthread_stop_put(bus->watchdog_tsk);
bus->watchdog_tsk = NULL;
}
diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c
index 4fae0e335136..5cc0c5cac257 100644
--- a/drivers/net/wireless/marvell/libertas/if_usb.c
+++ b/drivers/net/wireless/marvell/libertas/if_usb.c
@@ -310,6 +310,7 @@ static void if_usb_disconnect(struct usb_interface *intf)
struct lbs_private *priv = cardp->priv;
cardp->surprise_removed = 1;
+ wake_up(&cardp->fw_wq);
if (priv) {
lbs_stop_card(priv);
@@ -633,9 +634,10 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff,
unsigned long flags;
u8 i;
- if (recvlength > LBS_CMD_BUFFER_SIZE) {
+ if (recvlength < MESSAGE_HEADER_LEN ||
+ recvlength > LBS_CMD_BUFFER_SIZE) {
lbs_deb_usbd(&cardp->udev->dev,
- "The receive buffer is too large\n");
+ "The receive buffer is invalid: %d\n", recvlength);
kfree_skb(skb);
return;
}
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h
index 591602beeec6..3cdf9ded876d 100644
--- a/drivers/net/wireless/rsi/rsi_common.h
+++ b/drivers/net/wireless/rsi/rsi_common.h
@@ -70,12 +70,11 @@ static inline int rsi_create_kthread(struct rsi_common *common,
return 0;
}
-static inline int rsi_kill_thread(struct rsi_thread *handle)
+static inline void rsi_kill_thread(struct rsi_thread *handle)
{
atomic_inc(&handle->thread_done);
rsi_set_event(&handle->event);
-
- return kthread_stop(handle->task);
+ wait_for_completion(&handle->completion);
}
void rsi_mac80211_detach(struct rsi_hw *hw);
diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c
index 84eb15d729c7..120f0379f81d 100644
--- a/drivers/net/wireless/st/cw1200/pm.c
+++ b/drivers/net/wireless/st/cw1200/pm.c
@@ -264,14 +264,12 @@ int cw1200_wow_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
wiphy_err(priv->hw->wiphy,
"PM request failed: %d. WoW is disabled.\n", ret);
cw1200_wow_resume(hw);
- mutex_unlock(&priv->conf_mutex);
return -EBUSY;
}
/* Force resume if event is coming from the device. */
if (atomic_read(&priv->bh_rx)) {
cw1200_wow_resume(hw);
- mutex_unlock(&priv->conf_mutex);
return -EAGAIN;
}
diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c
index 7968e208dd37..adb29d30c63f 100644
--- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c
+++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c
@@ -457,8 +457,20 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf
offset = sizeof(struct feature_query);
for (i = 0; i < FEATURE_COUNT && offset < data_length; i++) {
+ size_t remaining = data_length - offset;
+ size_t feat_data_len, feat_total;
+
+ if (remaining < sizeof(*rt_feature))
+ break;
+
rt_feature = data + offset;
- offset += sizeof(*rt_feature) + le32_to_cpu(rt_feature->data_len);
+ feat_data_len = le32_to_cpu(rt_feature->data_len);
+
+ if (feat_data_len > remaining - sizeof(*rt_feature))
+ break;
+
+ feat_total = sizeof(*rt_feature) + feat_data_len;
+ offset += feat_total;
ft_spt_cfg = FIELD_GET(FEATURE_MSK, core->feature_set[i]);
if (ft_spt_cfg != MTK_FEATURE_MUST_BE_SUPPORTED)
@@ -468,8 +480,10 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf
if (ft_spt_st != MTK_FEATURE_MUST_BE_SUPPORTED)
return -EINVAL;
- if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM)
- t7xx_port_enum_msg_handler(ctl->md, rt_feature->data);
+ if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM) {
+ t7xx_port_enum_msg_handler(ctl->md, rt_feature->data,
+ feat_data_len);
+ }
}
return 0;
diff --git a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c
index ae632ef96698..f869e4ed9ee9 100644
--- a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c
+++ b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c
@@ -117,6 +117,7 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c
* t7xx_port_enum_msg_handler() - Parse the port enumeration message to create/remove nodes.
* @md: Modem context.
* @msg: Message.
+ * @msg_len: Length of @msg in bytes.
*
* Used to control create/remove device node.
*
@@ -124,12 +125,18 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c
* * 0 - Success.
* * -EFAULT - Message check failure.
*/
-int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg)
+int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len)
{
struct device *dev = &md->t7xx_dev->pdev->dev;
unsigned int version, port_count, i;
struct port_msg *port_msg = msg;
+ if (msg_len < sizeof(*port_msg)) {
+ dev_err(dev, "Port enum msg too short for header: need %zu, have %zu\n",
+ sizeof(*port_msg), msg_len);
+ return -EINVAL;
+ }
+
version = FIELD_GET(PORT_MSG_VERSION, le32_to_cpu(port_msg->info));
if (version != PORT_ENUM_VER ||
le32_to_cpu(port_msg->head_pattern) != PORT_ENUM_HEAD_PATTERN ||
@@ -141,6 +148,13 @@ int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg)
}
port_count = FIELD_GET(PORT_MSG_PRT_CNT, le32_to_cpu(port_msg->info));
+
+ if (msg_len < struct_size(port_msg, data, port_count)) {
+ dev_err(dev, "Port enum msg too short: need %zu, have %zu\n",
+ struct_size(port_msg, data, port_count), msg_len);
+ return -EINVAL;
+ }
+
for (i = 0; i < port_count; i++) {
u32 port_info = le32_to_cpu(port_msg->data[i]);
unsigned int ch_id;
@@ -191,7 +205,7 @@ static int control_msg_handler(struct t7xx_port *port, struct sk_buff *skb)
case CTL_ID_PORT_ENUM:
skb_pull(skb, sizeof(*ctrl_msg_h));
- ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data);
+ ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data, skb->len);
if (!ret)
ret = port_ctl_send_msg_to_md(port, CTL_ID_PORT_ENUM, 0);
else
diff --git a/drivers/net/wwan/t7xx/t7xx_port_proxy.h b/drivers/net/wwan/t7xx/t7xx_port_proxy.h
index f0918b36e899..7c3190bf0fcf 100644
--- a/drivers/net/wwan/t7xx/t7xx_port_proxy.h
+++ b/drivers/net/wwan/t7xx/t7xx_port_proxy.h
@@ -103,7 +103,7 @@ void t7xx_port_proxy_reset(struct port_proxy *port_prox);
void t7xx_port_proxy_uninit(struct port_proxy *port_prox);
int t7xx_port_proxy_init(struct t7xx_modem *md);
void t7xx_port_proxy_md_status_notify(struct port_proxy *port_prox, unsigned int state);
-int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg);
+int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len);
int t7xx_port_proxy_chl_enable_disable(struct port_proxy *port_prox, unsigned int ch_id,
bool en_flag);
void t7xx_port_proxy_set_cfg(struct t7xx_modem *md, enum port_cfg_id cfg_id);
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 77c778d84d4c..a81b46af5118 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -146,6 +146,9 @@ struct xt_match {
/* Called when user tries to insert an entry of this type. */
int (*checkentry)(const struct xt_mtchk_param *);
+ /* Called to validate hooks based on the match configuration. */
+ int (*check_hooks)(const struct xt_mtchk_param *);
+
/* Called when entry of this type deleted. */
void (*destroy)(const struct xt_mtdtor_param *);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
@@ -187,6 +190,9 @@ struct xt_target {
/* Should return 0 on success or an error code otherwise (-Exxxx). */
int (*checkentry)(const struct xt_tgchk_param *);
+ /* Called to validate hooks based on the target configuration. */
+ int (*check_hooks)(const struct xt_tgchk_param *);
+
/* Called when entry of this type deleted. */
void (*destroy)(const struct xt_tgdtor_param *);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
@@ -279,8 +285,10 @@ bool xt_find_jump_offset(const unsigned int *offsets,
int xt_check_proc_name(const char *name, unsigned int size);
+int xt_check_hooks_match(struct xt_mtchk_param *par);
int xt_check_match(struct xt_mtchk_param *, unsigned int size, u16 proto,
bool inv_proto);
+int xt_check_hooks_target(struct xt_tgchk_param *par);
int xt_check_target(struct xt_tgchk_param *, unsigned int size, u16 proto,
bool inv_proto);
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index dc3975ff1b2e..cf0fd03dd7a2 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -21,6 +21,11 @@ enum hk_type {
HK_TYPE_MAX,
/*
+ * HK_TYPE_KTHREAD is now an alias of HK_TYPE_DOMAIN
+ */
+ HK_TYPE_KTHREAD = HK_TYPE_DOMAIN,
+
+ /*
* The following housekeeping types are only set by the nohz_full
* boot commandline option. So they can share the same value.
*/
@@ -29,7 +34,6 @@ enum hk_type {
HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE,
HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE,
HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE,
- HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE
};
#ifdef CONFIG_CPU_ISOLATION
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a7bffb908c1e..aa600fbf9a53 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -2495,7 +2495,7 @@ void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
bdaddr_t *bdaddr, u8 addr_type);
int hci_abort_conn(struct hci_conn *conn, u8 reason);
-u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
+void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
u16 to_multiplier);
void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand,
__u8 ltk[16], __u8 key_size);
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index e0ca3904ff8e..2f312d1f67d6 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -99,6 +99,7 @@
FN(FRAG_TOO_FAR) \
FN(TCP_MINTTL) \
FN(IPV6_BAD_EXTHDR) \
+ FN(IPV6_TOO_MANY_EXTHDRS) \
FN(IPV6_NDISC_FRAG) \
FN(IPV6_NDISC_HOP_LIMIT) \
FN(IPV6_NDISC_BAD_CODE) \
@@ -494,6 +495,11 @@ enum skb_drop_reason {
SKB_DROP_REASON_TCP_MINTTL,
/** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */
SKB_DROP_REASON_IPV6_BAD_EXTHDR,
+ /**
+ * @SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS: Number of IPv6 extension
+ * headers in the packet exceeds IP6_MAX_EXT_HDRS_CNT.
+ */
+ SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS,
/** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */
SKB_DROP_REASON_IPV6_NDISC_FRAG,
/** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 72d325c81313..02762ce73a0c 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -491,6 +491,7 @@ struct ip_vs_est_kt_data {
DECLARE_BITMAP(avail, IPVS_EST_NTICKS); /* tick has space for ests */
unsigned long est_timer; /* estimation timer (jiffies) */
struct ip_vs_stats *calc_stats; /* Used for calculation */
+ int needed; /* task is needed */
int tick_len[IPVS_EST_NTICKS]; /* est count */
int id; /* ktid per netns */
int chain_max; /* max ests per tick chain */
@@ -1411,7 +1412,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
return ipvs->sysctl_run_estimation;
}
-static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
+static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs)
{
if (ipvs->est_cpulist_valid)
return ipvs->sysctl_est_cpulist;
@@ -1529,7 +1530,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
return 1;
}
-static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
+static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs)
{
return housekeeping_cpumask(HK_TYPE_KTHREAD);
}
@@ -1564,6 +1565,18 @@ static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs)
return READ_ONCE(ipvs->sysctl_svc_lfactor);
}
+static inline bool sysctl_est_cpulist_empty(struct netns_ipvs *ipvs)
+{
+ guard(rcu)();
+ return cpumask_empty(__sysctl_est_cpulist(ipvs));
+}
+
+static inline unsigned int sysctl_est_cpulist_weight(struct netns_ipvs *ipvs)
+{
+ guard(rcu)();
+ return cpumask_weight(__sysctl_est_cpulist(ipvs));
+}
+
/* IPVS core functions
* (from ip_vs_core.c)
*/
@@ -1884,18 +1897,26 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats);
void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);
-void ip_vs_est_reload_start(struct netns_ipvs *ipvs);
+void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart);
int ip_vs_est_kthread_start(struct netns_ipvs *ipvs,
struct ip_vs_est_kt_data *kd);
void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd);
+static inline void ip_vs_stop_estimator_tot_stats(struct netns_ipvs *ipvs)
+{
+#ifdef CONFIG_SYSCTL
+ ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s);
+ ipvs->tot_stats->s.est.ktid = -2;
+#endif
+}
+
static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs)
{
#ifdef CONFIG_SYSCTL
/* Stop tasks while cpulist is empty or if disabled with flag */
ipvs->est_stopped = !sysctl_run_estimation(ipvs) ||
(ipvs->est_cpulist_valid &&
- cpumask_empty(sysctl_est_cpulist(ipvs)));
+ sysctl_est_cpulist_empty(ipvs));
#endif
}
@@ -1911,7 +1932,7 @@ static inline bool ip_vs_est_stopped(struct netns_ipvs *ipvs)
static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs)
{
unsigned int limit = IPVS_EST_CPU_KTHREADS *
- cpumask_weight(sysctl_est_cpulist(ipvs));
+ sysctl_est_cpulist_weight(ipvs);
return max(1U, limit);
}
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d042afe7a245..1dec81faff28 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -90,6 +90,9 @@ struct ip_tunnel_info;
#define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */
#define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */
+/* Hard limit on traversed IPv6 extension headers */
+#define IP6_MAX_EXT_HDRS_CNT 12
+
/*
* Addr type
*
diff --git a/include/net/mana/shm_channel.h b/include/net/mana/shm_channel.h
index 5199b41497ff..dbabcfb95daf 100644
--- a/include/net/mana/shm_channel.h
+++ b/include/net/mana/shm_channel.h
@@ -4,6 +4,12 @@
#ifndef _SHM_CHANNEL_H
#define _SHM_CHANNEL_H
+#define SMC_APERTURE_BITS 256
+#define SMC_BASIC_UNIT (sizeof(u32))
+#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8))
+#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1)
+#define SMC_APERTURE_SIZE (SMC_APERTURE_BITS / 8)
+
struct shm_channel {
struct device *dev;
void __iomem *base;
diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h
index b175d271aec9..609bcf422a9b 100644
--- a/include/net/netfilter/nf_dup_netdev.h
+++ b/include/net/netfilter/nf_dup_netdev.h
@@ -3,10 +3,23 @@
#define _NF_DUP_NETDEV_H_
#include <net/netfilter/nf_tables.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif);
void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif);
+#define NF_RECURSION_LIMIT 2
+
+static inline u8 *nf_get_nf_dup_skb_recursion(void)
+{
+#ifndef CONFIG_PREEMPT_RT
+ return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion);
+#else
+ return &current->net_xmit.nf_dup_skb_recursion;
+#endif
+}
+
struct nft_offload_ctx;
struct nft_flow_rule;
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index b09c11c048d5..7b23b245a5a8 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -148,9 +148,10 @@ struct flow_offload_tuple {
/* All members above are keys for lookups, see flow_offload_hash(). */
struct { } __hash;
- u8 dir:2,
+ u16 dir:2,
xmit_type:3,
encap_num:2,
+ needs_gso_segment:1,
tun_num:2,
in_vlan_ingress:2;
u16 mtu;
@@ -232,6 +233,7 @@ struct nf_flow_route {
u32 hw_ifindex;
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
+ u8 needs_gso_segment:1;
} out;
enum flow_offload_xmit_type xmit_type;
} tuple[FLOW_OFFLOAD_DIR_MAX];
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 80ccd4dda8e0..6e27c56514df 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -275,7 +275,7 @@ struct netns_ipv4 {
#ifdef CONFIG_IP_MROUTE
#ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
- struct mr_table *mrt;
+ struct mr_table __rcu *mrt;
#else
struct list_head mr_tables;
struct fib_rules_ops *mr_rules_ops;
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index d44987d4515c..853c8d7644b5 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -330,11 +330,18 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
goto badframe;
break;
case BNEP_FILTER_MULTI_ADDR_SET:
- case BNEP_FILTER_NET_TYPE_SET:
- /* Pull: ctrl type (1 b), len (2 b), data (len bytes) */
- if (!skb_pull(skb, 3 + *(u16 *)(skb->data + 1) * 2))
+ case BNEP_FILTER_NET_TYPE_SET: {
+ u8 *hdr;
+
+ /* Pull ctrl type (1 b) + len (2 b) */
+ hdr = skb_pull_data(skb, 3);
+ if (!hdr)
+ goto badframe;
+ /* Pull data (len bytes); length is big-endian */
+ if (!skb_pull(skb, get_unaligned_be16(&hdr[1])))
goto badframe;
break;
+ }
default:
kfree_skb(skb);
return 0;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 3a0592599086..17b46ad6a349 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -480,40 +480,107 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
return hci_setup_sync_conn(conn, handle);
}
-u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
- u16 to_multiplier)
+struct le_conn_update_data {
+ struct hci_conn *conn;
+ u16 min;
+ u16 max;
+ u16 latency;
+ u16 to_multiplier;
+};
+
+static int le_conn_update_sync(struct hci_dev *hdev, void *data)
{
- struct hci_dev *hdev = conn->hdev;
+ struct le_conn_update_data *d = data;
+ struct hci_conn *conn = d->conn;
struct hci_conn_params *params;
struct hci_cp_le_conn_update cp;
+ u16 timeout;
+ u8 store_hint;
+ int err;
+ /* Verify connection is still alive and read conn fields under
+ * the same lock to prevent a concurrent disconnect from freeing
+ * or reusing the connection while we build the HCI command.
+ */
hci_dev_lock(hdev);
- params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
- if (params) {
- params->conn_min_interval = min;
- params->conn_max_interval = max;
- params->conn_latency = latency;
- params->supervision_timeout = to_multiplier;
+ if (!hci_conn_valid(hdev, conn)) {
+ hci_dev_unlock(hdev);
+ return -ECANCELED;
}
- hci_dev_unlock(hdev);
-
memset(&cp, 0, sizeof(cp));
cp.handle = cpu_to_le16(conn->handle);
- cp.conn_interval_min = cpu_to_le16(min);
- cp.conn_interval_max = cpu_to_le16(max);
- cp.conn_latency = cpu_to_le16(latency);
- cp.supervision_timeout = cpu_to_le16(to_multiplier);
+ cp.conn_interval_min = cpu_to_le16(d->min);
+ cp.conn_interval_max = cpu_to_le16(d->max);
+ cp.conn_latency = cpu_to_le16(d->latency);
+ cp.supervision_timeout = cpu_to_le16(d->to_multiplier);
cp.min_ce_len = cpu_to_le16(0x0000);
cp.max_ce_len = cpu_to_le16(0x0000);
+ timeout = conn->conn_timeout;
+
+ hci_dev_unlock(hdev);
- hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp);
+ err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CONN_UPDATE,
+ sizeof(cp), &cp,
+ HCI_EV_LE_CONN_UPDATE_COMPLETE,
+ timeout, NULL);
+ if (err)
+ return err;
+
+ /* Update stored connection parameters after the controller has
+ * confirmed the update via the LE Connection Update Complete event.
+ */
+ hci_dev_lock(hdev);
+
+ params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
+ if (params) {
+ params->conn_min_interval = d->min;
+ params->conn_max_interval = d->max;
+ params->conn_latency = d->latency;
+ params->supervision_timeout = d->to_multiplier;
+ store_hint = 0x01;
+ } else {
+ store_hint = 0x00;
+ }
- if (params)
- return 0x01;
+ hci_dev_unlock(hdev);
- return 0x00;
+ mgmt_new_conn_param(hdev, &conn->dst, conn->dst_type, store_hint,
+ d->min, d->max, d->latency, d->to_multiplier);
+
+ return 0;
+}
+
+static void le_conn_update_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct le_conn_update_data *d = data;
+
+ hci_conn_put(d->conn);
+ kfree(d);
+}
+
+void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
+ u16 to_multiplier)
+{
+ struct le_conn_update_data *d;
+
+ d = kzalloc_obj(*d);
+ if (!d)
+ return;
+
+ hci_conn_get(conn);
+ d->conn = conn;
+ d->min = min;
+ d->max = max;
+ d->latency = latency;
+ d->to_multiplier = to_multiplier;
+
+ if (hci_cmd_sync_queue(conn->hdev, le_conn_update_sync, d,
+ le_conn_update_complete) < 0) {
+ hci_conn_put(conn);
+ kfree(d);
+ }
}
void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand,
@@ -2130,6 +2197,9 @@ static int create_big_sync(struct hci_dev *hdev, void *data)
u32 flags = 0;
int err;
+ if (!hci_conn_valid(hdev, conn))
+ return -ECANCELED;
+
if (qos->bcast.out.phys == BIT(1))
flags |= MGMT_ADV_FLAG_SEC_2M;
@@ -2204,11 +2274,24 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "conn %p", conn);
+ if (err == -ECANCELED)
+ goto done;
+
+ hci_dev_lock(hdev);
+
+ if (!hci_conn_valid(hdev, conn))
+ goto unlock;
+
if (err) {
bt_dev_err(hdev, "Unable to create BIG: %d", err);
hci_connect_cfm(conn, err);
hci_conn_del(conn);
}
+
+unlock:
+ hci_dev_unlock(hdev);
+done:
+ hci_conn_put(conn);
}
struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid,
@@ -2336,10 +2419,11 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
BT_BOUND, &data);
/* Queue start periodic advertising and create BIG */
- err = hci_cmd_sync_queue(hdev, create_big_sync, conn,
+ err = hci_cmd_sync_queue(hdev, create_big_sync, hci_conn_get(conn),
create_big_complete);
if (err < 0) {
hci_conn_drop(conn);
+ hci_conn_put(conn);
return ERR_PTR(err);
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index b2ee6b6a0f56..eea2f810aafa 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -7118,9 +7118,29 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
continue;
}
+ if (ev->num_bis <= i) {
+ bt_dev_err(hdev,
+ "Not enough BIS handles for BIG 0x%2.2x",
+ ev->handle);
+ ev->status = HCI_ERROR_UNSPECIFIED;
+ hci_connect_cfm(conn, ev->status);
+ hci_conn_del(conn);
+ continue;
+ }
+
if (hci_conn_set_handle(conn,
- __le16_to_cpu(ev->bis_handle[i++])))
+ __le16_to_cpu(ev->bis_handle[i++]))) {
+ bt_dev_err(hdev,
+ "Failed to set BIS handle for BIG 0x%2.2x",
+ ev->handle);
+ /* Force error so BIG gets terminated as not all BIS
+ * could be connected.
+ */
+ ev->status = HCI_ERROR_UNSPECIFIED;
+ hci_connect_cfm(conn, ev->status);
+ hci_conn_del(conn);
continue;
+ }
conn->state = BT_CONNECTED;
set_bit(HCI_CONN_BIG_CREATED, &conn->flags);
@@ -7129,7 +7149,10 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
hci_iso_setup_path(conn);
}
- if (!ev->status && !i)
+ /* If there is an unexpected error or if no BISes have been connected
+ * for the BIG, terminate it.
+ */
+ if (ev->status == HCI_ERROR_UNSPECIFIED || (!ev->status && !i))
/* If no BISes have been connected for the BIG,
* terminate. This is in case all bound connections
* have been closed before the BIG creation
@@ -7168,7 +7191,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags);
conn->num_bis = 0;
- memset(conn->bis, 0, sizeof(conn->num_bis));
+ memset(conn->bis, 0, sizeof(conn->bis));
for (i = 0; i < ev->num_bis; i++) {
u16 handle = le16_to_cpu(ev->bis[i]);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 7bcf8c5ceaee..976f91eeb745 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1036,6 +1036,28 @@ static struct hidp_session *hidp_session_find(const bdaddr_t *bdaddr)
}
/*
+ * Consume session->conn: clear the member under hidp_session_sem, then
+ * l2cap_unregister_user() and l2cap_conn_put() the snapshot outside the
+ * sem. At most one caller wins; later callers see NULL and skip. The
+ * reference is the one hidp_session_new() took via l2cap_conn_get().
+ */
+static void hidp_session_unregister_conn(struct hidp_session *session)
+{
+ struct l2cap_conn *conn;
+
+ down_write(&hidp_session_sem);
+ conn = session->conn;
+ if (conn)
+ session->conn = NULL;
+ up_write(&hidp_session_sem);
+
+ if (conn) {
+ l2cap_unregister_user(conn, &session->user);
+ l2cap_conn_put(conn);
+ }
+}
+
+/*
* Start session synchronously
* This starts a session thread and waits until initialization
* is done or returns an error if it couldn't be started.
@@ -1311,8 +1333,7 @@ static int hidp_session_thread(void *arg)
* Instead, this call has the same semantics as if user-space tried to
* delete the session.
*/
- if (session->conn)
- l2cap_unregister_user(session->conn, &session->user);
+ hidp_session_unregister_conn(session);
hidp_session_put(session);
@@ -1418,7 +1439,7 @@ int hidp_connection_del(struct hidp_conndel_req *req)
HIDP_CTRL_VIRTUAL_CABLE_UNPLUG,
NULL, 0);
else
- l2cap_unregister_user(session->conn, &session->user);
+ hidp_session_unregister_conn(session);
hidp_session_put(session);
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index be145e2736b7..7cb2864fe872 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -347,6 +347,7 @@ static int iso_connect_bis(struct sock *sk)
return -EHOSTUNREACH;
hci_dev_lock(hdev);
+ lock_sock(sk);
if (!bis_capable(hdev)) {
err = -EOPNOTSUPP;
@@ -399,13 +400,9 @@ static int iso_connect_bis(struct sock *sk)
goto unlock;
}
- lock_sock(sk);
-
err = iso_chan_add(conn, sk, NULL);
- if (err) {
- release_sock(sk);
+ if (err)
goto unlock;
- }
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -421,9 +418,8 @@ static int iso_connect_bis(struct sock *sk)
iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo));
}
- release_sock(sk);
-
unlock:
+ release_sock(sk);
hci_dev_unlock(hdev);
hci_dev_put(hdev);
return err;
@@ -444,6 +440,7 @@ static int iso_connect_cis(struct sock *sk)
return -EHOSTUNREACH;
hci_dev_lock(hdev);
+ lock_sock(sk);
if (!cis_central_capable(hdev)) {
err = -EOPNOTSUPP;
@@ -498,13 +495,9 @@ static int iso_connect_cis(struct sock *sk)
goto unlock;
}
- lock_sock(sk);
-
err = iso_chan_add(conn, sk, NULL);
- if (err) {
- release_sock(sk);
+ if (err)
goto unlock;
- }
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -520,9 +513,8 @@ static int iso_connect_cis(struct sock *sk)
iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo));
}
- release_sock(sk);
-
unlock:
+ release_sock(sk);
hci_dev_unlock(hdev);
hci_dev_put(hdev);
return err;
@@ -1193,7 +1185,7 @@ static int iso_sock_connect(struct socket *sock, struct sockaddr_unsized *addr,
release_sock(sk);
- if (bacmp(&iso_pi(sk)->dst, BDADDR_ANY))
+ if (bacmp(&sa->iso_bdaddr, BDADDR_ANY))
err = iso_connect_cis(sk);
else
err = iso_connect_bis(sk);
@@ -2256,8 +2248,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN,
iso_match_sid, ev1);
if (sk && !ev1->status) {
+ lock_sock(sk);
iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle);
iso_pi(sk)->bc_sid = ev1->sid;
+ release_sock(sk);
}
goto done;
@@ -2268,8 +2262,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN,
iso_match_sid_past, ev1a);
if (sk && !ev1a->status) {
+ lock_sock(sk);
iso_pi(sk)->sync_handle = le16_to_cpu(ev1a->sync_handle);
iso_pi(sk)->bc_sid = ev1a->sid;
+ release_sock(sk);
}
goto done;
@@ -2296,27 +2292,35 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
ev2);
if (sk) {
- int err;
- struct hci_conn *hcon = iso_pi(sk)->conn->hcon;
+ int err = 0;
+ bool big_sync;
+ struct hci_conn *hcon;
+ lock_sock(sk);
+
+ hcon = iso_pi(sk)->conn->hcon;
iso_pi(sk)->qos.bcast.encryption = ev2->encryption;
if (ev2->num_bis < iso_pi(sk)->bc_num_bis)
iso_pi(sk)->bc_num_bis = ev2->num_bis;
- if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
- !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
+ big_sync = !test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
+ !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags);
+
+ if (big_sync)
err = hci_conn_big_create_sync(hdev, hcon,
&iso_pi(sk)->qos,
iso_pi(sk)->sync_handle,
iso_pi(sk)->bc_num_bis,
iso_pi(sk)->bc_bis);
- if (err) {
- bt_dev_err(hdev, "hci_le_big_create_sync: %d",
- err);
- sock_put(sk);
- sk = NULL;
- }
+
+ release_sock(sk);
+
+ if (big_sync && err) {
+ bt_dev_err(hdev, "hci_le_big_create_sync: %d",
+ err);
+ sock_put(sk);
+ sk = NULL;
}
}
@@ -2370,8 +2374,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (!base || base_len > BASE_MAX_LENGTH)
goto done;
+ lock_sock(sk);
memcpy(iso_pi(sk)->base, base, base_len);
iso_pi(sk)->base_len = base_len;
+ release_sock(sk);
} else {
/* This is a PA data fragment. Keep pa_data_len set to 0
* until all data has been reassembled.
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 77dec104a9c3..7701528f1167 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4706,16 +4706,8 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP,
sizeof(rsp), &rsp);
- if (!err) {
- u8 store_hint;
-
- store_hint = hci_le_conn_update(hcon, min, max, latency,
- to_multiplier);
- mgmt_new_conn_param(hcon->hdev, &hcon->dst, hcon->dst_type,
- store_hint, min, max, latency,
- to_multiplier);
-
- }
+ if (!err)
+ hci_le_conn_update(hcon, min, max, latency, to_multiplier);
return 0;
}
@@ -5428,7 +5420,7 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn,
* configured, the MPS field may be less than the current MPS
* of that channel.
*/
- if (chan[i]->remote_mps >= mps && i) {
+ if (chan[i]->remote_mps > mps && num_scid > 1) {
BT_ERR("chan %p decreased MPS %u -> %u", chan[i],
chan[i]->remote_mps, mps);
result = L2CAP_RECONF_INVALID_MPS;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 71e8c1b45bce..cf590a67d364 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1498,6 +1498,9 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
{
struct sock *sk, *parent = chan->data;
+ if (!parent)
+ return NULL;
+
lock_sock(parent);
/* Check for backlog size */
@@ -1657,6 +1660,9 @@ static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state,
{
struct sock *sk = chan->data;
+ if (!sk)
+ return;
+
sk->sk_state = state;
if (err)
@@ -1758,6 +1764,9 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
+ if (!sk)
+ return 0;
+
return READ_ONCE(sk->sk_sndtimeo);
}
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 611a9a94151e..d11bd5337d57 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1715,9 +1715,12 @@ static int rfcomm_recv_data(struct rfcomm_session *s, u8 dlci, int pf, struct sk
}
if (pf && d->cfc) {
- u8 credits = *(u8 *) skb->data; skb_pull(skb, 1);
+ u8 *credits = skb_pull_data(skb, 1);
- d->tx_credits += credits;
+ if (!credits)
+ goto drop;
+
+ d->tx_credits += *credits;
if (d->tx_credits)
clear_bit(RFCOMM_TX_THROTTLED, &d->flags);
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 18826d4b9c0b..eba44525d41d 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -472,9 +472,13 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src)
sk1 = sk;
}
+ sk = sk ? sk : sk1;
+ if (sk)
+ sock_hold(sk);
+
read_unlock(&sco_sk_list.lock);
- return sk ? sk : sk1;
+ return sk;
}
static void sco_sock_destruct(struct sock *sk)
@@ -515,11 +519,13 @@ static void sco_sock_kill(struct sock *sk)
BT_DBG("sk %p state %d", sk, sk->sk_state);
/* Sock is dead, so set conn->sk to NULL to avoid possible UAF */
+ lock_sock(sk);
if (sco_pi(sk)->conn) {
sco_conn_lock(sco_pi(sk)->conn);
sco_pi(sk)->conn->sk = NULL;
sco_conn_unlock(sco_pi(sk)->conn);
}
+ release_sock(sk);
/* Kill poor orphan */
bt_sock_unlink(&sco_sk_list, sk);
@@ -1365,40 +1371,51 @@ static int sco_sock_release(struct socket *sock)
static void sco_conn_ready(struct sco_conn *conn)
{
- struct sock *parent;
- struct sock *sk = conn->sk;
+ struct sock *parent, *sk;
+
+ sco_conn_lock(conn);
+ sk = sco_sock_hold(conn);
+ sco_conn_unlock(conn);
BT_DBG("conn %p", conn);
if (sk) {
lock_sock(sk);
- sco_sock_clear_timer(sk);
- sk->sk_state = BT_CONNECTED;
- sk->sk_state_change(sk);
+
+ /* conn->sk may have become NULL if racing with sk close, but
+ * due to held hdev->lock, it can't become different sk.
+ */
+ if (conn->sk) {
+ sco_sock_clear_timer(sk);
+ sk->sk_state = BT_CONNECTED;
+ sk->sk_state_change(sk);
+ }
+
release_sock(sk);
+ sock_put(sk);
} else {
- sco_conn_lock(conn);
-
- if (!conn->hcon) {
- sco_conn_unlock(conn);
+ if (!conn->hcon)
return;
- }
+
+ lockdep_assert_held(&conn->hcon->hdev->lock);
parent = sco_get_sock_listen(&conn->hcon->src);
- if (!parent) {
- sco_conn_unlock(conn);
+ if (!parent)
return;
- }
lock_sock(parent);
+ sco_conn_lock(conn);
+
+ /* hdev->lock guarantees conn->sk == NULL still here */
+
+ if (parent->sk_state != BT_LISTEN)
+ goto release;
+
sk = sco_sock_alloc(sock_net(parent), NULL,
BTPROTO_SCO, GFP_ATOMIC, 0);
- if (!sk) {
- release_sock(parent);
- sco_conn_unlock(conn);
- return;
- }
+ if (!sk)
+ goto release;
sco_sock_init(sk, parent);
@@ -1417,9 +1434,10 @@ static void sco_conn_ready(struct sco_conn *conn)
/* Wake up parent */
parent->sk_data_ready(parent);
- release_sock(parent);
-
+release:
sco_conn_unlock(conn);
+ release_sock(parent);
+ sock_put(parent);
}
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 06c195906231..8bfa8313ef62 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -371,7 +371,7 @@ static void netdev_name_node_alt_free(struct rcu_head *head)
static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
{
netdev_name_node_del(name_node);
- list_del(&name_node->list);
+ list_del_rcu(&name_node->list);
call_rcu(&name_node->rcu, netdev_name_node_alt_free);
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4381e0fc25bf..84faace50ac2 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -608,14 +608,16 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
/*
* Returns a pointer to a string representation of the identifier used
* to select the egress interface for the given netpoll instance. buf
- * must be a buffer of length at least MAC_ADDR_STR_LEN + 1.
+ * is used to format np->dev_mac when np->dev_name is empty; bufsz must
+ * be at least MAC_ADDR_STR_LEN + 1 to fit the formatted MAC address
+ * and its NUL terminator.
*/
-static char *egress_dev(struct netpoll *np, char *buf)
+static char *egress_dev(struct netpoll *np, char *buf, size_t bufsz)
{
if (np->dev_name[0])
return np->dev_name;
- snprintf(buf, MAC_ADDR_STR_LEN, "%pM", np->dev_mac);
+ snprintf(buf, bufsz, "%pM", np->dev_mac);
return buf;
}
@@ -645,7 +647,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev)
if (!IS_ENABLED(CONFIG_IPV6)) {
np_err(np, "IPv6 is not supported %s, aborting\n",
- egress_dev(np, buf));
+ egress_dev(np, buf, sizeof(buf)));
return -EINVAL;
}
@@ -667,7 +669,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev)
}
if (err) {
np_err(np, "no IPv6 address for %s, aborting\n",
- egress_dev(np, buf));
+ egress_dev(np, buf, sizeof(buf)));
return err;
}
@@ -687,14 +689,14 @@ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev)
in_dev = __in_dev_get_rtnl(ndev);
if (!in_dev) {
np_err(np, "no IP address for %s, aborting\n",
- egress_dev(np, buf));
+ egress_dev(np, buf, sizeof(buf)));
return -EDESTADDRREQ;
}
ifa = rtnl_dereference(in_dev->ifa_list);
if (!ifa) {
np_err(np, "no IP address for %s, aborting\n",
- egress_dev(np, buf));
+ egress_dev(np, buf, sizeof(buf)));
return -EDESTADDRREQ;
}
@@ -736,7 +738,8 @@ int netpoll_setup(struct netpoll *np)
ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac);
if (!ndev) {
- np_err(np, "%s doesn't exist, aborting\n", egress_dev(np, buf));
+ np_err(np, "%s doesn't exist, aborting\n",
+ egress_dev(np, buf, sizeof(buf)));
err = -ENODEV;
goto unlock;
}
@@ -744,14 +747,14 @@ int netpoll_setup(struct netpoll *np)
if (netdev_master_upper_dev_get(ndev)) {
np_err(np, "%s is a slave device, aborting\n",
- egress_dev(np, buf));
+ egress_dev(np, buf, sizeof(buf)));
err = -EBUSY;
goto put;
}
if (!netif_running(ndev)) {
np_info(np, "device %s not up yet, forcing it\n",
- egress_dev(np, buf));
+ egress_dev(np, buf, sizeof(buf)));
err = dev_open(ndev, NULL);
if (err) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b613bb6e07df..df042da422ef 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1572,6 +1572,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
port_guid.vf = ivi.vf;
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+ memset(&vf_broadcast, 0, sizeof(vf_broadcast));
memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len);
vf_vlan.vlan = ivi.vlan;
vf_vlan.qos = ivi.qos;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 5fb812443a08..4366cbac3f06 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -124,9 +124,14 @@ static void ah_output_done(void *data, int err)
struct iphdr *top_iph = ip_hdr(skb);
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
int ihl = ip_hdrlen(skb);
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ if (x->props.flags & XFRM_STATE_ESN)
+ seqhi_len = sizeof(*seqhi);
iph = AH_SKB_CB(skb)->tmp;
- icv = ah_tmp_icv(iph, ihl);
+ seqhi = (__be32 *)((char *)iph + ihl);
+ icv = ah_tmp_icv(seqhi, seqhi_len);
memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
top_iph->tos = iph->tos;
@@ -270,12 +275,17 @@ static void ah_input_done(void *data, int err)
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
int ihl = ip_hdrlen(skb);
int ah_hlen = (ah->hdrlen + 2) << 2;
+ int seqhi_len = 0;
+ __be32 *seqhi;
if (err)
goto out;
+ if (x->props.flags & XFRM_STATE_ESN)
+ seqhi_len = sizeof(*seqhi);
work_iph = AH_SKB_CB(skb)->tmp;
- auth_data = ah_tmp_auth(work_iph, ihl);
+ seqhi = (__be32 *)((char *)work_iph + ihl);
+ auth_data = ah_tmp_auth(seqhi, seqhi_len);
icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len);
err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 6dfc0bcdef65..6a5febbdbee4 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -873,7 +873,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
nfrags = 1;
goto skip_cow;
- } else if (!skb_has_frag_list(skb)) {
+ } else if (!skb_has_frag_list(skb) &&
+ !skb_has_shared_frag(skb)) {
nfrags = skb_shinfo(skb)->nr_frags;
nfrags++;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a674fb44ec25..a9ad39064f3b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -122,16 +122,29 @@
* contradict to specs provided this delay is small enough.
*/
-#define IGMP_V1_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
- IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
- ((in_dev)->mr_v1_seen && \
- time_before(jiffies, (in_dev)->mr_v1_seen)))
-#define IGMP_V2_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
- IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
- ((in_dev)->mr_v2_seen && \
- time_before(jiffies, (in_dev)->mr_v2_seen)))
+static bool IGMP_V1_SEEN(const struct in_device *in_dev)
+{
+ unsigned long seen;
+
+ if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1)
+ return true;
+ if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1)
+ return true;
+ seen = READ_ONCE(in_dev->mr_v1_seen);
+ return seen && time_before(jiffies, seen);
+}
+
+static bool IGMP_V2_SEEN(const struct in_device *in_dev)
+{
+ unsigned long seen;
+
+ if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2)
+ return true;
+ if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2)
+ return true;
+ seen = READ_ONCE(in_dev->mr_v2_seen);
+ return seen && time_before(jiffies, seen);
+}
static int unsolicited_report_interval(struct in_device *in_dev)
{
@@ -954,23 +967,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
int max_delay;
int mark = 0;
struct net *net = dev_net(in_dev->dev);
-
+ unsigned long seen;
if (len == 8) {
+ seen = jiffies + READ_ONCE(in_dev->mr_qrv) * READ_ONCE(in_dev->mr_qi) +
+ READ_ONCE(in_dev->mr_qri);
if (ih->code == 0) {
/* Alas, old v1 router presents here. */
max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
- in_dev->mr_v1_seen = jiffies +
- (in_dev->mr_qrv * in_dev->mr_qi) +
- in_dev->mr_qri;
+ WRITE_ONCE(in_dev->mr_v1_seen, seen);
group = 0;
} else {
/* v2 router present */
max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
- in_dev->mr_v2_seen = jiffies +
- (in_dev->mr_qrv * in_dev->mr_qi) +
- in_dev->mr_qri;
+ WRITE_ONCE(in_dev->mr_v2_seen, seen);
}
/* cancel the interface change timer */
WRITE_ONCE(in_dev->mr_ifc_count, 0);
@@ -995,6 +1006,8 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (!max_delay)
max_delay = 1; /* can't mod w/ 0 */
} else { /* v3 */
+ unsigned long mr_qi;
+
if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
return true;
@@ -1015,15 +1028,16 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
* received value was zero, use the default or statically
* configured value.
*/
- in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
- in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
-
+ WRITE_ONCE(in_dev->mr_qrv,
+ ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
+ mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+ WRITE_ONCE(in_dev->mr_qi, mr_qi);
/* RFC3376, 8.3. Query Response Interval:
* The number of seconds represented by the [Query Response
* Interval] must be less than the [Query Interval].
*/
- if (in_dev->mr_qri >= in_dev->mr_qi)
- in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ;
+ if (READ_ONCE(in_dev->mr_qri) >= mr_qi)
+ WRITE_ONCE(in_dev->mr_qri, (mr_qi/HZ - 1) * HZ);
if (!group) { /* general query */
if (ih3->nsrcs)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d8083b9033c2..5b957a831e7c 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -179,7 +179,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
seq = read_seqbegin(&base->lock);
p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
- if (p)
+ /* Make sure tree was not modified during our lookup. */
+ if (p && !read_seqretry(&base->lock, seq))
return p;
/* retry an exact lookup, taking the lock before.
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e4790cc7b5c2..5bcd73cbdb41 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1233,6 +1233,8 @@ alloc_new_skb:
if (err < 0)
goto error;
copy = err;
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+ skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
wmem_alloc_delta += copy;
} else if (!zc) {
int i = skb_shinfo(skb)->nr_frags;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2058ca860294..2628cd3a93a6 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -537,15 +537,16 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
};
int err;
+ rcu_read_lock();
err = ipmr_fib_lookup(net, &fl4, &mrt);
if (err < 0) {
+ rcu_read_unlock();
kfree_skb(skb);
return err;
}
DEV_STATS_ADD(dev, tx_bytes, skb->len);
DEV_STATS_INC(dev, tx_packets);
- rcu_read_lock();
/* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
@@ -1112,11 +1113,12 @@ static int ipmr_cache_report(const struct mr_table *mrt,
msg->im_vif_hi = vifi >> 8;
ipv4_pktinfo_prepare(mroute_sk, pkt, false);
memcpy(skb->cb, pkt->cb, sizeof(skb->cb));
- /* Add our header */
- igmp = skb_put(skb, sizeof(struct igmphdr));
+ /* Add our header.
+ * Note that code, csum and group fields are cleared.
+ */
+ igmp = skb_put_zero(skb, sizeof(struct igmphdr));
igmp->type = assert;
msg->im_msgtype = assert;
- igmp->code = 0;
ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
skb->transport_header = skb->network_header;
}
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index 5080fa5fbf6a..f9c6755f5ec5 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -94,6 +94,9 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
#endif
int doff = 0;
+ if (ntohs(iph->frag_off) & IP_OFFSET)
+ return NULL;
+
if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
struct tcphdr _hdr;
struct udphdr *hp;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8fc24c3743c5..c0526cc03980 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1827,7 +1827,6 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
enum skb_drop_reason reason;
- struct sock *rsk;
reason = psp_sk_rx_policy_check(sk, skb);
if (reason)
@@ -1863,24 +1862,21 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
if (nsk != sk) {
reason = tcp_child_process(sk, nsk, skb);
- if (reason) {
- rsk = nsk;
+ sock_put(nsk);
+ if (reason)
goto reset;
- }
return 0;
}
} else
sock_rps_save_rxhash(sk, skb);
reason = tcp_rcv_state_process(sk, skb);
- if (reason) {
- rsk = sk;
+ if (reason)
goto reset;
- }
return 0;
reset:
- tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason));
+ tcp_v4_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
sk_skb_reason_drop(sk, skb, reason);
/* Be careful here. If this function gets more complicated and
@@ -2193,8 +2189,10 @@ lookup:
rst_reason = sk_rst_convert_drop_reason(drop_reason);
tcp_v4_send_reset(nsk, skb, rst_reason);
+ sock_put(nsk);
goto discard_and_relse;
}
+ sock_put(nsk);
sock_put(sk);
return 0;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 199f0b579e89..e6092c3ac840 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -1012,6 +1012,6 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
}
bh_unlock_sock(child);
- sock_put(child);
+
return reason;
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index c024aa77f25b..c3806c6ac96f 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -164,7 +164,7 @@ config IPV6_SIT
select INET_TUNNEL
select NET_IP_TUNNEL
select IPV6_NDISC_NODETYPE
- default y
+ default m
help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
@@ -172,7 +172,7 @@ config IPV6_SIT
into IPv4 packets. This is useful if you want to connect two IPv6
networks over an IPv4-only path.
- Saying M here will produce a module called sit. If unsure, say Y.
+ Saying M here will produce a module called sit. If unsure, say M.
config IPV6_SIT_6RD
bool "IPv6: IPv6 Rapid Deployment (6RD)"
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index cb26beea4398..de1e68199a01 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -317,14 +317,19 @@ static void ah6_output_done(void *data, int err)
struct ipv6hdr *top_iph = ipv6_hdr(skb);
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
struct tmp_ext *iph_ext;
+ int seqhi_len = 0;
+ __be32 *seqhi;
extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
if (extlen)
extlen += sizeof(*iph_ext);
+ if (x->props.flags & XFRM_STATE_ESN)
+ seqhi_len = sizeof(*seqhi);
iph_base = AH_SKB_CB(skb)->tmp;
iph_ext = ah_tmp_ext(iph_base);
- icv = ah_tmp_icv(iph_ext, extlen);
+ seqhi = (__be32 *)((char *)iph_ext + extlen);
+ icv = ah_tmp_icv(seqhi, seqhi_len);
memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
@@ -471,13 +476,18 @@ static void ah6_input_done(void *data, int err)
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
int hdr_len = skb_network_header_len(skb);
int ah_hlen = ipv6_authlen(ah);
+ int seqhi_len = 0;
+ __be32 *seqhi;
if (err)
goto out;
+ if (x->props.flags & XFRM_STATE_ESN)
+ seqhi_len = sizeof(*seqhi);
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, hdr_len);
- icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len);
+ seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+ icv = ah_tmp_icv(seqhi, seqhi_len);
err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0;
if (err)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9f75313734f8..9c06c5a1419d 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -915,7 +915,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
nfrags = 1;
goto skip_cow;
- } else if (!skb_has_frag_list(skb)) {
+ } else if (!skb_has_frag_list(skb) &&
+ !skb_has_shared_frag(skb)) {
nfrags = skb_shinfo(skb)->nr_frags;
nfrags++;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 49e31e4ae7b7..9d06d487e8b1 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -73,6 +73,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
__be16 *frag_offp)
{
u8 nexthdr = *nexthdrp;
+ int exthdr_cnt = 0;
*frag_offp = 0;
@@ -82,6 +83,8 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
if (nexthdr == NEXTHDR_NONE)
return -1;
+ if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT))
+ return -1;
hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
if (!hp)
return -1;
@@ -190,6 +193,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
{
unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ int exthdr_cnt = 0;
bool found;
if (fragoff)
@@ -216,6 +220,9 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return -ENOENT;
}
+ if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT))
+ return -EBADMSG;
+
hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
if (!hp)
return -EBADMSG;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 63fc8556b475..365b4059eb20 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -2262,10 +2262,11 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+ struct ip6_tnl *t = netdev_priv(dev);
struct __ip6_tnl_parm p;
- struct ip6_tnl *t;
+ struct ip6gre_net *ign;
+ ign = net_generic(t->net, ip6gre_net_id);
t = ip6gre_changelink_common(dev, tb, data, &p, extack);
if (IS_ERR(t))
return PTR_ERR(t);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 967b07aeb683..8972863c93ee 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -403,6 +403,7 @@ INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *));
void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr,
bool have_final)
{
+ int exthdr_cnt = IP6CB(skb)->flags & IP6SKB_HOPBYHOP ? 1 : 0;
const struct inet6_protocol *ipprot;
struct inet6_dev *idev;
unsigned int nhoff;
@@ -487,6 +488,10 @@ resubmit_final:
nexthdr = ret;
goto resubmit_final;
} else {
+ if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) {
+ SKB_DR_SET(reason, IPV6_TOO_MANY_EXTHDRS);
+ goto discard;
+ }
goto resubmit;
}
} else if (ret == 0) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7e92909ab5be..c14adcdd4396 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -468,6 +468,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
default:
break;
}
+ hdr = ipv6_hdr(skb);
}
/*
@@ -582,6 +583,8 @@ int ip6_forward(struct sk_buff *skb)
if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) {
int proxied = ip6_forward_proxy_check(skb);
+
+ hdr = ipv6_hdr(skb);
if (proxied > 0) {
/* It's tempting to decrease the hop limit
* here by 1, as we do at the end of the
@@ -1794,6 +1797,8 @@ alloc_new_skb:
if (err < 0)
goto error;
copy = err;
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+ skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
wmem_alloc_delta += copy;
} else if (!zc) {
int i = skb_shinfo(skb)->nr_frags;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c468c83af0f2..9d1037ac082f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -399,11 +399,15 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
unsigned int nhoff = raw - skb->data;
unsigned int off = nhoff + sizeof(*ipv6h);
u8 nexthdr = ipv6h->nexthdr;
+ int exthdr_cnt = 0;
while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
struct ipv6_opt_hdr *hdr;
u16 optlen;
+ if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT))
+ break;
+
if (!pskb_may_pull(skb, off + sizeof(*hdr)))
break;
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index ced8bd44828e..893f2aeb4711 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -100,6 +100,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
const struct in6_addr *daddr = NULL, *saddr = NULL;
struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var;
struct sk_buff *data_skb = NULL;
+ unsigned short fragoff = 0;
int doff = 0;
int thoff = 0, tproto;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -107,8 +108,8 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
struct nf_conn const *ct;
#endif
- tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
- if (tproto < 0) {
+ tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL);
+ if (tproto < 0 || fragoff) {
pr_debug("unable to find transport header in IPv6 packet, dropping\n");
return NULL;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 19eb6b702227..e3d355d1fbd6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1645,6 +1645,10 @@ static unsigned int fib6_mtu(const struct fib6_result *res)
rcu_read_lock();
idev = __in6_dev_get(dev);
+ if (!idev) {
+ rcu_read_unlock();
+ return 0;
+ }
mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
}
@@ -4995,6 +4999,7 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
break;
rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+ fib6_update_sernum(net, rt);
rt6_multipath_rebalance(rt);
break;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2c3f7a739709..d13d49bfef19 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -288,8 +288,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
saddr = &fl6->saddr;
err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
- if (err)
+ if (err) {
+ dst_release(dst);
goto failure;
+ }
}
/* set the source address */
@@ -1617,12 +1619,13 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_LISTEN) {
struct sock *nsk = tcp_v6_cookie_check(sk, skb);
+ if (!nsk)
+ return 0;
if (nsk != sk) {
- if (nsk) {
- reason = tcp_child_process(sk, nsk, skb);
- if (reason)
- goto reset;
- }
+ reason = tcp_child_process(sk, nsk, skb);
+ sock_put(nsk);
+ if (reason)
+ goto reset;
return 0;
}
} else
@@ -1827,8 +1830,10 @@ lookup:
rst_reason = sk_rst_convert_drop_reason(drop_reason);
tcp_v6_send_reset(nsk, skb, rst_reason);
+ sock_put(nsk);
goto discard_and_relse;
}
+ sock_put(nsk);
sock_put(sk);
return 0;
}
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
index ea2f805d3b01..9b586fcec485 100644
--- a/net/ipv6/xfrm6_protocol.c
+++ b/net/ipv6/xfrm6_protocol.c
@@ -88,8 +88,10 @@ int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6,
skb, flags);
- if (dst->error)
+ if (dst->error) {
+ dst_release(dst);
goto drop;
+ }
skb_dst_set(skb, dst);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 160ae65a5c64..0a0f27836d57 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -438,6 +438,15 @@ ieee80211_verify_sta_ht_mcs_support(struct ieee80211_sub_if_data *sdata,
ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
/*
+ * Some Xfinity XB8 firmware advertises >1 spatial stream MCS indexes in
+ * its basic HT-MCS set. On cards with fewer spatial streams, the check
+ * would fail, and we'd be stuck with no HT when it in fact works fine with
+ * its own supported rates. So check it only in strict mode.
+ */
+ if (!ieee80211_hw_check(&sdata->local->hw, STRICT))
+ return true;
+
+ /*
* P802.11REVme/D7.0 - 6.5.4.2.4
* ...
* If the MLME of an HT STA receives an MLME-JOIN.request primitive
@@ -9140,7 +9149,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
struct ieee80211_bss *bss = (void *)cbss->priv;
struct sta_info *new_sta = NULL;
struct ieee80211_link_data *link;
- bool have_sta = false;
+ struct sta_info *have_sta = NULL;
bool mlo;
int err;
u16 new_links;
@@ -9159,11 +9168,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
mlo = false;
}
- if (assoc) {
- rcu_read_lock();
+ if (assoc)
have_sta = sta_info_get(sdata, ap_mld_addr);
- rcu_read_unlock();
- }
if (mlo && !have_sta &&
WARN_ON(sdata->vif.valid_links || sdata->vif.active_links))
@@ -9327,6 +9333,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
out_release_chan:
ieee80211_link_release_channel(link);
out_err:
+ if (mlo && have_sta)
+ WARN_ON(__sta_info_destroy(have_sta));
ieee80211_vif_set_links(sdata, 0, 0);
return err;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3e5d1c47a5b0..d18e962126ce 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4971,7 +4971,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
struct sk_buff *skb = rx->skb;
struct ieee80211_hdr *hdr = (void *)skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- static ieee80211_rx_result res;
+ ieee80211_rx_result res;
int orig_len = skb->len;
int hdrlen = ieee80211_hdrlen(hdr->frame_control);
int snap_offs = hdrlen;
@@ -5380,7 +5380,9 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
if (!link_sta)
goto out;
- ieee80211_rx_data_set_link(&rx, link_sta->link_id);
+ if (!ieee80211_rx_data_set_link(&rx,
+ link_sta->link_id))
+ goto out;
}
if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
diff --git a/net/mac80211/tests/chan-mode.c b/net/mac80211/tests/chan-mode.c
index adc069065e73..fa370831d617 100644
--- a/net/mac80211/tests/chan-mode.c
+++ b/net/mac80211/tests/chan-mode.c
@@ -65,6 +65,7 @@ static const struct determine_chan_mode_case {
.ht_capa_mask = {
.mcs.rx_mask[0] = 0xf7,
},
+ .strict = true,
}, {
.desc = "Masking out a RX rate in VHT capabilities",
.conn_mode = IEEE80211_CONN_MODE_EHT,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b093bc203c81..2529b01e2cd5 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3700,11 +3700,11 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, radar_detected_work);
struct cfg80211_chan_def chandef;
- struct ieee80211_chanctx *ctx;
+ struct ieee80211_chanctx *ctx, *tmp;
lockdep_assert_wiphy(local->hw.wiphy);
- list_for_each_entry(ctx, &local->chanctx_list, list) {
+ list_for_each_entry_safe(ctx, tmp, &local->chanctx_list, list) {
if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index e1033643fab0..e4b230ef6099 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -920,9 +920,9 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test)
static void mctp_test_route_input_null_eid(struct kunit *test)
{
struct mctp_hdr hdr = RX_HDR(1, 10, 0, FL_S | FL_E | FL_TO);
+ struct sockaddr_mctp addr = { 0 };
struct sk_buff *skb_pkt, *skb_sk;
struct mctp_test_dev *dev;
- struct sockaddr_mctp addr;
struct socket *sock;
u8 type = 0;
int rc;
diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c
index c3987d5ade7a..6eef8d485c25 100644
--- a/net/mctp/test/utils.c
+++ b/net/mctp/test/utils.c
@@ -116,7 +116,7 @@ void mctp_test_destroy_dev(struct mctp_test_dev *dev)
static int mctp_test_dst_output(struct mctp_dst *dst, struct sk_buff *skb)
{
skb->dev = dst->dev->dev;
- dev_queue_xmit(skb);
+ dev_direct_xmit(skb, 0);
return 0;
}
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index 82ec15bcfd7f..082c46c0f50e 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -12,6 +12,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
struct sock *sk, *ssk;
struct sk_buff *skb;
struct tcp_sock *tp;
+ bool has_rxtstamp;
/* on early fallback the subflow context is deleted by
* subflow_syn_recv_sock()
@@ -40,12 +41,13 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
*/
tp->copied_seq += skb->len;
subflow->ssn_offset += skb->len;
+ has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
/* Only the sequence delta is relevant */
MPTCP_SKB_CB(skb)->map_seq = -skb->len;
MPTCP_SKB_CB(skb)->end_seq = 0;
MPTCP_SKB_CB(skb)->offset = 0;
- MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+ MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
MPTCP_SKB_CB(skb)->cant_coalesce = 1;
mptcp_data_lock(sk);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 57a456690406..3c152bf66cd5 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -16,6 +16,7 @@ struct mptcp_pm_add_entry {
struct list_head list;
struct mptcp_addr_info addr;
u8 retrans_times;
+ bool timer_done;
struct timer_list add_timer;
struct mptcp_sock *sock;
struct rcu_head rcu;
@@ -283,6 +284,9 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct mptcp_addr_info local, remote;
+ if (!__mptcp_subflow_active(subflow))
+ continue;
+
mptcp_local_address((struct sock_common *)ssk, &local);
if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
@@ -305,18 +309,31 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk)
const struct net *net = sock_net((struct sock *)msk);
unsigned int rto = mptcp_get_add_addr_timeout(net);
struct mptcp_subflow_context *subflow;
- unsigned int max = 0;
+ unsigned int max = 0, max_stale = 0;
+
+ if (!rto)
+ return 0;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct inet_connection_sock *icsk = inet_csk(ssk);
- if (icsk->icsk_rto > max)
+ if (!__mptcp_subflow_active(subflow))
+ continue;
+
+ if (unlikely(subflow->stale)) {
+ if (icsk->icsk_rto > max_stale)
+ max_stale = icsk->icsk_rto;
+ } else if (icsk->icsk_rto > max) {
max = icsk->icsk_rto;
+ }
}
- if (max && max < rto)
- rto = max;
+ if (max)
+ return min(max, rto);
+
+ if (max_stale)
+ return min(max_stale, rto);
return rto;
}
@@ -327,26 +344,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
add_timer);
struct mptcp_sock *msk = entry->sock;
struct sock *sk = (struct sock *)msk;
- unsigned int timeout;
+ unsigned int timeout = 0;
pr_debug("msk=%p\n", msk);
- if (!msk)
- return;
-
- if (inet_sk_state_load(sk) == TCP_CLOSE)
- return;
-
- if (!entry->addr.id)
- return;
+ bh_lock_sock(sk);
+ if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
+ goto out;
- if (mptcp_pm_should_add_signal_addr(msk)) {
- sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
+ if (sock_owned_by_user(sk)) {
+ /* Try again later. */
+ timeout = HZ / 20;
goto out;
}
timeout = mptcp_adjust_add_addr_timeout(msk);
- if (!timeout)
+ if (!timeout || mptcp_pm_should_add_signal_addr(msk))
goto out;
spin_lock_bh(&msk->pm.lock);
@@ -359,8 +372,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
}
if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
- sk_reset_timer(sk, timer,
- jiffies + (timeout << entry->retrans_times));
+ timeout <<= entry->retrans_times;
+ else
+ timeout = 0;
spin_unlock_bh(&msk->pm.lock);
@@ -368,7 +382,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
mptcp_pm_subflow_established(msk);
out:
- __sock_put(sk);
+ if (timeout)
+ sk_reset_timer(sk, timer, jiffies + timeout);
+ else
+ /* if sock_put calls sk_free: avoid waiting for this timer */
+ entry->timer_done = true;
+ bh_unlock_sock(sk);
+ sock_put(sk);
}
struct mptcp_pm_add_entry *
@@ -431,6 +451,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
reset_timer:
+ add_entry->timer_done = false;
timeout = mptcp_adjust_add_addr_timeout(msk);
if (timeout)
sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);
@@ -451,7 +472,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
list_for_each_entry_safe(entry, tmp, &free_list, list) {
- sk_stop_timer_sync(sk, &entry->add_timer);
+ if (!entry->timer_done)
+ sk_stop_timer_sync(sk, &entry->add_timer);
kfree_rcu(entry, rcu);
}
}
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index c9f1e5af3cd3..fc818b63752e 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -347,6 +347,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */
if (msk->pm.add_addr_signaled < endp_signal_max) {
+ u8 endp_id;
+
/* due to racing events on both ends we can reach here while
* previous add address is still running: if we invoke now
* mptcp_pm_announce_addr(), that will fail and the
@@ -360,19 +362,20 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (!select_signal_address(pernet, msk, &local))
goto subflow;
+ /* Special case for ID0: set the correct ID */
+ endp_id = local.addr.id;
+ if (endp_id == msk->mpc_endpoint_id)
+ local.addr.id = 0;
+
/* If the alloc fails, we are on memory pressure, not worth
* continuing, and trying to create subflows.
*/
if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
return;
- __clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
+ __clear_bit(endp_id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
- /* Special case for ID0: set the correct ID */
- if (local.addr.id == msk->mpc_endpoint_id)
- local.addr.id = 0;
-
mptcp_pm_announce_addr(msk, &local.addr, false);
mptcp_pm_addr_send_ack(msk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 0efe40be2fde..1cf608e7357b 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -812,6 +812,10 @@ static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level,
if (ret)
break;
}
+
+ if (!ret)
+ sockopt_seq_inc(msk);
+
return ret;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e2cb9d23e4a0..d562e149606f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -581,7 +581,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup);
if (!subflow_thmac_valid(subflow)) {
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKMAC);
subflow->reset_reason = MPTCP_RST_EMPTCP;
goto do_reset;
}
@@ -908,7 +908,7 @@ create_child:
if (!subflow_hmac_valid(subflow_req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
- subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
+ subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
goto dispose_child;
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 2082bfb2d93c..9ea6b4fa78bf 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -267,27 +267,20 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
hash_key2 = hash_key;
use2 = false;
}
+
conn_tab_lock(t, cp, hash_key, hash_key2, use2, true /* new_hash */,
&head, &head2);
- spin_lock(&cp->lock);
-
- if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
- cp->flags |= IP_VS_CONN_F_HASHED;
- WRITE_ONCE(cp->hn0.hash_key, hash_key);
- WRITE_ONCE(cp->hn1.hash_key, hash_key2);
- refcount_inc(&cp->refcnt);
- hlist_bl_add_head_rcu(&cp->hn0.node, head);
- if (use2)
- hlist_bl_add_head_rcu(&cp->hn1.node, head2);
- ret = 1;
- } else {
- pr_err("%s(): request for already hashed, called from %pS\n",
- __func__, __builtin_return_address(0));
- ret = 0;
- }
- spin_unlock(&cp->lock);
+ cp->flags |= IP_VS_CONN_F_HASHED;
+ WRITE_ONCE(cp->hn0.hash_key, hash_key);
+ WRITE_ONCE(cp->hn1.hash_key, hash_key2);
+ refcount_inc(&cp->refcnt);
+ hlist_bl_add_head_rcu(&cp->hn0.node, head);
+ if (use2)
+ hlist_bl_add_head_rcu(&cp->hn1.node, head2);
+
conn_tab_unlock(head, head2);
+ ret = 1;
/* Schedule resizing if load increases */
if (atomic_read(&ipvs->conn_count) > t->u_thresh &&
@@ -321,7 +314,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
conn_tab_lock(t, cp, hash_key, hash_key2, use2, false /* new_hash */,
&head, &head2);
- spin_lock(&cp->lock);
if (cp->flags & IP_VS_CONN_F_HASHED) {
/* Decrease refcnt and unlink conn only if we are last user */
@@ -334,7 +326,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
}
}
- spin_unlock(&cp->lock);
conn_tab_unlock(head, head2);
rcu_read_unlock();
@@ -637,6 +628,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
struct ip_vs_conn_hnode *hn;
u32 hash_key, hash_key_new;
struct ip_vs_conn_param p;
+ bool by_me = false;
int ntbl;
int dir;
@@ -664,8 +656,16 @@ retry:
t = rcu_dereference(t->new_tbl);
ntbl++;
/* We are lost? */
- if (ntbl >= 2)
+ if (ntbl >= 2) {
+ spin_lock_bh(&cp->lock);
+ if (cp->flags & IP_VS_CONN_F_NO_CPORT && by_me)
+ cp->cport = 0;
+ /* hn1 will be rehashed on next packet */
+ spin_unlock_bh(&cp->lock);
+ IP_VS_ERR_RL("%s(): Too many ht changes for dir %d\n",
+ __func__, dir);
return;
+ }
}
/* Rehashing during resize? Use the recent table for adds */
@@ -683,10 +683,13 @@ retry:
if (head > head2 && t == t2)
swap(head, head2);
+ /* Protect the cp->flags modification */
+ spin_lock_bh(&cp->lock);
+
/* Lock seqcount only for the old bucket, even if we are on new table
* because it affects the del operation, not the adding.
*/
- spin_lock_bh(&t->lock[hash_key & t->lock_mask].l);
+ spin_lock(&t->lock[hash_key & t->lock_mask].l);
preempt_disable_nested();
write_seqcount_begin(&t->seqc[hash_key & t->seqc_mask]);
@@ -704,14 +707,23 @@ retry:
hlist_bl_unlock(head);
write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]);
preempt_enable_nested();
- spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l);
+ spin_unlock(&t->lock[hash_key & t->lock_mask].l);
+ spin_unlock_bh(&cp->lock);
hash_key = hash_key_new;
goto retry;
}
- spin_lock(&cp->lock);
- if ((cp->flags & IP_VS_CONN_F_NO_CPORT) &&
- (cp->flags & IP_VS_CONN_F_HASHED)) {
+ /* Fill cport once, even if multiple packets try to do it */
+ if (cp->flags & IP_VS_CONN_F_NO_CPORT && (!cp->cport || by_me)) {
+ /* If we race with resizing make sure cport is set for dir 1 */
+ if (!cp->cport) {
+ cp->cport = cport;
+ by_me = true;
+ }
+ if (!dir) {
+ atomic_dec(&ipvs->no_cport_conns[af_id]);
+ cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
+ }
/* We do not recalc hash_key_r under lock, we assume the
* parameters in cp do not change, i.e. cport is
* the only possible change.
@@ -726,21 +738,17 @@ retry:
hlist_bl_del_rcu(&hn->node);
hlist_bl_add_head_rcu(&hn->node, head_new);
}
- if (!dir) {
- atomic_dec(&ipvs->no_cport_conns[af_id]);
- cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
- cp->cport = cport;
- }
}
- spin_unlock(&cp->lock);
if (head != head2)
hlist_bl_unlock(head2);
hlist_bl_unlock(head);
write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]);
preempt_enable_nested();
- spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l);
- if (dir--)
+ spin_unlock(&t->lock[hash_key & t->lock_mask].l);
+
+ spin_unlock_bh(&cp->lock);
+ if (dir-- && by_me)
goto next_dir;
}
@@ -1835,7 +1843,7 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
if (!rcu_dereference_protected(ipvs->conn_tab, 1))
return;
- cancel_delayed_work_sync(&ipvs->conn_resize_work);
+ disable_delayed_work_sync(&ipvs->conn_resize_work);
if (!atomic_read(&ipvs->conn_count))
goto unreg;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index f5b7a2047291..d40b404c1bf6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -237,7 +237,7 @@ int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n,
{
if (!t)
return 1 << min_bits;
- n = roundup_pow_of_two(n);
+ n = n > 0 ? roundup_pow_of_two(n) : 1;
if (lfactor < 0) {
int factor = min(-lfactor, max_bits);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6632daa87ded..c7c7f6a7a9f6 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -261,12 +261,28 @@ static void est_reload_work_handler(struct work_struct *work)
if (!kd)
continue;
/* New config ? Stop kthread tasks */
- if (genid != genid_done)
- ip_vs_est_kthread_stop(kd);
+ if (genid != genid_done) {
+ if (!id) {
+ /* Only we can stop kt 0, but not while holding est_mutex */
+ mutex_unlock(&ipvs->est_mutex);
+ ip_vs_est_kthread_stop(kd);
+ mutex_lock(&ipvs->est_mutex);
+ if (!READ_ONCE(ipvs->enable))
+ goto unlock;
+ /* kd for kt 0 is never destroyed */
+ } else {
+ ip_vs_est_kthread_stop(kd);
+ }
+ }
if (!kd->task && !ip_vs_est_stopped(ipvs)) {
+ bool start;
+
/* Do not start kthreads above 0 in calc phase */
- if ((!id || !ipvs->est_calc_phase) &&
- ip_vs_est_kthread_start(ipvs, kd) < 0)
+ if (id)
+ start = !ipvs->est_calc_phase;
+ else
+ start = kd->needed;
+ if (start && ip_vs_est_kthread_start(ipvs, kd) < 0)
repeat = true;
}
}
@@ -1102,6 +1118,24 @@ out:
return dest;
}
+/* Put destination in trash */
+static void ip_vs_trash_put_dest(struct netns_ipvs *ipvs,
+ struct ip_vs_dest *dest, unsigned long istart,
+ bool cleanup)
+{
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
+ refcount_read(&dest->refcnt));
+ if (list_empty(&ipvs->dest_trash) && !cleanup)
+ mod_timer(&ipvs->dest_trash_timer,
+ jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
+ /* dest lives in trash with reference */
+ list_add(&dest->t_list, &ipvs->dest_trash);
+ dest->idle_start = istart;
+ spin_unlock_bh(&ipvs->dest_trash_lock);
+}
+
static void ip_vs_dest_rcu_free(struct rcu_head *head)
{
struct ip_vs_dest *dest;
@@ -1461,9 +1495,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
ntohs(dest->vport));
ret = ip_vs_start_estimator(svc->ipvs, &dest->stats);
+ /* On error, put dest back into the trash */
if (ret < 0)
- return ret;
- __ip_vs_update_dest(svc, dest, udest, 1);
+ ip_vs_trash_put_dest(svc->ipvs, dest, dest->idle_start,
+ false);
+ else
+ __ip_vs_update_dest(svc, dest, udest, 1);
} else {
/*
* Allocate and initialize the dest structure
@@ -1533,17 +1570,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
*/
ip_vs_rs_unhash(dest);
- spin_lock_bh(&ipvs->dest_trash_lock);
- IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
- IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
- refcount_read(&dest->refcnt));
- if (list_empty(&ipvs->dest_trash) && !cleanup)
- mod_timer(&ipvs->dest_trash_timer,
- jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
- /* dest lives in trash with reference */
- list_add(&dest->t_list, &ipvs->dest_trash);
- dest->idle_start = 0;
- spin_unlock_bh(&ipvs->dest_trash_lock);
+ ip_vs_trash_put_dest(ipvs, dest, 0, cleanup);
/* Queue up delayed work to expire all no destination connections.
* No-op when CONFIG_SYSCTL is disabled.
@@ -1812,11 +1839,16 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
*svc_p = svc;
if (!READ_ONCE(ipvs->enable)) {
+ mutex_lock(&ipvs->est_mutex);
+
/* Now there is a service - full throttle */
WRITE_ONCE(ipvs->enable, 1);
+ ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
+
/* Start estimation for first time */
- ip_vs_est_reload_start(ipvs);
+ ip_vs_est_reload_start(ipvs, true);
+ mutex_unlock(&ipvs->est_mutex);
}
return 0;
@@ -2032,6 +2064,9 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
cancel_delayed_work_sync(&ipvs->svc_resize_work);
if (t) {
rcu_assign_pointer(ipvs->svc_table, NULL);
+ /* Inform readers that table is removed */
+ smp_mb__before_atomic();
+ atomic_inc(&ipvs->svc_table_changes);
while (1) {
p = rcu_dereference_protected(t->new_tbl, 1);
call_rcu(&t->rcu_head, ip_vs_rht_rcu_free);
@@ -2078,6 +2113,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
t = rcu_dereference_protected(ipvs->svc_table, 1);
if (t) {
rcu_assign_pointer(ipvs->svc_table, NULL);
+ /* Inform readers that table is removed */
+ smp_mb__before_atomic();
+ atomic_inc(&ipvs->svc_table_changes);
while (1) {
p = rcu_dereference_protected(t->new_tbl, 1);
call_rcu(&t->rcu_head, ip_vs_rht_rcu_free);
@@ -2086,6 +2124,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
t = p;
}
}
+ /* Stop the tot_stats estimator early under service_mutex
+ * to avoid locking it again later.
+ */
+ if (cleanup)
+ ip_vs_stop_estimator_tot_stats(ipvs);
return 0;
}
@@ -2331,7 +2374,7 @@ static int ipvs_proc_est_cpumask_set(const struct ctl_table *table,
/* est_max_threads may depend on cpulist size */
ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
ipvs->est_calc_phase = 1;
- ip_vs_est_reload_start(ipvs);
+ ip_vs_est_reload_start(ipvs, true);
unlock:
mutex_unlock(&ipvs->est_mutex);
@@ -2351,11 +2394,14 @@ static int ipvs_proc_est_cpumask_get(const struct ctl_table *table,
mutex_lock(&ipvs->est_mutex);
- if (ipvs->est_cpulist_valid)
- mask = *valp;
- else
- mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD);
- ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask));
+ /* HK_TYPE_KTHREAD cpumask needs RCU protection */
+ scoped_guard(rcu) {
+ if (ipvs->est_cpulist_valid)
+ mask = *valp;
+ else
+ mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD);
+ ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask));
+ }
mutex_unlock(&ipvs->est_mutex);
@@ -2411,7 +2457,7 @@ static int ipvs_proc_est_nice(const struct ctl_table *table, int write,
mutex_lock(&ipvs->est_mutex);
if (*valp != val) {
*valp = val;
- ip_vs_est_reload_start(ipvs);
+ ip_vs_est_reload_start(ipvs, true);
}
mutex_unlock(&ipvs->est_mutex);
}
@@ -2438,7 +2484,7 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write,
mutex_lock(&ipvs->est_mutex);
if (*valp != val) {
*valp = val;
- ip_vs_est_reload_start(ipvs);
+ ip_vs_est_reload_start(ipvs, true);
}
mutex_unlock(&ipvs->est_mutex);
}
@@ -2463,7 +2509,7 @@ static int ipvs_proc_conn_lfactor(const struct ctl_table *table, int write,
if (val < -8 || val > 8) {
ret = -EINVAL;
} else {
- *valp = val;
+ WRITE_ONCE(*valp, val);
if (rcu_access_pointer(ipvs->conn_tab))
mod_delayed_work(system_unbound_wq,
&ipvs->conn_resize_work, 0);
@@ -2490,10 +2536,16 @@ static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write,
if (val < -8 || val > 8) {
ret = -EINVAL;
} else {
- *valp = val;
- if (rcu_access_pointer(ipvs->svc_table))
+ mutex_lock(&ipvs->service_mutex);
+ WRITE_ONCE(*valp, val);
+ /* Make sure the services are present */
+ if (rcu_access_pointer(ipvs->svc_table) &&
+ READ_ONCE(ipvs->enable) &&
+ !test_bit(IP_VS_WORK_SVC_NORESIZE,
+ &ipvs->work_flags))
mod_delayed_work(system_unbound_wq,
&ipvs->svc_resize_work, 0);
+ mutex_unlock(&ipvs->service_mutex);
}
}
return ret;
@@ -3004,7 +3056,8 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
int old_gen, new_gen;
u32 counts[8];
u32 bucket;
- int count;
+ u32 count;
+ int loops;
u32 sum1;
u32 sum;
int i;
@@ -3020,6 +3073,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
if (!atomic_read(&ipvs->conn_count))
goto after_conns;
old_gen = atomic_read(&ipvs->conn_tab_changes);
+ loops = 0;
repeat_conn:
smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */
@@ -3032,8 +3086,11 @@ repeat_conn:
resched_score++;
ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
count = 0;
- hlist_bl_for_each_entry_rcu(hn, e, head, node)
+ hlist_bl_for_each_entry_rcu(hn, e, head, node) {
count++;
+ if (count >= ARRAY_SIZE(counts) - 1)
+ break;
+ }
}
resched_score += count;
if (resched_score >= 100) {
@@ -3042,37 +3099,41 @@ repeat_conn:
new_gen = atomic_read(&ipvs->conn_tab_changes);
/* New table installed ? */
if (old_gen != new_gen) {
+ /* Too many changes? */
+ if (++loops >= 5)
+ goto after_conns;
old_gen = new_gen;
goto repeat_conn;
}
}
- counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++;
+ counts[count]++;
}
}
for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
sum += counts[i];
sum1 = sum - counts[0];
- seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n",
- counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U));
+ seq_printf(seq, "Conn buckets empty:\t%u (%llu%%)\n",
+ counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U)));
for (i = 1; i < ARRAY_SIZE(counts); i++) {
if (!counts[i])
continue;
- seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n",
+ seq_printf(seq, "Conn buckets len-%d:\t%u (%llu%%)\n",
i, counts[i],
- (unsigned long)counts[i] * 100 / max(sum1, 1U));
+ div_u64((u64)counts[i] * 100U, max(sum1, 1U)));
}
after_conns:
t = rcu_dereference(ipvs->svc_table);
count = ip_vs_get_num_services(ipvs);
- seq_printf(seq, "Services:\t%d\n", count);
+ seq_printf(seq, "Services:\t%u\n", count);
seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n",
t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0);
if (!count)
goto after_svc;
old_gen = atomic_read(&ipvs->svc_table_changes);
+ loops = 0;
repeat_svc:
smp_rmb(); /* ipvs->svc_table and svc_table_changes */
@@ -3086,8 +3147,11 @@ repeat_svc:
ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
count = 0;
hlist_bl_for_each_entry_rcu(svc, e, head,
- s_list)
+ s_list) {
count++;
+ if (count >= ARRAY_SIZE(counts) - 1)
+ break;
+ }
}
resched_score += count;
if (resched_score >= 100) {
@@ -3096,24 +3160,27 @@ repeat_svc:
new_gen = atomic_read(&ipvs->svc_table_changes);
/* New table installed ? */
if (old_gen != new_gen) {
+ /* Too many changes? */
+ if (++loops >= 5)
+ goto after_svc;
old_gen = new_gen;
goto repeat_svc;
}
}
- counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++;
+ counts[count]++;
}
}
for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
sum += counts[i];
sum1 = sum - counts[0];
- seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n",
- counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U));
+ seq_printf(seq, "Service buckets empty:\t%u (%llu%%)\n",
+ counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U)));
for (i = 1; i < ARRAY_SIZE(counts); i++) {
if (!counts[i])
continue;
- seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n",
+ seq_printf(seq, "Service buckets len-%d:\t%u (%llu%%)\n",
i, counts[i],
- (unsigned long)counts[i] * 100 / max(sum1, 1U));
+ div_u64((u64)counts[i] * 100U, max(sum1, 1U)));
}
after_svc:
@@ -4967,7 +5034,14 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
- ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s);
+ if (ipvs->tot_stats->s.est.ktid != -2) {
+ /* Not stopped yet? This happens only on netns init error and
+ * we do not even need to lock the service_mutex for this case.
+ */
+ mutex_lock(&ipvs->service_mutex);
+ ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s);
+ mutex_unlock(&ipvs->service_mutex);
+ }
if (ipvs->est_cpulist_valid)
free_cpumask_var(ipvs->sysctl_est_cpulist);
@@ -5039,7 +5113,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
ipvs->net->proc_net,
ip_vs_stats_percpu_show, NULL))
goto err_percpu;
- if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net,
+ if (!proc_create_net_single("ip_vs_status", 0440, ipvs->net->proc_net,
ip_vs_status_show, NULL))
goto err_status;
#endif
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 433ba3cab58c..ab09f5182951 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -68,6 +68,11 @@
and the limit of estimators per kthread
- est_add_ktid: ktid where to add new ests, can point to empty slot where
we should add kt data
+ - data protected by service_mutex: est_temp_list, est_add_ktid,
+ est_kt_count(R/W), est_kt_arr(R/W), est_genid_done, kd->needed(R/W)
+ - data protected by est_mutex: est_genid, est_max_threads, sysctl_est_cpulist,
+ est_cpulist_valid, sysctl_est_nice, est_stopped, sysctl_run_estimation,
+ est_kt_count(R), est_kt_arr(R), kd->needed(R), kd->task (id > 0)
*/
static struct lock_class_key __ipvs_est_key;
@@ -227,14 +232,17 @@ static int ip_vs_estimation_kthread(void *data)
}
/* Schedule stop/start for kthread tasks */
-void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
+void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart)
{
+ lockdep_assert_held(&ipvs->est_mutex);
+
/* Ignore reloads before first service is added */
if (!READ_ONCE(ipvs->enable))
return;
ip_vs_est_stopped_recalc(ipvs);
- /* Bump the kthread configuration genid */
- atomic_inc(&ipvs->est_genid);
+ /* Bump the kthread configuration genid if stopping is requested */
+ if (restart)
+ atomic_inc(&ipvs->est_genid);
queue_delayed_work(system_long_wq, &ipvs->est_reload_work, 0);
}
@@ -304,12 +312,17 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
void *arr = NULL;
int i;
- if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
- READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
- return -EINVAL;
-
mutex_lock(&ipvs->est_mutex);
+ /* Allow kt 0 data to be created before the services are added
+ * and limit the kthreads when services are present.
+ */
+ if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
+ READ_ONCE(ipvs->enable) && ipvs->est_max_threads) {
+ ret = -EINVAL;
+ goto out;
+ }
+
for (i = 0; i < id; i++) {
if (!ipvs->est_kt_arr[i])
break;
@@ -333,6 +346,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
kd->est_timer = jiffies;
kd->id = id;
ip_vs_est_set_params(ipvs, kd);
+ kd->needed = 1;
/* Pre-allocate stats used in calc phase */
if (!id && !kd->calc_stats) {
@@ -341,12 +355,8 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
goto out;
}
- /* Start kthread tasks only when services are present */
- if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
- ret = ip_vs_est_kthread_start(ipvs, kd);
- if (ret < 0)
- goto out;
- }
+ /* Request kthread to be started */
+ ip_vs_est_reload_start(ipvs, false);
if (arr)
ipvs->est_kt_count++;
@@ -482,12 +492,11 @@ out:
/* Start estimation for stats */
int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
{
+ struct ip_vs_est_kt_data *kd = ipvs->est_kt_count > 0 ?
+ ipvs->est_kt_arr[0] : NULL;
struct ip_vs_estimator *est = &stats->est;
int ret;
- if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
- ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
-
est->ktid = -1;
est->ktrow = IPVS_EST_NTICKS - 1; /* Initial delay */
@@ -496,8 +505,15 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
* will not allocate much memory, just for kt 0.
*/
ret = 0;
- if (!ipvs->est_kt_count || !ipvs->est_kt_arr[0])
+ if (!kd) {
ret = ip_vs_est_add_kthread(ipvs);
+ } else if (!kd->needed) {
+ mutex_lock(&ipvs->est_mutex);
+ /* We have job for the kt 0 task */
+ kd->needed = 1;
+ ip_vs_est_reload_start(ipvs, true);
+ mutex_unlock(&ipvs->est_mutex);
+ }
if (ret >= 0)
hlist_add_head(&est->list, &ipvs->est_temp_list);
else
@@ -578,16 +594,14 @@ void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
}
end_kt0:
- /* kt 0 is freed after all other kthreads and chains are empty */
+ /* kt 0 task is stopped after all other kt slots and chains are empty */
if (ipvs->est_kt_count == 1 && hlist_empty(&ipvs->est_temp_list)) {
kd = ipvs->est_kt_arr[0];
- if (!kd || !kd->est_count) {
+ if (kd && !kd->est_count) {
mutex_lock(&ipvs->est_mutex);
- if (kd) {
- ip_vs_est_kthread_destroy(kd);
- ipvs->est_kt_arr[0] = NULL;
- }
- ipvs->est_kt_count--;
+ /* Keep the kt0 data but request kthread_stop */
+ kd->needed = 0;
+ ip_vs_est_reload_start(ipvs, true);
mutex_unlock(&ipvs->est_mutex);
ipvs->est_add_ktid = 0;
}
@@ -647,9 +661,9 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
u64 val;
INIT_HLIST_HEAD(&chain);
- mutex_lock(&ipvs->service_mutex);
+ mutex_lock(&ipvs->est_mutex);
kd = ipvs->est_kt_arr[0];
- mutex_unlock(&ipvs->service_mutex);
+ mutex_unlock(&ipvs->est_mutex);
s = kd ? kd->calc_stats : NULL;
if (!s)
goto out;
@@ -748,16 +762,16 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
if (!ip_vs_est_calc_limits(ipvs, &chain_max))
return;
- mutex_lock(&ipvs->service_mutex);
-
/* Stop all other tasks, so that we can immediately move the
* estimators to est_temp_list without RCU grace period
*/
mutex_lock(&ipvs->est_mutex);
for (id = 1; id < ipvs->est_kt_count; id++) {
/* netns clean up started, abort */
- if (!READ_ONCE(ipvs->enable))
- goto unlock2;
+ if (kthread_should_stop() || !READ_ONCE(ipvs->enable)) {
+ mutex_unlock(&ipvs->est_mutex);
+ return;
+ }
kd = ipvs->est_kt_arr[id];
if (!kd)
continue;
@@ -765,9 +779,11 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
}
mutex_unlock(&ipvs->est_mutex);
+ mutex_lock(&ipvs->service_mutex);
+
/* Move all estimators to est_temp_list but carefully,
* all estimators and kthread data can be released while
- * we reschedule. Even for kthread 0.
+ * we reschedule.
*/
step = 0;
@@ -849,9 +865,7 @@ walk_chain:
ip_vs_stop_estimator(ipvs, stats);
/* Tasks are stopped, move without RCU grace period */
est->ktid = -1;
- est->ktrow = row - kd->est_row;
- if (est->ktrow < 0)
- est->ktrow += IPVS_EST_NTICKS;
+ est->ktrow = delay;
hlist_add_head(&est->list, &ipvs->est_temp_list);
/* kd freed ? */
if (last)
@@ -889,7 +903,6 @@ end_dequeue:
if (genid == atomic_read(&ipvs->est_genid))
ipvs->est_calc_phase = 0;
-unlock2:
mutex_unlock(&ipvs->est_mutex);
unlock:
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index e348fb90b8dc..3b0a70e154cd 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -13,22 +13,6 @@
#include <net/netfilter/nf_tables_offload.h>
#include <net/netfilter/nf_dup_netdev.h>
-#define NF_RECURSION_LIMIT 2
-
-#ifndef CONFIG_PREEMPT_RT
-static u8 *nf_get_nf_dup_skb_recursion(void)
-{
- return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion);
-}
-#else
-
-static u8 *nf_get_nf_dup_skb_recursion(void)
-{
- return &current->net_xmit.nf_dup_skb_recursion;
-}
-
-#endif
-
static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev,
enum nf_dev_hooks hook)
{
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 2c4140e6f53c..785d8c244a77 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -122,6 +122,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
flow_tuple->tun = route->tuple[dir].in.tun;
flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
+ flow_tuple->needs_gso_segment = route->tuple[dir].out.needs_gso_segment;
flow_tuple->tun_num = route->tuple[dir].in.num_tuns;
switch (route->tuple[dir].xmit_type) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index fd56d663cb5b..9c05a50d6013 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -445,13 +445,13 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
switch (skb->protocol) {
case htons(ETH_P_8021Q):
vlan_hdr = (struct vlan_hdr *)skb->data;
- __skb_pull(skb, VLAN_HLEN);
+ skb_pull_rcsum(skb, VLAN_HLEN);
vlan_set_encap_proto(skb, vlan_hdr);
skb_reset_network_header(skb);
break;
case htons(ETH_P_PPP_SES):
skb->protocol = __nf_flow_pppoe_proto(skb);
- skb_pull(skb, PPPOE_SES_HLEN);
+ skb_pull_rcsum(skb, PPPOE_SES_HLEN);
skb_reset_network_header(skb);
break;
}
@@ -462,23 +462,6 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
nf_flow_ip_tunnel_pop(ctx, skb);
}
-struct nf_flow_xmit {
- const void *dest;
- const void *source;
- struct net_device *outdev;
-};
-
-static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
- struct nf_flow_xmit *xmit)
-{
- skb->dev = xmit->outdev;
- dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
- xmit->dest, xmit->source, skb->len);
- dev_queue_xmit(skb);
-
- return NF_STOLEN;
-}
-
static struct flow_offload_tuple_rhash *
nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
struct nf_flowtable *flow_table, struct sk_buff *skb)
@@ -524,7 +507,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
return 0;
}
- if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+ if (skb_ensure_writable(skb, thoff + ctx->hdrsize))
return -1;
flow_offload_refresh(flow_table, flow, false);
@@ -544,7 +527,34 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
return 1;
}
-static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
+/* Similar to skb_vlan_push. */
+static int nf_flow_vlan_push(struct sk_buff *skb, __be16 proto, u16 id,
+ u32 needed_headroom)
+{
+ if (skb_vlan_tag_present(skb)) {
+ struct vlan_hdr *vhdr;
+
+ if (skb_cow_head(skb, needed_headroom + VLAN_HLEN))
+ return -1;
+
+ __skb_push(skb, VLAN_HLEN);
+ if (skb_mac_header_was_set(skb))
+ skb->mac_header -= VLAN_HLEN;
+
+ vhdr = (struct vlan_hdr *)skb->data;
+ skb->network_header -= VLAN_HLEN;
+ vhdr->h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ vhdr->h_vlan_encapsulated_proto = skb->protocol;
+ skb->protocol = skb->vlan_proto;
+ skb_postpush_rcsum(skb, skb->data, VLAN_HLEN);
+ }
+ __vlan_hwaccel_put_tag(skb, proto, id);
+
+ return 0;
+}
+
+static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id,
+ u32 needed_headroom)
{
int data_len = skb->len + sizeof(__be16);
struct ppp_hdr {
@@ -553,7 +563,7 @@ static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
} *ph;
__be16 proto;
- if (skb_cow_head(skb, PPPOE_SES_HLEN))
+ if (skb_cow_head(skb, needed_headroom + PPPOE_SES_HLEN))
return -1;
switch (skb->protocol) {
@@ -730,21 +740,24 @@ static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
}
static int nf_flow_encap_push(struct sk_buff *skb,
- struct flow_offload_tuple *tuple)
+ struct flow_offload_tuple *tuple,
+ struct net_device *outdev)
{
+ u32 needed_headroom = LL_RESERVED_SPACE(outdev);
int i;
- for (i = 0; i < tuple->encap_num; i++) {
+ for (i = tuple->encap_num - 1; i >= 0; i--) {
switch (tuple->encap[i].proto) {
case htons(ETH_P_8021Q):
case htons(ETH_P_8021AD):
- skb_reset_mac_header(skb);
- if (skb_vlan_push(skb, tuple->encap[i].proto,
- tuple->encap[i].id) < 0)
+ if (nf_flow_vlan_push(skb, tuple->encap[i].proto,
+ tuple->encap[i].id,
+ needed_headroom) < 0)
return -1;
break;
case htons(ETH_P_PPP_SES):
- if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0)
+ if (nf_flow_pppoe_push(skb, tuple->encap[i].id,
+ needed_headroom) < 0)
return -1;
break;
}
@@ -753,6 +766,76 @@ static int nf_flow_encap_push(struct sk_buff *skb,
return 0;
}
+struct nf_flow_xmit {
+ const void *dest;
+ const void *source;
+ struct net_device *outdev;
+ struct flow_offload_tuple *tuple;
+ bool needs_gso_segment;
+};
+
+static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+ struct nf_flow_xmit *xmit)
+{
+ struct net_device *dev = xmit->outdev;
+ unsigned int hh_len = LL_RESERVED_SPACE(dev);
+
+ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
+ return;
+ }
+
+ skb->dev = dev;
+ dev_hard_header(skb, dev, ntohs(skb->protocol),
+ xmit->dest, xmit->source, skb->len);
+ dev_queue_xmit(skb);
+}
+
+static unsigned int nf_flow_encap_gso_xmit(struct net *net, struct sk_buff *skb,
+ struct nf_flow_xmit *xmit)
+{
+ struct sk_buff *segs, *nskb;
+
+ segs = skb_gso_segment(skb, 0);
+ if (IS_ERR(segs))
+ return NF_DROP;
+
+ if (segs)
+ consume_skb(skb);
+ else
+ segs = skb;
+
+ skb_list_walk_safe(segs, segs, nskb) {
+ skb_mark_not_on_list(segs);
+
+ if (nf_flow_encap_push(segs, xmit->tuple, xmit->outdev) < 0) {
+ kfree_skb(segs);
+ kfree_skb_list(nskb);
+ return NF_STOLEN;
+ }
+ __nf_flow_queue_xmit(net, segs, xmit);
+ }
+
+ return NF_STOLEN;
+}
+
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+ struct nf_flow_xmit *xmit)
+{
+ if (xmit->tuple->encap_num) {
+ if (skb_is_gso(skb) && xmit->needs_gso_segment)
+ return nf_flow_encap_gso_xmit(net, skb, xmit);
+
+ if (nf_flow_encap_push(skb, xmit->tuple, xmit->outdev) < 0)
+ return NF_DROP;
+ }
+
+ __nf_flow_queue_xmit(net, skb, xmit);
+
+ return NF_STOLEN;
+}
+
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -797,9 +880,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0)
return NF_DROP;
- if (nf_flow_encap_push(skb, other_tuple) < 0)
- return NF_DROP;
-
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rtable(tuplehash->tuple.dst_cache);
@@ -829,6 +909,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
WARN_ON_ONCE(1);
return NF_DROP;
}
+ xmit.tuple = other_tuple;
+ xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment;
return nf_flow_queue_xmit(state->net, skb, &xmit);
}
@@ -1037,7 +1119,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
return 0;
}
- if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+ if (skb_ensure_writable(skb, thoff + ctx->hdrsize))
return -1;
flow_offload_refresh(flow_table, flow, false);
@@ -1119,9 +1201,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
&ip6_daddr, encap_limit) < 0)
return NF_DROP;
- if (nf_flow_encap_push(skb, other_tuple) < 0)
- return NF_DROP;
-
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rt6_info(tuplehash->tuple.dst_cache);
@@ -1151,6 +1230,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
WARN_ON_ONCE(1);
return NF_DROP;
}
+ xmit.tuple = other_tuple;
+ xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment;
return nf_flow_queue_xmit(state->net, skb, &xmit);
}
diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c
index 6bb9579dcc2a..9e88ea6a2eef 100644
--- a/net/netfilter/nf_flow_table_path.c
+++ b/net/netfilter/nf_flow_table_path.c
@@ -86,6 +86,7 @@ struct nft_forward_info {
u8 ingress_vlans;
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
+ bool needs_gso_segment;
enum flow_offload_xmit_type xmit_type;
};
@@ -138,8 +139,11 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
path->encap.proto;
info->num_encaps++;
}
- if (path->type == DEV_PATH_PPPOE)
+ if (path->type == DEV_PATH_PPPOE) {
memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+ info->needs_gso_segment = 1;
+ }
break;
case DEV_PATH_BRIDGE:
if (is_zero_ether_addr(info->h_source))
@@ -279,6 +283,7 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
route->tuple[dir].xmit_type = info.xmit_type;
}
+ route->tuple[dir].out.needs_gso_segment = info.needs_gso_segment;
}
int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d20ce5c36d31..87387adbca65 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -407,6 +407,7 @@ static void nft_netdev_unregister_trans_hook(struct net *net,
}
static void nft_netdev_unregister_hooks(struct net *net,
+ const struct nft_table *table,
struct list_head *hook_list,
bool release_netdev)
{
@@ -414,8 +415,10 @@ static void nft_netdev_unregister_hooks(struct net *net,
struct nf_hook_ops *ops;
list_for_each_entry_safe(hook, next, hook_list, list) {
- list_for_each_entry(ops, &hook->ops_list, list)
- nf_unregister_net_hook(net, ops);
+ if (!(table->flags & NFT_TABLE_F_DORMANT)) {
+ list_for_each_entry(ops, &hook->ops_list, list)
+ nf_unregister_net_hook(net, ops);
+ }
if (release_netdev)
nft_netdev_hook_unlink_free_rcu(hook);
}
@@ -452,20 +455,25 @@ static void __nf_tables_unregister_hook(struct net *net,
struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
- if (table->flags & NFT_TABLE_F_DORMANT ||
- !nft_is_base_chain(chain))
+ if (!nft_is_base_chain(chain))
return;
basechain = nft_base_chain(chain);
ops = &basechain->ops;
+ /* must also be called for dormant tables */
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) {
+ nft_netdev_unregister_hooks(net, table, &basechain->hook_list,
+ release_netdev);
+ return;
+ }
+
+ if (table->flags & NFT_TABLE_F_DORMANT)
+ return;
+
if (basechain->type->ops_unregister)
return basechain->type->ops_unregister(net, ops);
- if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
- nft_netdev_unregister_hooks(net, &basechain->hook_list,
- release_netdev);
- else
- nf_unregister_net_hook(net, &basechain->ops);
+ nf_unregister_net_hook(net, &basechain->ops);
}
static void nf_tables_unregister_hook(struct net *net,
@@ -4205,6 +4213,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
struct nft_chain *chain;
struct nft_ctx ctx = {
.net = net,
+ .table = (struct nft_table *)table,
.family = table->family,
};
int err = 0;
@@ -11281,11 +11290,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
- if (!(table->flags & NFT_TABLE_F_DORMANT)) {
- nft_netdev_unregister_hooks(net,
- &nft_trans_chain_hooks(trans),
- true);
- }
+ nft_netdev_unregister_hooks(net, table,
+ &nft_trans_chain_hooks(trans),
+ true);
free_percpu(nft_trans_chain_stats(trans));
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 5ddd5b6e135f..8ab186f86dd4 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -153,7 +153,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
ptr = skb_network_header(skb) + pkt->nhoff;
else {
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
return false;
ptr = skb->data + nft_thoff(pkt);
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index decc725a33c2..0caa9304d2d0 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -261,10 +261,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return ret;
}
- nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
-
nft_compat_wait_for_destructors(ctx->net);
+ nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
+
ret = xt_check_target(&par, size, proto, inv);
if (ret < 0) {
if (ret == -ENOENT) {
@@ -353,8 +353,6 @@ nla_put_failure:
static int nft_target_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
- struct xt_target *target = expr->ops->data;
- unsigned int hook_mask = 0;
int ret;
if (ctx->family != NFPROTO_IPV4 &&
@@ -377,11 +375,21 @@ static int nft_target_validate(const struct nft_ctx *ctx,
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
const struct nf_hook_ops *ops = &basechain->ops;
+ unsigned int hook_mask = 1 << ops->hooknum;
+ struct xt_target *target = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+ struct xt_tgchk_param par;
+ union nft_entry e = {};
- hook_mask = 1 << ops->hooknum;
if (target->hooks && !(hook_mask & target->hooks))
return -EINVAL;
+ nft_target_set_tgchk_param(&par, ctx, target, info, &e, 0, false);
+
+ ret = xt_check_hooks_target(&par);
+ if (ret < 0)
+ return ret;
+
ret = nft_compat_chain_validate_dependency(ctx, target->table);
if (ret < 0)
return ret;
@@ -515,10 +523,10 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return ret;
}
- nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
-
nft_compat_wait_for_destructors(ctx->net);
+ nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
+
return xt_check_match(&par, size, proto, inv);
}
@@ -614,8 +622,6 @@ static int nft_match_large_dump(struct sk_buff *skb,
static int nft_match_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
- struct xt_match *match = expr->ops->data;
- unsigned int hook_mask = 0;
int ret;
if (ctx->family != NFPROTO_IPV4 &&
@@ -638,11 +644,30 @@ static int nft_match_validate(const struct nft_ctx *ctx,
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
const struct nf_hook_ops *ops = &basechain->ops;
+ unsigned int hook_mask = 1 << ops->hooknum;
+ struct xt_match *match = expr->ops->data;
+ size_t size = XT_ALIGN(match->matchsize);
+ struct xt_mtchk_param par;
+ union nft_entry e = {};
+ void *info;
- hook_mask = 1 << ops->hooknum;
if (match->hooks && !(hook_mask & match->hooks))
return -EINVAL;
+ if (NFT_EXPR_SIZE(size) > NFT_MATCH_LARGE_THRESH) {
+ struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+ info = priv->info;
+ } else {
+ info = nft_expr_priv(expr);
+ }
+
+ nft_match_set_mtchk_param(&par, ctx, match, info, &e, 0, false);
+
+ ret = xt_check_hooks_match(&par);
+ if (ret < 0)
+ return ret;
+
ret = nft_compat_chain_validate_dependency(ctx, match->table);
if (ret < 0)
return ret;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 0407d6f708ae..e6a07c0df207 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -376,7 +376,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
const struct sctp_chunkhdr *sch;
struct sctp_chunkhdr _sch;
- if (pkt->tprot != IPPROTO_SCTP)
+ if (pkt->tprot != IPPROTO_SCTP || pkt->fragoff)
goto err;
do {
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 4bce36c3a6a0..b9e88d7cf308 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -95,12 +95,15 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
+ u8 *nf_dup_skb_recursion = nf_get_nf_dup_skb_recursion();
struct nft_fwd_neigh *priv = nft_expr_priv(expr);
void *addr = &regs->data[priv->sreg_addr];
int oif = regs->data[priv->sreg_dev];
unsigned int verdict = NF_STOLEN;
struct sk_buff *skb = pkt->skb;
+ int nhoff = skb_network_offset(skb);
struct net_device *dev;
+ unsigned int hh_len;
int neigh_table;
switch (priv->nfproto) {
@@ -111,7 +114,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
verdict = NFT_BREAK;
goto out;
}
- if (skb_try_make_writable(skb, sizeof(*iph))) {
+ if (skb_ensure_writable(skb, nhoff + sizeof(*iph))) {
verdict = NF_DROP;
goto out;
}
@@ -132,7 +135,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
verdict = NFT_BREAK;
goto out;
}
- if (skb_try_make_writable(skb, sizeof(*ip6h))) {
+ if (skb_ensure_writable(skb, nhoff + sizeof(*ip6h))) {
verdict = NF_DROP;
goto out;
}
@@ -151,13 +154,31 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
goto out;
}
+ if (*nf_dup_skb_recursion > NF_RECURSION_LIMIT) {
+ verdict = NF_DROP;
+ goto out;
+ }
+
dev = dev_get_by_index_rcu(nft_net(pkt), oif);
- if (dev == NULL)
- return;
+ if (dev == NULL) {
+ verdict = NF_DROP;
+ goto out;
+ }
+
+ hh_len = LL_RESERVED_SPACE(dev);
+ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb) {
+ verdict = NF_STOLEN;
+ goto out;
+ }
+ }
skb->dev = dev;
skb_clear_tstamp(skb);
+ (*nf_dup_skb_recursion)++;
neigh_xmit(neigh_table, dev, addr, skb);
+ (*nf_dup_skb_recursion)--;
out:
regs->verdict.code = verdict;
}
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index c02d5cb52143..45fe56da5044 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -33,7 +33,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
return;
}
- if (pkt->tprot != IPPROTO_TCP) {
+ if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) {
regs->verdict.code = NFT_BREAK;
return;
}
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index f2101af8c867..89be443734f6 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -30,8 +30,8 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr,
__be16 tport = 0;
struct sock *sk;
- if (pkt->tprot != IPPROTO_TCP &&
- pkt->tprot != IPPROTO_UDP) {
+ if ((pkt->tprot != IPPROTO_TCP &&
+ pkt->tprot != IPPROTO_UDP) || pkt->fragoff) {
regs->verdict.code = NFT_BREAK;
return;
}
@@ -97,8 +97,8 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
memset(&taddr, 0, sizeof(taddr));
- if (pkt->tprot != IPPROTO_TCP &&
- pkt->tprot != IPPROTO_UDP) {
+ if ((pkt->tprot != IPPROTO_TCP &&
+ pkt->tprot != IPPROTO_UDP) || pkt->fragoff) {
regs->verdict.code = NFT_BREAK;
return;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 9f837fb5ceb4..2c67c2e6b132 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -477,11 +477,9 @@ int xt_check_proc_name(const char *name, unsigned int size)
}
EXPORT_SYMBOL(xt_check_proc_name);
-int xt_check_match(struct xt_mtchk_param *par,
- unsigned int size, u16 proto, bool inv_proto)
+static int xt_check_match_common(struct xt_mtchk_param *par,
+ unsigned int size, u16 proto, bool inv_proto)
{
- int ret;
-
if (XT_ALIGN(par->match->matchsize) != size &&
par->match->matchsize != -1) {
/*
@@ -530,6 +528,14 @@ int xt_check_match(struct xt_mtchk_param *par,
par->match->proto);
return -EINVAL;
}
+
+ return 0;
+}
+
+static int xt_checkentry_match(struct xt_mtchk_param *par)
+{
+ int ret;
+
if (par->match->checkentry != NULL) {
ret = par->match->checkentry(par);
if (ret < 0)
@@ -538,8 +544,34 @@ int xt_check_match(struct xt_mtchk_param *par,
/* Flag up potential errors. */
return -EIO;
}
+
+ return 0;
+}
+
+int xt_check_hooks_match(struct xt_mtchk_param *par)
+{
+ if (par->match->check_hooks != NULL)
+ return par->match->check_hooks(par);
+
return 0;
}
+EXPORT_SYMBOL_GPL(xt_check_hooks_match);
+
+int xt_check_match(struct xt_mtchk_param *par,
+ unsigned int size, u16 proto, bool inv_proto)
+{
+ int ret;
+
+ ret = xt_check_match_common(par, size, proto, inv_proto);
+ if (ret < 0)
+ return ret;
+
+ ret = xt_check_hooks_match(par);
+ if (ret < 0)
+ return ret;
+
+ return xt_checkentry_match(par);
+}
EXPORT_SYMBOL_GPL(xt_check_match);
/** xt_check_entry_match - check that matches end before start of target
@@ -1012,11 +1044,9 @@ bool xt_find_jump_offset(const unsigned int *offsets,
}
EXPORT_SYMBOL(xt_find_jump_offset);
-int xt_check_target(struct xt_tgchk_param *par,
- unsigned int size, u16 proto, bool inv_proto)
+static int xt_check_target_common(struct xt_tgchk_param *par,
+ unsigned int size, u16 proto, bool inv_proto)
{
- int ret;
-
if (XT_ALIGN(par->target->targetsize) != size) {
pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n",
xt_prefix[par->family], par->target->name,
@@ -1061,6 +1091,23 @@ int xt_check_target(struct xt_tgchk_param *par,
par->target->proto);
return -EINVAL;
}
+
+ return 0;
+}
+
+int xt_check_hooks_target(struct xt_tgchk_param *par)
+{
+ if (par->target->check_hooks != NULL)
+ return par->target->check_hooks(par);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xt_check_hooks_target);
+
+static int xt_checkentry_target(struct xt_tgchk_param *par)
+{
+ int ret;
+
if (par->target->checkentry != NULL) {
ret = par->target->checkentry(par);
if (ret < 0)
@@ -1071,6 +1118,22 @@ int xt_check_target(struct xt_tgchk_param *par,
}
return 0;
}
+
+int xt_check_target(struct xt_tgchk_param *par,
+ unsigned int size, u16 proto, bool inv_proto)
+{
+ int ret;
+
+ ret = xt_check_target_common(par, size, proto, inv_proto);
+ if (ret < 0)
+ return ret;
+
+ ret = xt_check_hooks_target(par);
+ if (ret < 0)
+ return ret;
+
+ return xt_checkentry_target(par);
+}
EXPORT_SYMBOL_GPL(xt_check_target);
/**
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 498f5871c84a..d2aeacf94230 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -354,7 +354,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
.family = NFPROTO_IPV4,
.revision = 1,
.targetsize = sizeof(struct xt_ct_target_info_v1),
- .usersize = offsetof(struct xt_ct_target_info, ct),
+ .usersize = offsetof(struct xt_ct_target_info_v1, ct),
.checkentry = xt_ct_tg_check_v1,
.destroy = xt_ct_tg_destroy_v1,
.target = xt_ct_target_v1,
@@ -366,7 +366,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
.family = NFPROTO_IPV4,
.revision = 2,
.targetsize = sizeof(struct xt_ct_target_info_v1),
- .usersize = offsetof(struct xt_ct_target_info, ct),
+ .usersize = offsetof(struct xt_ct_target_info_v1, ct),
.checkentry = xt_ct_tg_check_v2,
.destroy = xt_ct_tg_destroy_v1,
.target = xt_ct_target_v1,
@@ -398,7 +398,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
.family = NFPROTO_IPV6,
.revision = 1,
.targetsize = sizeof(struct xt_ct_target_info_v1),
- .usersize = offsetof(struct xt_ct_target_info, ct),
+ .usersize = offsetof(struct xt_ct_target_info_v1, ct),
.checkentry = xt_ct_tg_check_v1,
.destroy = xt_ct_tg_destroy_v1,
.target = xt_ct_target_v1,
@@ -410,7 +410,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
.family = NFPROTO_IPV6,
.revision = 2,
.targetsize = sizeof(struct xt_ct_target_info_v1),
- .usersize = offsetof(struct xt_ct_target_info, ct),
+ .usersize = offsetof(struct xt_ct_target_info_v1, ct),
.checkentry = xt_ct_tg_check_v2,
.destroy = xt_ct_tg_destroy_v1,
.target = xt_ct_target_v1,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 116a885adb3c..80e1634bc51f 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -247,6 +247,21 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
}
#endif
+static int tcpmss_tg4_check_hooks(const struct xt_tgchk_param *par)
+{
+ const struct xt_tcpmss_info *info = par->targinfo;
+
+ if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
+ (par->hook_mask & ~((1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING))) != 0) {
+ pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/* Must specify -p tcp --syn */
static inline bool find_syn_match(const struct xt_entry_match *m)
{
@@ -262,17 +277,9 @@ static inline bool find_syn_match(const struct xt_entry_match *m)
static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
{
- const struct xt_tcpmss_info *info = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
const struct xt_entry_match *ematch;
- if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
- (par->hook_mask & ~((1 << NF_INET_FORWARD) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_POST_ROUTING))) != 0) {
- pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
- return -EINVAL;
- }
if (par->nft_compat)
return 0;
@@ -286,17 +293,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
{
- const struct xt_tcpmss_info *info = par->targinfo;
const struct ip6t_entry *e = par->entryinfo;
const struct xt_entry_match *ematch;
- if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
- (par->hook_mask & ~((1 << NF_INET_FORWARD) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_POST_ROUTING))) != 0) {
- pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
- return -EINVAL;
- }
if (par->nft_compat)
return 0;
@@ -312,6 +311,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = {
{
.family = NFPROTO_IPV4,
.name = "TCPMSS",
+ .check_hooks = tcpmss_tg4_check_hooks,
.checkentry = tcpmss_tg4_check,
.target = tcpmss_tg4,
.targetsize = sizeof(struct xt_tcpmss_info),
@@ -322,6 +322,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = {
{
.family = NFPROTO_IPV6,
.name = "TCPMSS",
+ .check_hooks = tcpmss_tg4_check_hooks,
.checkentry = tcpmss_tg6_check,
.target = tcpmss_tg6,
.targetsize = sizeof(struct xt_tcpmss_info),
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index e4bea1d346cf..5f60e7298a1e 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -86,6 +86,9 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tproxy_target_info *tgi = par->targinfo;
+ if (par->fragoff)
+ return NF_DROP;
+
return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport,
tgi->mark_mask, tgi->mark_value);
}
@@ -95,6 +98,9 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
+ if (par->fragoff)
+ return NF_DROP;
+
return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport,
tgi->mark_mask, tgi->mark_value);
}
@@ -106,6 +112,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
+ unsigned short fragoff = 0;
struct udphdr _hdr, *hp;
struct sock *sk;
const struct in6_addr *laddr;
@@ -113,8 +120,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
int thoff = 0;
int tproto;
- tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
- if (tproto < 0)
+ tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL);
+ if (tproto < 0 || fragoff)
return NF_DROP;
hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index a77088943107..913dbe3aa5e2 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -153,14 +153,10 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
}
-static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
+static int addrtype_mt_check_hooks(const struct xt_mtchk_param *par)
{
- const char *errmsg = "both incoming and outgoing interface limitation cannot be selected";
struct xt_addrtype_info_v1 *info = par->matchinfo;
-
- if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
- info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
- goto err;
+ const char *errmsg;
if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN)) &&
@@ -176,6 +172,21 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
goto err;
}
+ return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
+}
+
+static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
+{
+ const char *errmsg = "both incoming and outgoing interface limitation cannot be selected";
+ struct xt_addrtype_info_v1 *info = par->matchinfo;
+
+ if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
+ info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
+ goto err;
+
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
if (par->family == NFPROTO_IPV6) {
if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
@@ -211,6 +222,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = {
.family = NFPROTO_IPV4,
.revision = 1,
.match = addrtype_mt_v1,
+ .check_hooks = addrtype_mt_check_hooks,
.checkentry = addrtype_mt_checkentry_v1,
.matchsize = sizeof(struct xt_addrtype_info_v1),
.me = THIS_MODULE
@@ -221,6 +233,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = {
.family = NFPROTO_IPV6,
.revision = 1,
.match = addrtype_mt_v1,
+ .check_hooks = addrtype_mt_check_hooks,
.checkentry = addrtype_mt_checkentry_v1,
.matchsize = sizeof(struct xt_addrtype_info_v1),
.me = THIS_MODULE
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
index 9520dd00070b..6d1a44ab5eee 100644
--- a/net/netfilter/xt_devgroup.c
+++ b/net/netfilter/xt_devgroup.c
@@ -33,14 +33,10 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
return true;
}
-static int devgroup_mt_checkentry(const struct xt_mtchk_param *par)
+static int devgroup_mt_check_hooks(const struct xt_mtchk_param *par)
{
const struct xt_devgroup_info *info = par->matchinfo;
- if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC |
- XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST))
- return -EINVAL;
-
if (info->flags & XT_DEVGROUP_MATCH_SRC &&
par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
@@ -56,9 +52,21 @@ static int devgroup_mt_checkentry(const struct xt_mtchk_param *par)
return 0;
}
+static int devgroup_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct xt_devgroup_info *info = par->matchinfo;
+
+ if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC |
+ XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST))
+ return -EINVAL;
+
+ return 0;
+}
+
static struct xt_match devgroup_mt_reg __read_mostly = {
.name = "devgroup",
.match = devgroup_mt,
+ .check_hooks = devgroup_mt_check_hooks,
.checkentry = devgroup_mt_checkentry,
.matchsize = sizeof(struct xt_devgroup_info),
.family = NFPROTO_UNSPEC,
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c
index b96e8203ac54..a8503f5d26bf 100644
--- a/net/netfilter/xt_ecn.c
+++ b/net/netfilter/xt_ecn.c
@@ -30,6 +30,10 @@ static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par)
struct tcphdr _tcph;
const struct tcphdr *th;
+ /* this is fine for IPv6 as ecn_mt_check6() enforces -p tcp */
+ if (par->fragoff)
+ return false;
+
/* In practice, TCP match does this, so can't fail. But let's
* be good citizens.
*/
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 3bd127bfc114..2704b4b60d1e 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -658,6 +658,8 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
if (!(hinfo->cfg.mode &
(XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
return 0;
+ if (ntohs(ip_hdr(skb)->frag_off) & IP_OFFSET)
+ return -1;
nexthdr = ip_hdr(skb)->protocol;
break;
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
@@ -681,7 +683,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
return 0;
nexthdr = ipv6_hdr(skb)->nexthdr;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);
- if ((int)protoff < 0)
+ if ((int)protoff < 0 || ntohs(frag_off) & IP6_OFFSET)
return -1;
break;
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index dc9485854002..e8807caede68 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -27,6 +27,9 @@
static bool
xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
{
+ if (p->fragoff)
+ return false;
+
return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p),
xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers);
}
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index d2b0b52434fa..dd98f758176c 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -91,14 +91,10 @@ match_outdev:
return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
}
-static int physdev_mt_check(const struct xt_mtchk_param *par)
+static int physdev_mt_check_hooks(const struct xt_mtchk_param *par)
{
const struct xt_physdev_info *info = par->matchinfo;
- static bool brnf_probed __read_mostly;
- if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
- info->bitmask & ~XT_PHYSDEV_OP_MASK)
- return -EINVAL;
if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) &&
(!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
@@ -107,6 +103,18 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
}
+ return 0;
+}
+
+static int physdev_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_physdev_info *info = par->matchinfo;
+ static bool brnf_probed __read_mostly;
+
+ if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
+ info->bitmask & ~XT_PHYSDEV_OP_MASK)
+ return -EINVAL;
+
#define X(memb) strnlen(info->memb, sizeof(info->memb)) >= sizeof(info->memb)
if (info->bitmask & XT_PHYSDEV_OP_IN) {
if (info->physindev[0] == '\0')
@@ -141,6 +149,7 @@ static struct xt_match physdev_mt_reg[] __read_mostly = {
{
.name = "physdev",
.family = NFPROTO_IPV4,
+ .check_hooks = physdev_mt_check_hooks,
.checkentry = physdev_mt_check,
.match = physdev_mt,
.matchsize = sizeof(struct xt_physdev_info),
@@ -149,6 +158,7 @@ static struct xt_match physdev_mt_reg[] __read_mostly = {
{
.name = "physdev",
.family = NFPROTO_IPV6,
+ .check_hooks = physdev_mt_check_hooks,
.checkentry = physdev_mt_check,
.match = physdev_mt,
.matchsize = sizeof(struct xt_physdev_info),
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index b5fa65558318..ff54e3a8581e 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -126,13 +126,10 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
}
-static int policy_mt_check(const struct xt_mtchk_param *par)
+static int policy_mt_check_hooks(const struct xt_mtchk_param *par)
{
const struct xt_policy_info *info = par->matchinfo;
- const char *errmsg = "neither incoming nor outgoing policy selected";
-
- if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT)))
- goto err;
+ const char *errmsg;
if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
@@ -144,6 +141,21 @@ static int policy_mt_check(const struct xt_mtchk_param *par)
errmsg = "input policy not valid in POSTROUTING and OUTPUT";
goto err;
}
+
+ return 0;
+err:
+ pr_info_ratelimited("%s\n", errmsg);
+ return -EINVAL;
+}
+
+static int policy_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_policy_info *info = par->matchinfo;
+ const char *errmsg = "neither incoming nor outgoing policy selected";
+
+ if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT)))
+ goto err;
+
if (info->len > XT_POLICY_MAX_ELEM) {
errmsg = "too many policy elements";
goto err;
@@ -158,6 +170,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = {
{
.name = "policy",
.family = NFPROTO_IPV4,
+ .check_hooks = policy_mt_check_hooks,
.checkentry = policy_mt_check,
.match = policy_mt,
.matchsize = sizeof(struct xt_policy_info),
@@ -166,6 +179,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = {
{
.name = "policy",
.family = NFPROTO_IPV6,
+ .check_hooks = policy_mt_check_hooks,
.checkentry = policy_mt_check,
.match = policy_mt,
.matchsize = sizeof(struct xt_policy_info),
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 731bc2cafae4..4ae04bba9358 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -431,6 +431,29 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
}
static int
+set_target_v3_check_hooks(const struct xt_tgchk_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+
+ if (info->map_set.index != IPSET_INVALID_ID) {
+ if (strncmp(par->table, "mangle", 7)) {
+ pr_info_ratelimited("--map-set only usable from mangle table\n");
+ return -EINVAL;
+ }
+ if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
+ (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
+ (par->hook_mask & ~(1 << NF_INET_FORWARD |
+ 1 << NF_INET_LOCAL_OUT |
+ 1 << NF_INET_POST_ROUTING))) {
+ pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int
set_target_v3_checkentry(const struct xt_tgchk_param *par)
{
const struct xt_set_info_target_v3 *info = par->targinfo;
@@ -459,20 +482,6 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
}
if (info->map_set.index != IPSET_INVALID_ID) {
- if (strncmp(par->table, "mangle", 7)) {
- pr_info_ratelimited("--map-set only usable from mangle table\n");
- ret = -EINVAL;
- goto cleanup_del;
- }
- if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
- (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
- (par->hook_mask & ~(1 << NF_INET_FORWARD |
- 1 << NF_INET_LOCAL_OUT |
- 1 << NF_INET_POST_ROUTING))) {
- pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
- ret = -EINVAL;
- goto cleanup_del;
- }
index = ip_set_nfnl_get_byindex(par->net,
info->map_set.index);
if (index == IPSET_INVALID_ID) {
@@ -672,6 +681,7 @@ static struct xt_target set_targets[] __read_mostly = {
.family = NFPROTO_IPV4,
.target = set_target_v3,
.targetsize = sizeof(struct xt_set_info_target_v3),
+ .check_hooks = set_target_v3_check_hooks,
.checkentry = set_target_v3_checkentry,
.destroy = set_target_v3_destroy,
.me = THIS_MODULE
@@ -682,6 +692,7 @@ static struct xt_target set_targets[] __read_mostly = {
.family = NFPROTO_IPV6,
.target = set_target_v3,
.targetsize = sizeof(struct xt_set_info_target_v3),
+ .check_hooks = set_target_v3_check_hooks,
.checkentry = set_target_v3_checkentry,
.destroy = set_target_v3_destroy,
.me = THIS_MODULE
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 0d32d4841cb3..b9da8269161d 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -32,6 +32,10 @@ tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par)
u8 _opt[15 * 4 - sizeof(_tcph)];
unsigned int i, optlen;
+ /* this is fine for IPv6 as xt_tcpmss enforces -p tcp */
+ if (par->fragoff)
+ return false;
+
/* If we don't have the whole header, drop packet. */
th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
if (th == NULL)
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index b10e1602c6b1..cb5ea4424ffc 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -97,6 +97,9 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
goto error;
}
+ vport->dev = dev;
+ netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL);
+
rtnl_unlock();
return vport;
error:
@@ -111,7 +114,7 @@ static struct vport *geneve_create(const struct vport_parms *parms)
if (IS_ERR(vport))
return vport;
- return ovs_netdev_link(vport, parms->name);
+ return ovs_netdev_link(vport, true);
}
static struct vport_ops ovs_geneve_vport_ops = {
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 4014c9b5eb79..6cb5a697b396 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -63,6 +63,9 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms)
return ERR_PTR(err);
}
+ vport->dev = dev;
+ netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL);
+
rtnl_unlock();
return vport;
}
@@ -75,7 +78,7 @@ static struct vport *gre_create(const struct vport_parms *parms)
if (IS_ERR(vport))
return vport;
- return ovs_netdev_link(vport, parms->name);
+ return ovs_netdev_link(vport, true);
}
static struct vport_ops ovs_gre_vport_ops = {
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 12055af832dc..c42642075685 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -73,37 +73,21 @@ static struct net_device *get_dpdev(const struct datapath *dp)
return local->dev;
}
-struct vport *ovs_netdev_link(struct vport *vport, const char *name)
+struct vport *ovs_netdev_link(struct vport *vport, bool tunnel)
{
int err;
- vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name);
- if (!vport->dev) {
+ if (WARN_ON_ONCE(!vport->dev)) {
err = -ENODEV;
goto error_free_vport;
}
- /* Ensure that the device exists and that the provided
- * name is not one of its aliases.
- */
- if (strcmp(name, ovs_vport_name(vport))) {
- err = -ENODEV;
- goto error_put;
- }
- netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL);
- if (vport->dev->flags & IFF_LOOPBACK ||
- (vport->dev->type != ARPHRD_ETHER &&
- vport->dev->type != ARPHRD_NONE) ||
- ovs_is_internal_dev(vport->dev)) {
- err = -EINVAL;
- goto error_put;
- }
rtnl_lock();
err = netdev_master_upper_dev_link(vport->dev,
get_dpdev(vport->dp),
NULL, NULL, NULL);
if (err)
- goto error_unlock;
+ goto error_put_unlock;
err = netdev_rx_handler_register(vport->dev, netdev_frame_hook,
vport);
@@ -119,10 +103,11 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
error_master_upper_dev_unlink:
netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp));
-error_unlock:
- rtnl_unlock();
-error_put:
+error_put_unlock:
+ if (tunnel && vport->dev->reg_state == NETREG_REGISTERED)
+ rtnl_delete_link(vport->dev, 0, NULL);
netdev_put(vport->dev, &vport->dev_tracker);
+ rtnl_unlock();
error_free_vport:
ovs_vport_free(vport);
return ERR_PTR(err);
@@ -132,12 +117,39 @@ EXPORT_SYMBOL_GPL(ovs_netdev_link);
static struct vport *netdev_create(const struct vport_parms *parms)
{
struct vport *vport;
+ int err;
vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms);
if (IS_ERR(vport))
return vport;
- return ovs_netdev_link(vport, parms->name);
+ vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
+ if (!vport->dev) {
+ err = -ENODEV;
+ goto error_free_vport;
+ }
+ netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL);
+
+ /* Ensure that the provided name is not an alias. */
+ if (strcmp(parms->name, ovs_vport_name(vport))) {
+ err = -ENODEV;
+ goto error_put;
+ }
+
+ if (vport->dev->flags & IFF_LOOPBACK ||
+ (vport->dev->type != ARPHRD_ETHER &&
+ vport->dev->type != ARPHRD_NONE) ||
+ ovs_is_internal_dev(vport->dev)) {
+ err = -EINVAL;
+ goto error_put;
+ }
+
+ return ovs_netdev_link(vport, false);
+error_put:
+ netdev_put(vport->dev, &vport->dev_tracker);
+error_free_vport:
+ ovs_vport_free(vport);
+ return ERR_PTR(err);
}
static void vport_netdev_free(struct rcu_head *rcu)
@@ -196,9 +208,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
*/
if (vport->dev->reg_state == NETREG_REGISTERED)
rtnl_delete_link(vport->dev, 0, NULL);
- rtnl_unlock();
+ /* We can't put the device reference yet, since it can still be in
+ * use, but rtnl_unlock()->netdev_run_todo() will block until all
+ * the references are released, so the RCU call must be before it.
+ */
call_rcu(&vport->rcu, vport_netdev_free);
+ rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index c5d83a43bfc4..6c0d7366f986 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -13,7 +13,7 @@
struct vport *ovs_netdev_get_vport(struct net_device *dev);
-struct vport *ovs_netdev_link(struct vport *vport, const char *name);
+struct vport *ovs_netdev_link(struct vport *vport, bool tunnel);
void ovs_netdev_detach_dev(struct vport *);
int __init ovs_netdev_init(void);
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 0b881b043bcf..c1b37b50d29e 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -126,6 +126,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
goto error;
}
+ vport->dev = dev;
+ netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL);
+
rtnl_unlock();
return vport;
error:
@@ -140,7 +143,7 @@ static struct vport *vxlan_create(const struct vport_parms *parms)
if (IS_ERR(vport))
return vport;
- return ovs_netdev_link(vport, parms->name);
+ return ovs_netdev_link(vport, true);
}
static struct vport_ops ovs_vxlan_netdev_vport_ops = {
diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c
index 9508b6c38003..e45549f08eef 100644
--- a/net/psp/psp_main.c
+++ b/net/psp/psp_main.c
@@ -263,15 +263,16 @@ EXPORT_SYMBOL(psp_dev_encapsulate);
/* Receive handler for PSP packets.
*
- * Presently it accepts only already-authenticated packets and does not
- * support optional fields, such as virtualization cookies. The caller should
- * ensure that skb->data is pointing to the mac header, and that skb->mac_len
- * is set. This function does not currently adjust skb->csum (CHECKSUM_COMPLETE
- * is not supported).
+ * Accepts only already-authenticated packets. The full PSP header is
+ * stripped according to psph->hdrlen; any optional fields it advertises
+ * (virtualization cookies, etc.) are ignored and discarded along with the
+ * rest of the header. The caller should ensure that skb->data is pointing
+ * to the mac header, and that skb->mac_len is set. This function does not
+ * currently adjust skb->csum (CHECKSUM_COMPLETE is not supported).
*/
int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv)
{
- int l2_hlen = 0, l3_hlen, encap;
+ int l2_hlen = 0, l3_hlen, encap, psp_hlen;
struct psp_skb_ext *pse;
struct psphdr *psph;
struct ethhdr *eth;
@@ -312,18 +313,36 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv)
if (unlikely(uh->dest != htons(PSP_DEFAULT_UDP_PORT)))
return -EINVAL;
- pse = skb_ext_add(skb, SKB_EXT_PSP);
- if (!pse)
+ psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen +
+ sizeof(struct udphdr));
+
+ /* Strip the full PSP header per psph->hdrlen; VC/options are pulled
+ * into the linear region only so they can be discarded with the
+ * rest of the header.
+ */
+ psp_hlen = (psph->hdrlen + 1) * 8;
+
+ if (unlikely(psp_hlen < sizeof(struct psphdr)))
+ return -EINVAL;
+
+ if (psp_hlen > sizeof(struct psphdr) &&
+ !pskb_may_pull(skb, l2_hlen + l3_hlen +
+ sizeof(struct udphdr) + psp_hlen))
return -EINVAL;
psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen +
sizeof(struct udphdr));
+
+ pse = skb_ext_add(skb, SKB_EXT_PSP);
+ if (!pse)
+ return -EINVAL;
+
pse->spi = psph->spi;
pse->dev_id = dev_id;
pse->generation = generation;
pse->version = FIELD_GET(PSPHDR_VERFL_VERSION, psph->verfl);
- encap = PSP_ENCAP_HLEN;
+ encap = sizeof(struct udphdr) + psp_hlen;
encap += strip_icv ? PSP_TRL_SIZE : 0;
if (proto == htons(ETH_P_IP)) {
@@ -340,8 +359,9 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv)
ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) - encap);
}
- memmove(skb->data + PSP_ENCAP_HLEN, skb->data, l2_hlen + l3_hlen);
- skb_pull(skb, PSP_ENCAP_HLEN);
+ memmove(skb->data + sizeof(struct udphdr) + psp_hlen,
+ skb->data, l2_hlen + l3_hlen);
+ skb_pull(skb, sizeof(struct udphdr) + psp_hlen);
if (strip_icv)
pskb_trim(skb, skb->len - PSP_TRL_SIZE);
diff --git a/net/rds/message.c b/net/rds/message.c
index eaa6f22601a4..25fedcb3cd00 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -131,24 +131,34 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
*/
static void rds_message_purge(struct rds_message *rm)
{
+ struct rds_znotifier *znotifier;
unsigned long i, flags;
- bool zcopy = false;
+ bool zcopy;
if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
return;
spin_lock_irqsave(&rm->m_rs_lock, flags);
+ znotifier = rm->data.op_mmp_znotifier;
+ rm->data.op_mmp_znotifier = NULL;
+ zcopy = !!znotifier;
+
if (rm->m_rs) {
struct rds_sock *rs = rm->m_rs;
- if (rm->data.op_mmp_znotifier) {
- zcopy = true;
- rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+ if (znotifier) {
+ rds_rm_zerocopy_callback(rs, znotifier);
rds_wake_sk_sleep(rs);
- rm->data.op_mmp_znotifier = NULL;
}
sock_put(rds_rs_to_sk(rs));
rm->m_rs = NULL;
+ } else if (znotifier) {
+ /*
+ * Zerocopy can fail before the message is queued on the
+ * socket, so there is no rs to carry the notification.
+ */
+ mm_unaccount_pinned_pages(&znotifier->z_mmp);
+ kfree(rds_info_from_znotifier(znotifier));
}
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 13c6d1869a14..5862933be8d7 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -399,14 +399,14 @@ static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust);
* Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
*/
-static void cobalt_newton_step(struct cobalt_vars *vars)
+static void cobalt_newton_step(struct cobalt_vars *vars, u32 count)
{
u32 invsqrt, invsqrt2;
u64 val;
invsqrt = vars->rec_inv_sqrt;
invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
- val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+ val = (3LL << 32) - ((u64)count * invsqrt2);
val >>= 2; /* avoid overflow in following multiply */
val = (val * invsqrt) >> (32 - 2 + 1);
@@ -414,12 +414,12 @@ static void cobalt_newton_step(struct cobalt_vars *vars)
vars->rec_inv_sqrt = val;
}
-static void cobalt_invsqrt(struct cobalt_vars *vars)
+static void cobalt_invsqrt(struct cobalt_vars *vars, u32 count)
{
- if (vars->count < REC_INV_SQRT_CACHE)
- vars->rec_inv_sqrt = inv_sqrt_cache[vars->count];
+ if (count < REC_INV_SQRT_CACHE)
+ vars->rec_inv_sqrt = inv_sqrt_cache[count];
else
- cobalt_newton_step(vars);
+ cobalt_newton_step(vars, count);
}
static void cobalt_vars_init(struct cobalt_vars *vars)
@@ -449,16 +449,19 @@ static bool cobalt_queue_full(struct cobalt_vars *vars,
bool up = false;
if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
- up = !vars->p_drop;
- vars->p_drop += p->p_inc;
- if (vars->p_drop < p->p_inc)
- vars->p_drop = ~0;
- vars->blue_timer = now;
- }
- vars->dropping = true;
- vars->drop_next = now;
+ u32 p_drop = vars->p_drop;
+
+ up = !p_drop;
+ p_drop += p->p_inc;
+ if (p_drop < p->p_inc)
+ p_drop = ~0;
+ WRITE_ONCE(vars->p_drop, p_drop);
+ WRITE_ONCE(vars->blue_timer, now);
+ }
+ WRITE_ONCE(vars->dropping, true);
+ WRITE_ONCE(vars->drop_next, now);
if (!vars->count)
- vars->count = 1;
+ WRITE_ONCE(vars->count, 1);
return up;
}
@@ -475,20 +478,20 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars,
if (vars->p_drop &&
ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
if (vars->p_drop < p->p_dec)
- vars->p_drop = 0;
+ WRITE_ONCE(vars->p_drop, 0);
else
- vars->p_drop -= p->p_dec;
- vars->blue_timer = now;
+ WRITE_ONCE(vars->p_drop, vars->p_drop - p->p_dec);
+ WRITE_ONCE(vars->blue_timer, now);
down = !vars->p_drop;
}
- vars->dropping = false;
+ WRITE_ONCE(vars->dropping, false);
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
- vars->count--;
- cobalt_invsqrt(vars);
- vars->drop_next = cobalt_control(vars->drop_next,
- p->interval,
- vars->rec_inv_sqrt);
+ WRITE_ONCE(vars->count, vars->count - 1);
+ cobalt_invsqrt(vars, vars->count);
+ WRITE_ONCE(vars->drop_next,
+ cobalt_control(vars->drop_next, p->interval,
+ vars->rec_inv_sqrt));
}
return down;
@@ -507,6 +510,7 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
bool next_due, over_target;
ktime_t schedule;
u64 sojourn;
+ u32 count;
/* The 'schedule' variable records, in its sign, whether 'now' is before or
* after 'drop_next'. This allows 'drop_next' to be updated before the next
@@ -528,21 +532,22 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
over_target = sojourn > p->target &&
sojourn > p->mtu_time * bulk_flows * 2 &&
sojourn > p->mtu_time * 4;
- next_due = vars->count && ktime_to_ns(schedule) >= 0;
+ count = vars->count;
+ next_due = count && ktime_to_ns(schedule) >= 0;
vars->ecn_marked = false;
if (over_target) {
if (!vars->dropping) {
- vars->dropping = true;
- vars->drop_next = cobalt_control(now,
- p->interval,
- vars->rec_inv_sqrt);
+ WRITE_ONCE(vars->dropping, true);
+ WRITE_ONCE(vars->drop_next,
+ cobalt_control(now, p->interval,
+ vars->rec_inv_sqrt));
}
- if (!vars->count)
- vars->count = 1;
+ if (!count)
+ count = 1;
} else if (vars->dropping) {
- vars->dropping = false;
+ WRITE_ONCE(vars->dropping, false);
}
if (next_due && vars->dropping) {
@@ -550,23 +555,23 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
if (!(vars->ecn_marked = INET_ECN_set_ce(skb)))
reason = QDISC_DROP_CONGESTED;
- vars->count++;
- if (!vars->count)
- vars->count--;
- cobalt_invsqrt(vars);
- vars->drop_next = cobalt_control(vars->drop_next,
- p->interval,
- vars->rec_inv_sqrt);
+ count++;
+ if (!count)
+ count--;
+ cobalt_invsqrt(vars, count);
+ WRITE_ONCE(vars->drop_next,
+ cobalt_control(vars->drop_next, p->interval,
+ vars->rec_inv_sqrt));
schedule = ktime_sub(now, vars->drop_next);
} else {
while (next_due) {
- vars->count--;
- cobalt_invsqrt(vars);
- vars->drop_next = cobalt_control(vars->drop_next,
- p->interval,
- vars->rec_inv_sqrt);
+ count--;
+ cobalt_invsqrt(vars, count);
+ WRITE_ONCE(vars->drop_next,
+ cobalt_control(vars->drop_next, p->interval,
+ vars->rec_inv_sqrt));
schedule = ktime_sub(now, vars->drop_next);
- next_due = vars->count && ktime_to_ns(schedule) >= 0;
+ next_due = count && ktime_to_ns(schedule) >= 0;
}
}
@@ -575,11 +580,12 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
get_random_u32() < vars->p_drop)
reason = QDISC_DROP_FLOOD_PROTECTION;
+ WRITE_ONCE(vars->count, count);
/* Overload the drop_next field as an activity timeout */
- if (!vars->count)
- vars->drop_next = ktime_add_ns(now, p->interval);
+ if (!count)
+ WRITE_ONCE(vars->drop_next, ktime_add_ns(now, p->interval));
else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC)
- vars->drop_next = now;
+ WRITE_ONCE(vars->drop_next, now);
return reason;
}
@@ -914,7 +920,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow)
struct sk_buff *skb = flow->head;
if (skb) {
- flow->head = skb->next;
+ WRITE_ONCE(flow->head, skb->next);
skb_mark_not_on_list(skb);
}
@@ -926,7 +932,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow)
static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb)
{
if (!flow->head)
- flow->head = skb;
+ WRITE_ONCE(flow->head, skb);
else
flow->tail->next = skb;
flow->tail = skb;
@@ -1357,7 +1363,7 @@ found:
if (elig_ack_prev)
elig_ack_prev->next = elig_ack->next;
else
- flow->head = elig_ack->next;
+ WRITE_ONCE(flow->head, elig_ack->next);
skb_mark_not_on_list(elig_ack);
@@ -1595,11 +1601,11 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
len = qdisc_pkt_len(skb);
q->buffer_used -= skb->truesize;
- b->backlogs[idx] -= len;
WRITE_ONCE(b->tin_backlog, b->tin_backlog - len);
+ WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] - len);
sch->qstats.backlog -= len;
- flow->dropped++;
+ WRITE_ONCE(flow->dropped, flow->dropped + 1);
WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1);
if (q->config->rate_flags & CAKE_FLAG_INGRESS)
@@ -1824,11 +1830,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
/* stats */
- b->backlogs[idx] += slen;
sch->qstats.backlog += slen;
q->avg_window_bytes += slen;
WRITE_ONCE(b->bytes, b->bytes + slen);
WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen);
+ WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + slen);
qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen);
consume_skb(skb);
@@ -1861,11 +1867,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* stats */
WRITE_ONCE(b->packets, b->packets + 1);
- b->backlogs[idx] += len - ack_pkt_len;
sch->qstats.backlog += len - ack_pkt_len;
q->avg_window_bytes += len - ack_pkt_len;
WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len);
WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len);
+ WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + len - ack_pkt_len);
}
if (q->overflow_timeout)
@@ -1924,7 +1930,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
flow->set = CAKE_SET_SPARSE;
WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count + 1);
- flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode);
+ WRITE_ONCE(flow->deficit, cake_get_flow_quantum(b, flow, q->config->flow_mode));
} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
/* this flow was empty, accounted as a sparse flow, but actually
* in the bulk rotation.
@@ -1977,7 +1983,7 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
if (flow->head) {
skb = dequeue_head(flow);
len = qdisc_pkt_len(skb);
- b->backlogs[q->cur_flow] -= len;
+ WRITE_ONCE(b->backlogs[q->cur_flow], b->backlogs[q->cur_flow] - len);
WRITE_ONCE(b->tin_backlog, b->tin_backlog - len);
sch->qstats.backlog -= len;
q->buffer_used -= skb->truesize;
@@ -2166,7 +2172,8 @@ retry:
}
}
- flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode);
+ WRITE_ONCE(flow->deficit,
+ flow->deficit + cake_get_flow_quantum(b, flow, q->config->flow_mode));
list_move_tail(&flow->flowchain, &b->old_flows);
goto retry;
@@ -2232,10 +2239,10 @@ retry:
if (q->config->rate_flags & CAKE_FLAG_INGRESS) {
len = cake_advance_shaper(q, b, skb,
now, true);
- flow->deficit -= len;
+ WRITE_ONCE(flow->deficit, flow->deficit - len);
b->tin_deficit -= len;
}
- flow->dropped++;
+ WRITE_ONCE(flow->dropped, flow->dropped + 1);
WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1);
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_qstats_drop(sch);
@@ -2259,7 +2266,7 @@ retry:
delay < b->base_delay ? 2 : 8));
len = cake_advance_shaper(q, b, skb, now, false);
- flow->deficit -= len;
+ WRITE_ONCE(flow->deficit, flow->deficit - len);
b->tin_deficit -= len;
if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
@@ -3137,7 +3144,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
flow = &b->flows[idx % CAKE_QUEUES];
- if (flow->head) {
+ if (READ_ONCE(flow->head)) {
sch_tree_lock(sch);
skb = flow->head;
while (skb) {
@@ -3146,13 +3153,15 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
}
sch_tree_unlock(sch);
}
- qs.backlog = b->backlogs[idx % CAKE_QUEUES];
- qs.drops = flow->dropped;
+ qs.backlog = READ_ONCE(b->backlogs[idx % CAKE_QUEUES]);
+ qs.drops = READ_ONCE(flow->dropped);
}
if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
return -1;
if (flow) {
ktime_t now = ktime_get();
+ bool dropping;
+ u32 p_drop;
stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
if (!stats)
@@ -3167,21 +3176,23 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
goto nla_put_failure; \
} while (0)
- PUT_STAT_S32(DEFICIT, flow->deficit);
- PUT_STAT_U32(DROPPING, flow->cvars.dropping);
- PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
- PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
- if (flow->cvars.p_drop) {
+ PUT_STAT_S32(DEFICIT, READ_ONCE(flow->deficit));
+ dropping = READ_ONCE(flow->cvars.dropping);
+ PUT_STAT_U32(DROPPING, dropping);
+ PUT_STAT_U32(COBALT_COUNT, READ_ONCE(flow->cvars.count));
+ p_drop = READ_ONCE(flow->cvars.p_drop);
+ PUT_STAT_U32(P_DROP, p_drop);
+ if (p_drop) {
PUT_STAT_S32(BLUE_TIMER_US,
ktime_to_us(
ktime_sub(now,
- flow->cvars.blue_timer)));
+ READ_ONCE(flow->cvars.blue_timer))));
}
- if (flow->cvars.dropping) {
+ if (dropping) {
PUT_STAT_S32(DROP_NEXT_US,
ktime_to_us(
ktime_sub(now,
- flow->cvars.drop_next)));
+ READ_ONCE(flow->cvars.drop_next))));
}
if (nla_nest_end(d->skb, stats) < 0)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 0664b2f2d6f2..24db54684e8a 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -117,7 +117,7 @@ static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow)
{
struct sk_buff *skb = flow->head;
- flow->head = skb->next;
+ WRITE_ONCE(flow->head, skb->next);
skb_mark_not_on_list(skb);
return skb;
}
@@ -127,7 +127,7 @@ static inline void flow_queue_add(struct fq_codel_flow *flow,
struct sk_buff *skb)
{
if (flow->head == NULL)
- flow->head = skb;
+ WRITE_ONCE(flow->head, skb);
else
flow->tail->next = skb;
flow->tail = skb;
@@ -173,8 +173,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets,
} while (++i < max_packets && len < threshold);
/* Tell codel to increase its signal strength also */
- flow->cvars.count += i;
- q->backlogs[idx] -= len;
+ WRITE_ONCE(flow->cvars.count, flow->cvars.count + i);
+ WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] - len);
q->memory_usage -= mem;
sch->qstats.drops += i;
sch->qstats.backlog -= len;
@@ -204,13 +204,13 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
codel_set_enqueue_time(skb);
flow = &q->flows[idx];
flow_queue_add(flow, skb);
- q->backlogs[idx] += qdisc_pkt_len(skb);
+ WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] + qdisc_pkt_len(skb));
qdisc_qstats_backlog_inc(sch, skb);
if (list_empty(&flow->flowchain)) {
list_add_tail(&flow->flowchain, &q->new_flows);
q->new_flow_count++;
- flow->deficit = q->quantum;
+ WRITE_ONCE(flow->deficit, q->quantum);
}
get_codel_cb(skb)->mem_usage = skb->truesize;
q->memory_usage += get_codel_cb(skb)->mem_usage;
@@ -263,7 +263,8 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
flow = container_of(vars, struct fq_codel_flow, cvars);
if (flow->head) {
skb = dequeue_head(flow);
- q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb);
+ WRITE_ONCE(q->backlogs[flow - q->flows],
+ q->backlogs[flow - q->flows] - qdisc_pkt_len(skb));
q->memory_usage -= get_codel_cb(skb)->mem_usage;
sch->q.qlen--;
sch->qstats.backlog -= qdisc_pkt_len(skb);
@@ -296,7 +297,7 @@ begin:
flow = list_first_entry(head, struct fq_codel_flow, flowchain);
if (flow->deficit <= 0) {
- flow->deficit += q->quantum;
+ WRITE_ONCE(flow->deficit, flow->deficit + q->quantum);
list_move_tail(&flow->flowchain, &q->old_flows);
goto begin;
}
@@ -314,7 +315,7 @@ begin:
goto begin;
}
qdisc_bstats_update(sch, skb);
- flow->deficit -= qdisc_pkt_len(skb);
+ WRITE_ONCE(flow->deficit, flow->deficit - qdisc_pkt_len(skb));
if (q->cstats.drop_count) {
qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
@@ -328,7 +329,7 @@ begin:
static void fq_codel_flow_purge(struct fq_codel_flow *flow)
{
rtnl_kfree_skbs(flow->head, flow->tail);
- flow->head = NULL;
+ WRITE_ONCE(flow->head, NULL);
}
static void fq_codel_reset(struct Qdisc *sch)
@@ -656,21 +657,21 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
memset(&xstats, 0, sizeof(xstats));
xstats.type = TCA_FQ_CODEL_XSTATS_CLASS;
- xstats.class_stats.deficit = flow->deficit;
+ xstats.class_stats.deficit = READ_ONCE(flow->deficit);
xstats.class_stats.ldelay =
- codel_time_to_us(flow->cvars.ldelay);
- xstats.class_stats.count = flow->cvars.count;
- xstats.class_stats.lastcount = flow->cvars.lastcount;
- xstats.class_stats.dropping = flow->cvars.dropping;
- if (flow->cvars.dropping) {
- codel_tdiff_t delta = flow->cvars.drop_next -
+ codel_time_to_us(READ_ONCE(flow->cvars.ldelay));
+ xstats.class_stats.count = READ_ONCE(flow->cvars.count);
+ xstats.class_stats.lastcount = READ_ONCE(flow->cvars.lastcount);
+ xstats.class_stats.dropping = READ_ONCE(flow->cvars.dropping);
+ if (xstats.class_stats.dropping) {
+ codel_tdiff_t delta = READ_ONCE(flow->cvars.drop_next) -
codel_get_time();
xstats.class_stats.drop_next = (delta >= 0) ?
codel_time_to_us(delta) :
-codel_time_to_us(-delta);
}
- if (flow->head) {
+ if (READ_ONCE(flow->head)) {
sch_tree_lock(sch);
skb = flow->head;
while (skb) {
@@ -679,7 +680,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
}
sch_tree_unlock(sch);
}
- qs.backlog = q->backlogs[idx];
+ qs.backlog = READ_ONCE(q->backlogs[idx]);
qs.drops = 0;
}
if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index fb53fbf0e328..b41f2def2e2c 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -219,16 +219,14 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
* packet timestamp.
*/
if (!params->dq_rate_estimator) {
- vars->qdelay = now - pie_get_enqueue_time(skb);
+ WRITE_ONCE(vars->qdelay,
+ backlog ? now - pie_get_enqueue_time(skb) : 0);
if (vars->dq_tstamp != DTIME_INVALID)
dtime = now - vars->dq_tstamp;
vars->dq_tstamp = now;
- if (backlog == 0)
- vars->qdelay = 0;
-
if (dtime == 0)
return;
@@ -376,7 +374,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC)))
delta += MAX_PROB / (100 / 2);
- vars->prob += delta;
+ WRITE_ONCE(vars->prob, vars->prob + delta);
if (delta > 0) {
/* prevent overflow */
@@ -401,7 +399,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
if (qdelay == 0 && qdelay_old == 0 && update_prob)
/* Reduce drop probability to 98.4% */
- vars->prob -= vars->prob / 64;
+ WRITE_ONCE(vars->prob, vars->prob - vars->prob / 64);
WRITE_ONCE(vars->qdelay, qdelay);
vars->backlog_old = backlog;
@@ -501,7 +499,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct pie_sched_data *q = qdisc_priv(sch);
struct tc_pie_xstats st = {
- .prob = q->vars.prob << BITS_PER_BYTE,
+ .prob = READ_ONCE(q->vars.prob) << BITS_PER_BYTE,
.delay = ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) /
NSEC_PER_USEC,
.packets_in = READ_ONCE(q->stats.packets_in),
@@ -512,7 +510,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
};
/* avg_dq_rate is only valid if dq_rate_estimator is enabled */
- st.dq_rate_estimating = q->params.dq_rate_estimator;
+ st.dq_rate_estimating = READ_ONCE(q->params.dq_rate_estimator);
/* unscale and return dq_rate in bytes per sec */
if (st.dq_rate_estimating)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 432b8a3000a5..4d0e44a2e7c6 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -162,7 +162,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch)
struct red_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
- skb = child->dequeue(child);
+ skb = qdisc_dequeue_peeked(child);
if (skb) {
qdisc_bstats_update(sch, skb);
qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index bd5ef561030f..d3ee8e5479b3 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -441,7 +441,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch)
struct Qdisc *child = q->qdisc;
struct sk_buff *skb;
- skb = child->dequeue(q->qdisc);
+ skb = qdisc_dequeue_peeked(child);
if (skb) {
qdisc_bstats_update(sch, skb);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index c3f3181dba54..f39822babf88 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -225,7 +225,8 @@ static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
sfq_unlink(q, x, n, p);
- d = q->slots[x].qlen--;
+ d = q->slots[x].qlen;
+ WRITE_ONCE(q->slots[x].qlen, d - 1);
if (n == p && q->cur_depth == d)
q->cur_depth--;
sfq_link(q, x);
@@ -238,7 +239,8 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
sfq_unlink(q, x, n, p);
- d = ++q->slots[x].qlen;
+ d = q->slots[x].qlen + 1;
+ WRITE_ONCE(q->slots[x].qlen, d);
if (q->cur_depth < d)
q->cur_depth = d;
sfq_link(q, x);
@@ -298,7 +300,7 @@ static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
drop:
skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
len = qdisc_pkt_len(skb);
- slot->backlog -= len;
+ WRITE_ONCE(slot->backlog, slot->backlog - len);
sfq_dec(q, x);
sch->q.qlen--;
qdisc_qstats_backlog_dec(sch, skb);
@@ -314,7 +316,7 @@ drop:
q->tail = NULL; /* no more active slots */
else
q->tail->next = slot->next;
- q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+ WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT);
goto drop;
}
@@ -364,10 +366,10 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
x = q->dep[0].next; /* get a free slot */
if (x >= SFQ_MAX_FLOWS)
return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_MAXFLOWS);
- q->ht[hash] = x;
+ WRITE_ONCE(q->ht[hash], x);
slot = &q->slots[x];
slot->hash = hash;
- slot->backlog = 0; /* should already be 0 anyway... */
+ WRITE_ONCE(slot->backlog, 0); /* should already be 0 anyway... */
red_set_vars(&slot->vars);
goto enqueue;
}
@@ -426,7 +428,7 @@ congestion_drop:
head = slot_dequeue_head(slot);
delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
sch->qstats.backlog -= delta;
- slot->backlog -= delta;
+ WRITE_ONCE(slot->backlog, slot->backlog - delta);
qdisc_drop_reason(head, sch, to_free, QDISC_DROP_FLOW_LIMIT);
slot_queue_add(slot, skb);
@@ -436,7 +438,7 @@ congestion_drop:
enqueue:
qdisc_qstats_backlog_inc(sch, skb);
- slot->backlog += qdisc_pkt_len(skb);
+ WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb));
slot_queue_add(slot, skb);
sfq_inc(q, x);
if (slot->qlen == 1) { /* The flow is new */
@@ -452,7 +454,7 @@ enqueue:
*/
q->tail = slot;
/* We could use a bigger initial quantum for new flows */
- slot->allot = q->quantum;
+ WRITE_ONCE(slot->allot, q->quantum);
}
if (++sch->q.qlen <= q->limit)
return NET_XMIT_SUCCESS;
@@ -489,7 +491,7 @@ next_slot:
slot = &q->slots[a];
if (slot->allot <= 0) {
q->tail = slot;
- slot->allot += q->quantum;
+ WRITE_ONCE(slot->allot, slot->allot + q->quantum);
goto next_slot;
}
skb = slot_dequeue_head(slot);
@@ -497,10 +499,10 @@ next_slot:
qdisc_bstats_update(sch, skb);
sch->q.qlen--;
qdisc_qstats_backlog_dec(sch, skb);
- slot->backlog -= qdisc_pkt_len(skb);
+ WRITE_ONCE(slot->backlog, slot->backlog - qdisc_pkt_len(skb));
/* Is the slot empty? */
if (slot->qlen == 0) {
- q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+ WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT);
next_a = slot->next;
if (a == next_a) {
q->tail = NULL; /* no more active slots */
@@ -508,7 +510,7 @@ next_slot:
}
q->tail->next = next_a;
} else {
- slot->allot -= qdisc_pkt_len(skb);
+ WRITE_ONCE(slot->allot, slot->allot - qdisc_pkt_len(skb));
}
return skb;
}
@@ -549,9 +551,9 @@ static void sfq_rehash(struct Qdisc *sch)
sfq_dec(q, i);
__skb_queue_tail(&list, skb);
}
- slot->backlog = 0;
+ WRITE_ONCE(slot->backlog, 0);
red_set_vars(&slot->vars);
- q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+ WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT);
}
q->tail = NULL;
@@ -570,7 +572,7 @@ drop:
dropped++;
continue;
}
- q->ht[hash] = x;
+ WRITE_ONCE(q->ht[hash], x);
slot = &q->slots[x];
slot->hash = hash;
}
@@ -581,7 +583,7 @@ drop:
slot->vars.qavg = red_calc_qavg(q->red_parms,
&slot->vars,
slot->backlog);
- slot->backlog += qdisc_pkt_len(skb);
+ WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb));
sfq_inc(q, x);
if (slot->qlen == 1) { /* The flow is new */
if (q->tail == NULL) { /* It is the first flow */
@@ -591,7 +593,7 @@ drop:
q->tail->next = x;
}
q->tail = slot;
- slot->allot = q->quantum;
+ WRITE_ONCE(slot->allot, q->quantum);
}
}
sch->q.qlen -= dropped;
@@ -905,16 +907,16 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct gnet_dump *d)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- sfq_index idx = q->ht[cl - 1];
+ sfq_index idx = READ_ONCE(q->ht[cl - 1]);
struct gnet_stats_queue qs = { 0 };
struct tc_sfq_xstats xstats = { 0 };
if (idx != SFQ_EMPTY_SLOT) {
const struct sfq_slot *slot = &q->slots[idx];
- xstats.allot = slot->allot;
- qs.qlen = slot->qlen;
- qs.backlog = slot->backlog;
+ xstats.allot = READ_ONCE(slot->allot);
+ qs.qlen = READ_ONCE(slot->qlen);
+ qs.backlog = READ_ONCE(slot->backlog);
}
if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
return -1;
@@ -930,7 +932,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return;
for (i = 0; i < q->divisor; i++) {
- if (q->ht[i] == SFQ_EMPTY_SLOT) {
+ if (READ_ONCE(q->ht[i]) == SFQ_EMPTY_SLOT) {
arg->count++;
continue;
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1a565095376a..185dbed7de5d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1628,12 +1628,8 @@ static void smc_connect_work(struct work_struct *work)
lock_sock(&smc->sk);
if (rc != 0 || smc->sk.sk_err) {
smc->sk.sk_state = SMC_CLOSED;
- if (rc == -EPIPE || rc == -EAGAIN)
- smc->sk.sk_err = EPIPE;
- else if (rc == -ECONNREFUSED)
- smc->sk.sk_err = ECONNREFUSED;
- else if (signal_pending(current))
- smc->sk.sk_err = -sock_intr_errno(timeo);
+ if (!smc->sk.sk_err)
+ smc->sk.sk_err = (rc == -EAGAIN) ? EPIPE : -rc;
sock_put(&smc->sk); /* passive closing */
goto out;
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 798243eabb1f..2590e855f6a5 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2317,9 +2317,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
if (copied < 0)
goto splice_requeue;
- if (chunk < rxm->full_len) {
- rxm->offset += len;
- rxm->full_len -= len;
+ if (copied < rxm->full_len) {
+ rxm->offset += copied;
+ rxm->full_len -= copied;
goto splice_requeue;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e2d787ca3e74..1cbf36ea043b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -3323,6 +3323,9 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct sk_buff *skb;
int answ = 0;
+ if (sk->sk_type != SOCK_STREAM)
+ return -EOPNOTSUPP;
+
mutex_lock(&u->iolock);
skb = skb_peek(&sk->sk_receive_queue);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index a7967a345827..0783555e2526 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -607,6 +607,8 @@ static void unix_gc(struct work_struct *work)
struct sk_buff_head hitlist;
struct sk_buff *skb;
+ WRITE_ONCE(gc_in_progress, true);
+
spin_lock(&unix_gc_lock);
if (unix_graph_state == UNIX_GRAPH_NOT_CYCLIC) {
@@ -649,10 +651,8 @@ void unix_schedule_gc(struct user_struct *user)
READ_ONCE(user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
return;
- if (!READ_ONCE(gc_in_progress)) {
- WRITE_ONCE(gc_in_progress, true);
+ if (!READ_ONCE(gc_in_progress))
queue_work(system_dfl_wq, &unix_gc_work);
- }
if (user && READ_ONCE(unix_graph_cyclic_sccs))
flush_work(&unix_gc_work);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 416d533f493d..9b8014516f4f 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -447,7 +447,9 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
u32 len)
{
- if (vvs->buf_used + len > vvs->buf_alloc)
+ u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0);
+
+ if (skb_overhead + vvs->buf_used + len > vvs->buf_alloc)
return false;
vvs->rx_bytes += len;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f334cdef8958..7db9cd433801 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1276,6 +1276,18 @@ static int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
rtnl_unlock();
return -ENODEV;
}
+
+ /*
+ * The first invocation validated the wdev's netns against
+ * the caller via __cfg80211_wdev_from_attrs(). The wiphy
+ * may have moved netns between dumpit invocations (via
+ * NL80211_CMD_SET_WIPHY_NETNS), so re-check here.
+ */
+ if (!net_eq(wiphy_net(wiphy), sock_net(cb->skb->sk))) {
+ rtnl_unlock();
+ return -ENODEV;
+ }
+
*rdev = wiphy_to_rdev(wiphy);
*wdev = NULL;
@@ -13867,6 +13879,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(net))
return PTR_ERR(net);
+ /*
+ * The caller already has CAP_NET_ADMIN over the source netns
+ * (enforced by GENL_UNS_ADMIN_PERM on the genl op). Mirror the
+ * convention used by net/core/rtnetlink.c::rtnl_get_net_ns_capable()
+ * and require CAP_NET_ADMIN over the target netns as well, so that
+ * a caller that is privileged in their own user namespace cannot
+ * push a wiphy into a netns where they have no privilege.
+ */
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return -EPERM;
+ }
+
err = 0;
/* check if anything to do */
@@ -19828,6 +19853,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.cmd = NL80211_CMD_SET_PMK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_pmk,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_CLEAR_SKB),
},
@@ -19835,6 +19861,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.cmd = NL80211_CMD_DEL_PMK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_pmk,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 4c8ea0583f94..d6cd0de64d1f 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.ftms_per_burst = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST])
out->ftm.ftms_per_burst =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]);
if (capa->ftm.max_ftms_per_burst &&
(out->ftm.ftms_per_burst > capa->ftm.max_ftms_per_burst ||
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 887abed25466..5e5786cd9af5 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -646,9 +646,42 @@ static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb)
return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL);
}
-static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr)
+static struct xsk_addrs *__xsk_addrs_alloc(struct sk_buff *skb, u64 addr)
{
- skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL);
+ struct xsk_addrs *xsk_addr;
+
+ xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
+ if (unlikely(!xsk_addr))
+ return NULL;
+
+ xsk_addr->addrs[0] = addr;
+ skb_shinfo(skb)->destructor_arg = (void *)xsk_addr;
+ return xsk_addr;
+}
+
+static struct xsk_addrs *xsk_addrs_alloc(struct sk_buff *skb)
+{
+ struct xsk_addrs *xsk_addr;
+
+ if (!xsk_skb_destructor_is_addr(skb))
+ return (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+
+ xsk_addr = __xsk_addrs_alloc(skb, xsk_skb_destructor_get_addr(skb));
+ if (likely(xsk_addr))
+ xsk_addr->num_descs = 1;
+ return xsk_addr;
+}
+
+static int xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr)
+{
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL);
+ return 0;
+ }
+
+ if (unlikely(!__xsk_addrs_alloc(skb, addr)))
+ return -ENOMEM;
+ return 0;
}
static void xsk_inc_num_desc(struct sk_buff *skb)
@@ -685,7 +718,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
spin_lock_irqsave(&pool->cq_prod_lock, flags);
idx = xskq_get_prod(pool->cq);
- if (unlikely(num_descs > 1)) {
+ if (unlikely(!xsk_skb_destructor_is_addr(skb))) {
xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
for (i = 0; i < num_descs; i++) {
@@ -724,14 +757,20 @@ void xsk_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
}
-static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,
- u64 addr)
+static int xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,
+ u64 addr)
{
+ int err;
+
+ err = xsk_skb_destructor_set_addr(skb, addr);
+ if (unlikely(err))
+ return err;
+
skb->dev = xs->dev;
skb->priority = READ_ONCE(xs->sk.sk_priority);
skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
- xsk_skb_destructor_set_addr(skb, addr);
+ return 0;
}
static void xsk_consume_skb(struct sk_buff *skb)
@@ -740,7 +779,7 @@ static void xsk_consume_skb(struct sk_buff *skb)
u32 num_descs = xsk_get_num_desc(skb);
struct xsk_addrs *xsk_addr;
- if (unlikely(num_descs > 1)) {
+ if (unlikely(!xsk_skb_destructor_is_addr(skb))) {
xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
kmem_cache_free(xsk_tx_generic_cache, xsk_addr);
}
@@ -819,28 +858,19 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
return ERR_PTR(err);
skb_reserve(skb, hr);
-
- xsk_skb_init_misc(skb, xs, desc->addr);
if (desc->options & XDP_TX_METADATA) {
err = xsk_skb_metadata(skb, buffer, desc, pool, hr);
- if (unlikely(err))
+ if (unlikely(err)) {
+ kfree_skb(skb);
return ERR_PTR(err);
+ }
}
} else {
struct xsk_addrs *xsk_addr;
- if (xsk_skb_destructor_is_addr(skb)) {
- xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache,
- GFP_KERNEL);
- if (!xsk_addr)
- return ERR_PTR(-ENOMEM);
-
- xsk_addr->num_descs = 1;
- xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb);
- skb_shinfo(skb)->destructor_arg = (void *)xsk_addr;
- } else {
- xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
- }
+ xsk_addr = xsk_addrs_alloc(skb);
+ if (!xsk_addr)
+ return ERR_PTR(-ENOMEM);
/* in case of -EOVERFLOW that could happen below,
* xsk_consume_skb() will release this node as whole skb
@@ -856,8 +886,11 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
addr = buffer - pool->addrs;
for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
- if (unlikely(i >= MAX_SKB_FRAGS))
+ if (unlikely(i >= MAX_SKB_FRAGS)) {
+ if (!xs->skb)
+ kfree_skb(skb);
return ERR_PTR(-EOVERFLOW);
+ }
page = pool->umem->pgs[addr >> PAGE_SHIFT];
get_page(page);
@@ -914,7 +947,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
if (unlikely(err))
goto free_err;
- xsk_skb_init_misc(skb, xs, desc->addr);
if (desc->options & XDP_TX_METADATA) {
err = xsk_skb_metadata(skb, buffer, desc,
xs->pool, hr);
@@ -927,19 +959,10 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct page *page;
u8 *vaddr;
- if (xsk_skb_destructor_is_addr(skb)) {
- xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache,
- GFP_KERNEL);
- if (!xsk_addr) {
- err = -ENOMEM;
- goto free_err;
- }
-
- xsk_addr->num_descs = 1;
- xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb);
- skb_shinfo(skb)->destructor_arg = (void *)xsk_addr;
- } else {
- xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+ xsk_addr = xsk_addrs_alloc(skb);
+ if (!xsk_addr) {
+ err = -ENOMEM;
+ goto free_err;
}
if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
@@ -964,18 +987,28 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
}
}
+ if (!xs->skb) {
+ err = xsk_skb_init_misc(skb, xs, desc->addr);
+ if (unlikely(err))
+ goto free_err;
+ }
xsk_inc_num_desc(skb);
return skb;
free_err:
- if (skb && !skb_shinfo(skb)->nr_frags)
+ if (skb && !xs->skb)
kfree_skb(skb);
if (err == -EOVERFLOW) {
- /* Drop the packet */
- xsk_inc_num_desc(xs->skb);
- xsk_drop_skb(xs->skb);
+ if (xs->skb) {
+ /* Drop the packet */
+ xsk_inc_num_desc(xs->skb);
+ xsk_drop_skb(xs->skb);
+ } else {
+ xsk_cq_cancel_locked(xs->pool, 1);
+ xs->tx->invalid_descs++;
+ }
xskq_cons_release(xs->tx);
} else {
/* Let application retry */
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index cd7bc50872f6..d981cfdd8535 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -175,6 +175,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
if (force_zc && force_copy)
return -EINVAL;
+ if (pool->tx_sw_csum && (netdev->priv_flags & IFF_TX_SKB_NO_LINEAR))
+ return -EOPNOTSUPP;
+
if (xsk_get_pool_from_qid(netdev, queue_id))
return -EBUSY;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index a9652b422f51..cc35c2fcbbe0 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -66,7 +66,9 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph = ip_hdr(skb);
int ihl = iph->ihl * 4;
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ if (!skb->inner_protocol)
+ skb_set_inner_transport_header(skb,
+ skb_transport_offset(skb));
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
@@ -167,7 +169,9 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
iph = ipv6_hdr(skb);
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ if (!skb->inner_protocol)
+ skb_set_inner_transport_header(skb,
+ skb_transport_offset(skb));
hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
if (hdr_len < 0)
@@ -276,8 +280,10 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *top_iph;
int flags;
- skb_set_inner_network_header(skb, skb_network_offset(skb));
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ if (!skb->inner_protocol) {
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ }
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
@@ -321,8 +327,10 @@ static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
struct ipv6hdr *top_iph;
int dsfield;
- skb_set_inner_network_header(skb, skb_network_offset(skb));
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ if (!skb->inner_protocol) {
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ }
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 1748d374abca..686014d39429 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -818,17 +818,17 @@ int __xfrm_state_delete(struct xfrm_state *x)
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
- hlist_del_rcu(&x->bydst);
- hlist_del_rcu(&x->bysrc);
- if (x->km.seq)
- hlist_del_rcu(&x->byseq);
+ hlist_del_init_rcu(&x->bydst);
+ hlist_del_init_rcu(&x->bysrc);
+ if (!hlist_unhashed(&x->byseq))
+ hlist_del_init_rcu(&x->byseq);
if (!hlist_unhashed(&x->state_cache))
hlist_del_rcu(&x->state_cache);
if (!hlist_unhashed(&x->state_cache_input))
hlist_del_rcu(&x->state_cache_input);
- if (x->id.spi)
- hlist_del_rcu(&x->byspi);
+ if (!hlist_unhashed(&x->byspi))
+ hlist_del_init_rcu(&x->byspi);
net->xfrm.state_num--;
xfrm_nat_keepalive_state_updated(x);
spin_unlock(&net->xfrm.xfrm_state_lock);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d56450f61669..38a90e5ee3d9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3323,6 +3323,7 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
+ [XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping),
[XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
[XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
};
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 85ca4d1ecf9e..82809d5b2478 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -31,6 +31,7 @@ TEST_PROGS = \
hw_stats_l3.sh \
hw_stats_l3_gre.sh \
iou-zcrx.py \
+ ipsec_vxlan.py \
irq.py \
loopback.sh \
nic_timestamp.py \
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
index dd50cb8a7911..8c132ace2b8d 100644
--- a/tools/testing/selftests/drivers/net/hw/config
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -3,6 +3,10 @@ CONFIG_FAIL_FUNCTION=y
CONFIG_FAULT_INJECTION=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
CONFIG_FUNCTION_ERROR_INJECTION=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_ESP_OFFLOAD=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
CONFIG_IO_URING=y
CONFIG_IPV6=y
CONFIG_IPV6_GRE=y
@@ -14,3 +18,4 @@ CONFIG_NETKIT=y
CONFIG_NET_SCH_INGRESS=y
CONFIG_UDMABUF=y
CONFIG_VXLAN=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py
new file mode 100755
index 000000000000..0740a4d85240
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+"""Traffic test for VXLAN + IPsec crypto-offload."""
+
+import os
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx
+from lib.py import CmdExitFailure, NetDrvEpEnv, cmd, defer, ethtool, ip
+from lib.py import Iperf3Runner
+
+# Inner tunnel addresses - TEST-NET-2 (RFC 5737) / doc prefix (RFC 3849)
+INNER_V4_LOCAL = "198.51.100.1"
+INNER_V4_REMOTE = "198.51.100.2"
+INNER_V6_LOCAL = "2001:db8:100::1"
+INNER_V6_REMOTE = "2001:db8:100::2"
+
+# ESP parameters
+SPI_OUT = "0x1000"
+SPI_IN = "0x1001"
+# 128-bit key + 32-bit salt = 20 bytes hex, 128-bit ICV
+ESP_AEAD = "aead 'rfc4106(gcm(aes))' 0x" + "01" * 20 + " 128"
+
+
+def xfrm(args, host=None):
+ """Runs 'ip xfrm' via shell to preserve parentheses in algo names."""
+ cmd(f"ip xfrm {args}", shell=True, host=host)
+
+
+def check_xfrm_offload_support():
+ """Skips if iproute2 lacks xfrm offload support."""
+ out = cmd("ip xfrm state help", fail=False)
+ if "offload" not in out.stdout + out.stderr:
+ raise KsftSkipEx("iproute2 too old, missing xfrm offload")
+
+
+def check_esp_hw_offload(cfg):
+ """Skips if device lacks esp-hw-offload support."""
+ check_xfrm_offload_support()
+ try:
+ feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
+ except (CmdExitFailure, IndexError) as e:
+ raise KsftSkipEx(f"can't query features: {e}") from e
+ if not feat.get("esp-hw-offload", {}).get("active"):
+ raise KsftSkipEx("Device does not support esp-hw-offload")
+
+
+def get_tx_drops(cfg):
+ """Returns TX dropped counter from the physical device."""
+ stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0]
+ return stats["stats64"]["tx"]["dropped"]
+
+
+def setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver):
+ """Sets up VXLAN tunnel with IPsec transport-mode crypto-offload."""
+ vxlan_name = f"vx{os.getpid()}"
+ local_addr = cfg.addr_v[outer_ipver]
+ remote_addr = cfg.remote_addr_v[outer_ipver]
+
+ if inner_ipver == "4":
+ inner_local = f"{INNER_V4_LOCAL}/24"
+ inner_remote = f"{INNER_V4_REMOTE}/24"
+ addr_extra = ""
+ else:
+ inner_local = f"{INNER_V6_LOCAL}/64"
+ inner_remote = f"{INNER_V6_REMOTE}/64"
+ addr_extra = " nodad"
+
+ if outer_ipver == "6":
+ vxlan_opts = "udp6zerocsumtx udp6zerocsumrx"
+ else:
+ vxlan_opts = "noudpcsum"
+
+ # VXLAN tunnel - local side
+ ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} "
+ f"local {local_addr} remote {remote_addr} dev {cfg.ifname}")
+ defer(ip, f"link del {vxlan_name}")
+ ip(f"addr add {inner_local} dev {vxlan_name}{addr_extra}")
+ ip(f"link set {vxlan_name} up")
+
+ # VXLAN tunnel - remote side
+ ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} "
+ f"local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
+ host=cfg.remote)
+ defer(ip, f"link del {vxlan_name}", host=cfg.remote)
+ ip(f"addr add {inner_remote} dev {vxlan_name}{addr_extra}",
+ host=cfg.remote)
+ ip(f"link set {vxlan_name} up", host=cfg.remote)
+
+ # xfrm state - local outbound SA
+ xfrm(f"state add src {local_addr} dst {remote_addr} "
+ f"proto esp spi {SPI_OUT} "
+ f"{ESP_AEAD} "
+ f"mode transport offload crypto dev {cfg.ifname} dir out")
+ defer(xfrm, f"state del src {local_addr} dst {remote_addr} "
+ f"proto esp spi {SPI_OUT}")
+
+ # xfrm state - local inbound SA
+ xfrm(f"state add src {remote_addr} dst {local_addr} "
+ f"proto esp spi {SPI_IN} "
+ f"{ESP_AEAD} "
+ f"mode transport offload crypto dev {cfg.ifname} dir in")
+ defer(xfrm, f"state del src {remote_addr} dst {local_addr} "
+ f"proto esp spi {SPI_IN}")
+
+ # xfrm state - remote outbound SA (mirror, software crypto)
+ xfrm(f"state add src {remote_addr} dst {local_addr} "
+ f"proto esp spi {SPI_IN} "
+ f"{ESP_AEAD} "
+ f"mode transport",
+ host=cfg.remote)
+ defer(xfrm, f"state del src {remote_addr} dst {local_addr} "
+ f"proto esp spi {SPI_IN}", host=cfg.remote)
+
+ # xfrm state - remote inbound SA (mirror, software crypto)
+ xfrm(f"state add src {local_addr} dst {remote_addr} "
+ f"proto esp spi {SPI_OUT} "
+ f"{ESP_AEAD} "
+ f"mode transport",
+ host=cfg.remote)
+ defer(xfrm, f"state del src {local_addr} dst {remote_addr} "
+ f"proto esp spi {SPI_OUT}", host=cfg.remote)
+
+ # xfrm policy - local out
+ xfrm(f"policy add src {local_addr} dst {remote_addr} "
+ f"proto udp dport 4789 dir out "
+ f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport")
+ defer(xfrm, f"policy del src {local_addr} dst {remote_addr} "
+ f"proto udp dport 4789 dir out")
+
+ # xfrm policy - local in
+ xfrm(f"policy add src {remote_addr} dst {local_addr} "
+ f"proto udp dport 4789 dir in "
+ f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport")
+ defer(xfrm, f"policy del src {remote_addr} dst {local_addr} "
+ f"proto udp dport 4789 dir in")
+
+ # xfrm policy - remote out
+ xfrm(f"policy add src {remote_addr} dst {local_addr} "
+ f"proto udp dport 4789 dir out "
+ f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport",
+ host=cfg.remote)
+ defer(xfrm, f"policy del src {remote_addr} dst {local_addr} "
+ f"proto udp dport 4789 dir out", host=cfg.remote)
+
+ # xfrm policy - remote in
+ xfrm(f"policy add src {local_addr} dst {remote_addr} "
+ f"proto udp dport 4789 dir in "
+ f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport",
+ host=cfg.remote)
+ defer(xfrm, f"policy del src {local_addr} dst {remote_addr} "
+ f"proto udp dport 4789 dir in", host=cfg.remote)
+
+
+def _vxlan_ipsec_variants():
+ """Generates outer/inner IP version variants."""
+ for outer in ["4", "6"]:
+ for inner in ["4", "6"]:
+ yield KsftNamedVariant(f"outer_v{outer}_inner_v{inner}", outer, inner)
+
+
+@ksft_variants(_vxlan_ipsec_variants())
+def test_vxlan_ipsec_crypto_offload(cfg, outer_ipver, inner_ipver):
+ """Tests VXLAN+IPsec crypto-offload has no TX drops."""
+ cfg.require_ipver(outer_ipver)
+ check_esp_hw_offload(cfg)
+
+ setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver)
+
+ if inner_ipver == "4":
+ inner_local = INNER_V4_LOCAL
+ inner_remote = INNER_V4_REMOTE
+ ping = "ping"
+ else:
+ inner_local = INNER_V6_LOCAL
+ inner_remote = INNER_V6_REMOTE
+ ping = "ping -6"
+
+ cmd(f"{ping} -c 1 -W 2 {inner_remote}")
+
+ drops_before = get_tx_drops(cfg)
+
+ runner = Iperf3Runner(cfg, server_ip=inner_local,
+ client_ip=inner_remote)
+ bw_gbps = runner.measure_bandwidth(reverse=True)
+
+ cfg.wait_hw_stats_settle()
+ drops_after = get_tx_drops(cfg)
+
+ ksft_eq(drops_after - drops_before, 0,
+ comment="TX drops during VXLAN+IPsec")
+ ksft_ge(bw_gbps, 0.1,
+ comment="Minimum 100Mbps over VXLAN+IPsec")
+
+
+def main():
+ """Runs VXLAN+IPsec crypto-offload GSO selftest."""
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([test_vxlan_ipsec_crypto_offload], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index f181fa2d38fc..e24660e5c27f 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -48,7 +48,10 @@ class Iperf3Runner:
Starts the iperf3 client with the configured options.
"""
cmdline = self._build_client(streams, duration, reverse)
- return cmd(cmdline, background=background, host=self.env.remote)
+ kwargs = {"background": background, "host": self.env.remote}
+ if not background:
+ kwargs["timeout"] = duration + 5
+ return cmd(cmdline, **kwargs)
def measure_bandwidth(self, reverse=False):
"""
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index a275ed584026..f3da38c54d27 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -96,6 +96,7 @@ TEST_PROGS := \
srv6_hl2encap_red_l2vpn_test.sh \
srv6_iptunnel_cache.sh \
stress_reuseport_listen.sh \
+ tcp_ecmp_failover.sh \
tcp_fastopen_backup_key.sh \
test_bpf.sh \
test_bridge_backup_port.sh \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 5fea7e7df628..989a5975dcea 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -474,20 +474,24 @@ mptcp_lib_wait_local_port_listen() {
wait_local_port_listen "${@}" "tcp"
}
+# $1: error file, $2: cmd, $3: expected msg, [$4: expected error]
mptcp_lib_check_output() {
local err="${1}"
local cmd="${2}"
local expected="${3}"
+ local exp_error="${4:-0}"
local cmd_ret=0
local out
- if ! out=$(${cmd} 2>"${err}"); then
- cmd_ret=${?}
- fi
+ out=$(${cmd} 2>"${err}") || cmd_ret=1
- if [ ${cmd_ret} -ne 0 ]; then
- mptcp_lib_pr_fail "command execution '${cmd}' stderr"
- cat "${err}"
+ if [ "${cmd_ret}" != "${exp_error}" ]; then
+ mptcp_lib_pr_fail "unexpected returned code for '${cmd}', info:"
+ if [ "${exp_error}" = 0 ]; then
+ cat "${err}"
+ else
+ echo "${out}"
+ fi
return 2
elif [ "${out}" = "${expected}" ]; then
return 0
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 123d9d7a0278..04594dfc22b1 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -122,10 +122,12 @@ check()
local cmd="$1"
local expected="$2"
local msg="$3"
+ local exp_error="$4"
local rc=0
mptcp_lib_print_title "$msg"
- mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" "${exp_error}" ||
+ rc=${?}
if [ ${rc} -eq 2 ]; then
mptcp_lib_result_fail "${msg} # error ${rc}"
ret=${KSFT_FAIL}
@@ -158,13 +160,13 @@ check "show_endpoints" \
"3,10.0.1.3,signal backup")" "dump addrs"
del_endpoint 2
-check "get_endpoint 2" "" "simple del addr"
+check "get_endpoint 2" "" "simple del addr" 1
check "show_endpoints" \
"$(format_endpoints "1,10.0.1.1" \
"3,10.0.1.3,signal backup")" "dump addrs after del"
add_endpoint 10.0.1.3 2>/dev/null
-check "get_endpoint 4" "" "duplicate addr"
+check "get_endpoint 4" "" "duplicate addr" 1
add_endpoint 10.0.1.4 flags signal
check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment"
@@ -173,7 +175,7 @@ for i in $(seq 5 9); do
add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1
done
check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit"
-check "get_endpoint 10" "" "above hard addr limit"
+check "get_endpoint 10" "" "above hard addr limit" 1
del_endpoint 9
for i in $(seq 10 255); do
@@ -192,9 +194,13 @@ check "show_endpoints" \
flush_endpoint
check "show_endpoints" "" "flush addrs"
-add_endpoint 10.0.1.1 flags unknown
-check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags"
-flush_endpoint
+# "unknown" flag is only supported by pm_nl_ctl
+if ! mptcp_lib_is_ip_mptcp; then
+ add_endpoint 10.0.1.1 flags unknown
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" \
+ "ignore unknown flags"
+ flush_endpoint
+fi
set_limits 9 1 2>/dev/null
check "get_limits" "${default_limits}" "rcv addrs above hard limit"
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index b327d3061ed5..3cdd953f6813 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -26,6 +26,7 @@ tests="
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
tunnel_metadata ovs: test extraction of tunnel metadata
+ tunnel_refcount ovs: test tunnel vport reference cleanup
drop_reason drop: test drop reasons are emitted
psample psample: Sampling packets with psample"
@@ -830,6 +831,42 @@ test_tunnel_metadata() {
return 0
}
+test_tunnel_refcount() {
+ sbxname="test_tunnel_refcount"
+ sbx_add "${sbxname}" || return 1
+
+ ovs_sbx "${sbxname}" ip netns add trefns || return 1
+ on_exit "ovs_sbx ${sbxname} ip netns del trefns"
+
+ for tun_type in gre vxlan geneve; do
+ info "testing ${tun_type} tunnel vport refcount"
+
+ ovs_sbx "${sbxname}" ip netns exec trefns \
+ python3 $ovs_base/ovs-dpctl.py \
+ add-dp dp-${tun_type} || return 1
+
+ ovs_sbx "${sbxname}" ip netns exec trefns \
+ python3 $ovs_base/ovs-dpctl.py \
+ add-if --no-lwt -t ${tun_type} \
+ dp-${tun_type} ovs-${tun_type}0 || return 1
+
+ ovs_wait ip -netns trefns link show \
+ ovs-${tun_type}0 >/dev/null 2>&1 || return 1
+
+ info "deleting dp - may hang if reference counting is broken"
+ ovs_sbx "${sbxname}" ip netns exec trefns \
+ python3 $ovs_base/ovs-dpctl.py \
+ del-dp dp-${tun_type} &
+
+ dev_removed() {
+ ! ip -netns trefns link show "$1" >/dev/null 2>&1
+ }
+ ovs_wait dev_removed dp-${tun_type} || return 1
+ ovs_wait dev_removed ovs-${tun_type}0 || return 1
+ done
+ return 0
+}
+
run_test() {
(
tname="$1"
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 848f61fdcee0..bbe35e2718d2 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -11,7 +11,6 @@ import logging
import math
import multiprocessing
import re
-import socket
import struct
import sys
import time
@@ -2069,7 +2068,7 @@ class OvsVport(GenericNetlinkSocket):
elif vport_type == "internal":
return OvsVport.OVS_VPORT_TYPE_INTERNAL
elif vport_type == "gre":
- return OvsVport.OVS_VPORT_TYPE_INTERNAL
+ return OvsVport.OVS_VPORT_TYPE_GRE
elif vport_type == "vxlan":
return OvsVport.OVS_VPORT_TYPE_VXLAN
elif vport_type == "geneve":
@@ -2121,6 +2120,7 @@ class OvsVport(GenericNetlinkSocket):
)
TUNNEL_DEFAULTS = [("geneve", 6081),
+ ("gre", 0),
("vxlan", 4789)]
for tnl in TUNNEL_DEFAULTS:
@@ -2129,9 +2129,13 @@ class OvsVport(GenericNetlinkSocket):
dport = tnl[1]
if not lwt:
+ if tnl[0] == "gre":
+ # GRE tunnels have no options.
+ break
+
vportopt = OvsVport.ovs_vport_msg.vportopts()
vportopt["attrs"].append(
- ["OVS_TUNNEL_ATTR_DST_PORT", socket.htons(dport)]
+ ["OVS_TUNNEL_ATTR_DST_PORT", dport]
)
msg["attrs"].append(
["OVS_VPORT_ATTR_OPTIONS", vportopt]
@@ -2145,6 +2149,9 @@ class OvsVport(GenericNetlinkSocket):
geneve_port=dport,
geneve_collect_metadata=True,
geneve_udp_zero_csum6_rx=1)
+ elif tnl[0] == "gre":
+ ipr.link("add", ifname=vport_ifname, kind="gretap",
+ gre_collect_metadata=True)
elif tnl[0] == "vxlan":
ipr.link("add", ifname=vport_ifname, kind=tnl[0],
vxlan_learning=0, vxlan_collect_metadata=1,
@@ -2563,7 +2570,7 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()):
if vpo:
dpo = vpo.get_attr("OVS_TUNNEL_ATTR_DST_PORT")
if dpo:
- opts += " tnl-dport:%s" % socket.ntohs(dpo)
+ opts += " tnl-dport:%s" % dpo
print(
" port %d: %s (%s%s)"
% (
@@ -2632,7 +2639,7 @@ def main(argv):
"--ptype",
type=str,
default="netdev",
- choices=["netdev", "internal", "geneve", "vxlan"],
+ choices=["netdev", "internal", "gre", "geneve", "vxlan"],
help="Interface type (default netdev)",
)
addifcmd.add_argument(
@@ -2645,7 +2652,7 @@ def main(argv):
addifcmd.add_argument(
"-l",
"--lwt",
- type=bool,
+ action=argparse.BooleanOptionalAction,
default=True,
help="Use LWT infrastructure instead of vport (default true)."
)
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
index b50dbe45a4d0..c06e3135fbef 100755
--- a/tools/testing/selftests/net/ovpn/test.sh
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -98,10 +98,10 @@ ovpn_run_basic_traffic() {
sleep 0.3
ovpn_cmd_ok "send baseline traffic to peer ${p}" \
ip netns exec ovpn_peer0 \
- ping -qfc 500 -w 3 5.5.5.$((p + 1))
+ ping -qfc 100 -w 3 5.5.5.$((p + 1))
ovpn_cmd_ok "send large-payload traffic to peer ${p}" \
ip netns exec ovpn_peer0 \
- ping -qfc 500 -s 3000 -w 3 5.5.5.$((p + 1))
+ ping -qfc 100 -s 3000 -w 3 5.5.5.$((p + 1))
wait "${tcpdump_pid1}" || return 1
wait "${tcpdump_pid2}" || return 1
diff --git a/tools/testing/selftests/net/tcp_ecmp_failover.sh b/tools/testing/selftests/net/tcp_ecmp_failover.sh
new file mode 100755
index 000000000000..5768aa8bff6a
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ecmp_failover.sh
@@ -0,0 +1,216 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2026 Google LLC.
+#
+# This test verifies TCP flow failover between ECMP routes
+# upon carrier loss on the active device.
+#
+# socat -----------------------------> socat
+# |
+# .-- veth-c1 -|- veth-s1 --.
+# dummy0 -| | |-- dummy0
+# '-- veth-c2 -|- veth-s2 --'
+# |
+#
+
+REQUIRE_JQ=no
+REQUIRE_MZ=no
+NUM_NETIFS=0
+
+source forwarding/lib.sh
+
+CLIENT_IP="10.0.59.1"
+SERVER_IP="10.0.92.1"
+CLIENT_IP6="2001:db8:5a9a::1"
+SERVER_IP6="2001:db8:9292::1"
+
+setup_server()
+{
+ IP="ip -n $server"
+ NS_EXEC="ip netns exec $server"
+
+ $IP link add dummy0 type dummy
+ $IP link set dummy0 up
+
+ $IP -4 addr add $SERVER_IP/32 dev dummy0
+ $IP -6 addr add $SERVER_IP6/128 dev dummy0 nodad
+
+ $IP link set veth-s1 up
+ $IP link set veth-s2 up
+
+ $IP -4 addr add 192.168.1.2/24 dev veth-s1
+ $IP -4 addr add 192.168.2.2/24 dev veth-s2
+
+ $IP -4 route add $CLIENT_IP/32 \
+ nexthop via 192.168.1.1 dev veth-s1 weight 1 \
+ nexthop via 192.168.2.1 dev veth-s2 weight 1
+
+ $IP -6 addr add 2001:db8:1::2/64 dev veth-s1 nodad
+ $IP -6 addr add 2001:db8:2::2/64 dev veth-s2 nodad
+
+ $IP -6 route add $CLIENT_IP6/128 \
+ nexthop via 2001:db8:1::1 dev veth-s1 weight 1 \
+ nexthop via 2001:db8:2::1 dev veth-s2 weight 1
+}
+
+setup_client()
+{
+ IP="ip -n $client"
+ NS_EXEC="ip netns exec $client"
+
+ $IP link add dummy0 type dummy
+ $IP link set dummy0 up
+
+ $IP -4 addr add $CLIENT_IP/32 dev dummy0
+ $IP -6 addr add $CLIENT_IP6/128 dev dummy0 nodad
+
+ $IP link set veth-c1 up
+ $IP link set veth-c2 up
+
+ $IP -4 addr add 192.168.1.1/24 dev veth-c1
+ $IP -4 addr add 192.168.2.1/24 dev veth-c2
+
+ $IP -4 route add $SERVER_IP/32 \
+ nexthop via 192.168.1.2 dev veth-c1 weight 1 \
+ nexthop via 192.168.2.2 dev veth-c2 weight 1
+
+ $IP -6 addr add 2001:db8:1::1/64 dev veth-c1 nodad
+ $IP -6 addr add 2001:db8:2::1/64 dev veth-c2 nodad
+
+ $IP -6 route add $SERVER_IP6/128 \
+ nexthop via 2001:db8:1::2 dev veth-c1 weight 1 \
+ nexthop via 2001:db8:2::2 dev veth-c2 weight 1
+
+ # By default, tcp_retries1=3 triggers a route refresh
+ # after 3 retransmits (~5s). Ensure this never occurs
+ # for test stability.
+ $NS_EXEC sysctl -qw net.ipv4.tcp_retries1=100
+
+ # When NETDEV_CHANGE is issued for a dev tied to an ECMP
+ # route, RTNH_F_LINKDOWN is flagged and the sernum is
+ # bumped to invalidate the route via sk_dst_check().
+ #
+ # Without ignore_routes_with_linkdown=1, subsequent
+ # lookups may still select the same RTNH_F_LINKDOWN route.
+ $NS_EXEC sysctl -qw net.ipv4.conf.veth-c1.ignore_routes_with_linkdown=1
+ $NS_EXEC sysctl -qw net.ipv4.conf.veth-c2.ignore_routes_with_linkdown=1
+
+ $NS_EXEC sysctl -qw net.ipv6.conf.veth-c1.ignore_routes_with_linkdown=1
+ $NS_EXEC sysctl -qw net.ipv6.conf.veth-c2.ignore_routes_with_linkdown=1
+}
+
+setup()
+{
+ setup_ns client server
+
+ ip -n "$client" link add veth-c1 type veth peer veth-s1 netns "$server"
+ ip -n "$client" link add veth-c2 type veth peer veth-s2 netns "$server"
+
+ setup_server
+ setup_client
+}
+
+cleanup()
+{
+ cleanup_all_ns > /dev/null 2>&1
+}
+
+tcp_ecmp_failover()
+{
+ local pf=$1; shift
+ local server_ip=$1; shift
+ local client_ip=$1; shift
+
+ RET=0
+
+ tcpdump_start veth-s1 "$server"
+ tcpdump_start veth-s2 "$server"
+
+ ip netns exec "$server" \
+ socat -u TCP-LISTEN:8080,pf="$pf",bind="$server_ip",reuseaddr /dev/null &
+ server_pid=$!
+
+ # Wait for server to start listening.
+ # Sometimes client fails without this sleep.
+ sleep 1
+
+ ip netns exec "$client" \
+ socat -u /dev/zero TCP:"$server_ip":8080,pf="$pf",bind="$client_ip" &
+ client_pid=$!
+
+ # To capture enough packets.
+ sleep 3
+
+ tcpdump_stop veth-s1
+ tcpdump_stop veth-s2
+
+ pkts_s1=$(tcpdump_show veth-s1 | wc -l)
+ pkts_s2=$(tcpdump_show veth-s2 | wc -l)
+
+ tcpdump_cleanup veth-s1
+ tcpdump_cleanup veth-s2
+
+ # Detect the device chosen by the client
+ if [ "$pkts_s1" -gt "$pkts_s2" ]; then
+ veth_down=veth-s1
+ veth_up=veth-s2
+ else
+ veth_down=veth-s2
+ veth_up=veth-s1
+ fi
+
+ # Taking down $veth_down causes its peer to lose carrier,
+ # triggering NETDEV_CHANGE. This flags RTNH_F_LINKDOWN
+ # and bumps the sernum for the route associated with that
+ # peer, invalidating the cached dst in the TCP socket.
+ #
+ # Consequently, sk_dst_check() fails, forcing the subsequent
+ # lookup to select the remaining healthy route via $veth_up.
+ ip -n "$server" link set "$veth_down" down
+
+ tcpdump_start "$veth_up" "$server"
+
+ # To capture enough packets.
+ sleep 3
+
+ tcpdump_stop "$veth_up"
+
+ kill -9 "$client_pid" > /dev/null 2>&1
+ kill -9 "$server_pid" > /dev/null 2>&1
+ wait 2> /dev/null
+
+ pkts=$(tcpdump_show $veth_up | wc -l)
+
+ tcpdump_cleanup "$veth_up"
+
+ if [ "$pkts" -lt 1000 ]; then
+ RET=$ksft_fail
+ fi
+}
+
+test_ipv4()
+{
+ setup
+ tcp_ecmp_failover IPv4 $SERVER_IP $CLIENT_IP
+ log_test "TCP IPv4 failover"
+ cleanup
+}
+
+test_ipv6()
+{
+ setup
+ tcp_ecmp_failover IPv6 "[$SERVER_IP6]" "[$CLIENT_IP6]"
+ log_test "TCP IPv6 failover"
+ cleanup
+}
+
+require_command socat
+require_command tcpdump
+
+trap cleanup EXIT
+
+test_ipv4
+test_ipv6
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 9e2ccea13d70..30a236b8e9f7 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -946,6 +946,49 @@ TEST_F(tls, peek_and_splice)
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
+TEST_F(tls, splice_to_pipe_small)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ size_t total = 0;
+ int p[2];
+
+ memrnd(mem_send, sizeof(mem_send));
+
+ ASSERT_GE(pipe(p), 0);
+
+ /* Shrink pipe to 1 page (typically 4096 bytes) to force multiple
+ * splice iterations for a 16384-byte TLS record.
+ */
+ EXPECT_GE(fcntl(p[1], F_SETPIPE_SZ, 4096), 4096);
+
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+
+ while (total < (size_t)send_len) {
+ ssize_t spliced, drained;
+
+ spliced = splice(self->cfd, NULL, p[1], NULL,
+ send_len - total, 0);
+ EXPECT_GT(spliced, 0);
+ if (spliced <= 0)
+ break;
+
+ drained = read(p[0], mem_recv + total, spliced);
+ EXPECT_EQ(drained, spliced);
+ if (drained <= 0)
+ break;
+
+ total += drained;
+ }
+
+ EXPECT_EQ(total, (size_t)send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+
+ close(p[0]);
+ close(p[1]);
+}
+
#define MAX_FRAGS 48
TEST_F(tls, splice_short)
{
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index eefadd0546d3..b1f856cf62c1 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -1136,5 +1136,153 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "7a5f",
+ "name": "Force red to dequeue from its child's gso_skb with qfq leaf",
+ "category": [
+ "qdisc",
+ "tbf",
+ "red",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: red limit 757 min 16 max 24 avpkt 16",
+ "$TC qdisc add dev $DUMMY parent 2: handle 3: qfq",
+ "$TC class add dev $DUMMY classid 3:1 parent 3: qfq maxpkt 512 weight 1",
+ "$TC filter add dev $DUMMY parent 3: protocol ip prio 1 matchall classid 3:1 action ok"
+ ],
+ "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:",
+ "matchJSON": [
+ {
+ "kind": "red",
+ "handle": "2:",
+ "bytes": 98,
+ "packets": 1,
+ "backlog": 0,
+ "qlen": 0
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "cdae",
+ "name": "Force sfb to dequeue from its child's gso_skb with qfq leaf",
+ "category": [
+ "qdisc",
+ "tbf",
+ "sfb",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: sfb",
+ "$TC qdisc add dev $DUMMY parent 2: handle 3: qfq",
+ "$TC class add dev $DUMMY classid 3:1 parent 3: qfq maxpkt 512 weight 1",
+ "$TC filter add dev $DUMMY parent 3: protocol ip prio 1 matchall classid 3:1 action ok"
+ ],
+ "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:",
+ "matchJSON": [
+ {
+ "kind": "sfb",
+ "handle": "2:",
+ "bytes": 98,
+ "packets": 1,
+ "backlog": 0,
+ "qlen": 0
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "291d",
+ "name": "Force red to dequeue from its child's gso_skb with dualpi2 leaf",
+ "category": [
+ "qdisc",
+ "tbf",
+ "red",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: red limit 757 min 16 max 24 avpkt 16",
+ "$TC qdisc add dev $DUMMY parent 2: handle 3: dualpi2"
+ ],
+ "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:",
+ "matchJSON": [
+ {
+ "kind": "red",
+ "handle": "2:",
+ "bytes": 98,
+ "packets": 1,
+ "backlog": 0,
+ "qlen": 0
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9c6d",
+ "name": "Force sfb to dequeue from its child's gso_skb with dualpi2 leaf",
+ "category": [
+ "qdisc",
+ "tbf",
+ "sfb",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: sfb",
+ "$TC qdisc add dev $DUMMY parent 2: handle 3: dualpi2"
+ ],
+ "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:",
+ "matchJSON": [
+ {
+ "kind": "sfb",
+ "handle": "2:",
+ "bytes": 98,
+ "packets": 1,
+ "backlog": 0,
+ "qlen": 0
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
}
]