summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml35
-rw-r--r--Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml3
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/arm/boot/dts/gemini/gemini-sl93512r.dts2
-rw-r--r--arch/arm/boot/dts/gemini/gemini-sq201.dts2
-rw-r--r--arch/arm/boot/dts/microchip/sam9x7.dtsi6
-rw-r--r--arch/arm/mach-socfpga/platsmp.c1
-rw-r--r--arch/arm64/boot/dts/qcom/eliza.dtsi6
-rw-r--r--arch/arm64/boot/dts/qcom/glymur.dtsi20
-rw-r--r--arch/arm64/boot/dts/qcom/milos.dtsi6
-rw-r--r--arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi7
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/include/asm/kvm_nested.h4
-rw-r--r--arch/arm64/kvm/at.c6
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h2
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c4
-rw-r--r--arch/arm64/kvm/nested.c33
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c6
-rw-r--r--arch/s390/configs/debug_defconfig4
-rw-r--r--arch/s390/configs/defconfig4
-rw-r--r--arch/s390/include/asm/bug.h12
-rw-r--r--arch/s390/include/asm/gmap_helpers.h1
-rw-r--r--arch/s390/include/asm/linkage.h2
-rw-r--r--arch/s390/kvm/faultin.c30
-rw-r--r--arch/s390/kvm/gaccess.c11
-rw-r--r--arch/s390/kvm/gmap.c19
-rw-r--r--arch/s390/kvm/gmap.h3
-rw-r--r--arch/s390/kvm/kvm-s390.c33
-rw-r--r--arch/s390/kvm/priv.c8
-rw-r--r--arch/s390/kvm/pv.c21
-rw-r--r--arch/s390/mm/gmap_helpers.c128
-rw-r--r--arch/x86/kvm/svm/sev.c238
-rw-r--r--arch/x86/kvm/svm/svm.h9
-rw-r--r--drivers/auxdisplay/Kconfig2
-rw-r--r--drivers/auxdisplay/line-display.c2
-rw-r--r--drivers/auxdisplay/max6959.c5
-rw-r--r--drivers/firmware/samsung/exynos-acpm-dvfs.c3
-rw-r--r--drivers/firmware/samsung/exynos-acpm.c141
-rw-r--r--drivers/md/dm-cache-policy-smq.c12
-rw-r--r--drivers/memory/atmel-ebi.c3
-rw-r--r--drivers/mmc/core/mmc.c4
-rw-r--r--drivers/mmc/host/dw_mmc-rockchip.c17
-rw-r--r--drivers/mmc/host/litex_mmc.c20
-rw-r--r--drivers/mmc/host/renesas_sdhi_internal_dmac.c1
-rw-r--r--drivers/mmc/host/sdhci-msm.c10
-rw-r--r--drivers/mmc/host/sdhci-of-dwcmshc.c44
-rw-r--r--drivers/mmc/host/sdhci.c1
-rw-r--r--drivers/net/bonding/bond_3ad.c18
-rw-r--r--drivers/net/bonding/bond_main.c4
-rw-r--r--drivers/net/bonding/bond_netlink.c4
-rw-r--r--drivers/net/bonding/bond_procfs.c8
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.c2
-rw-r--r--drivers/net/ethernet/amd/pcnet32.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c36
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c36
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c6
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c32
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.h1
-rw-r--r--drivers/net/ethernet/realtek/rtase/rtase_main.c7
-rw-r--r--drivers/net/geneve.c2
-rw-r--r--drivers/net/phy/sfp.c1
-rw-r--r--drivers/net/vxlan/vxlan_vnifilter.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/ap.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/mac80211.c7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/power.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c26
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c56
-rw-r--r--drivers/ptp/ptp_vclock.c14
-rw-r--r--drivers/soc/imx/soc-imx8m.c2
-rw-r--r--drivers/soc/qcom/ice.c66
-rw-r--r--drivers/tee/optee/supp.c107
-rw-r--r--drivers/tee/qcomtee/core.c6
-rw-r--r--drivers/tee/tee_core.c56
-rw-r--r--drivers/tee/tee_shm.c2
-rw-r--r--drivers/ufs/host/ufs-qcom.c10
-rw-r--r--fs/erofs/zdata.c6
-rw-r--r--fs/erofs/zmap.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/smb/server/oplock.c15
-rw-r--r--fs/smb/server/smb2pdu.c11
-rw-r--r--fs/smb/server/vfs_cache.c12
-rw-r--r--fs/xfs/scrub/cow_repair.c12
-rw-r--r--fs/xfs/xfs_ioctl.c47
-rw-r--r--fs/xfs/xfs_mount.c7
-rw-r--r--fs/xfs/xfs_pnfs.c11
-rw-r--r--fs/xfs/xfs_reflink.c14
-rw-r--r--fs/xfs/xfs_zone_gc.c2
-rw-r--r--include/linux/hugetlb.h8
-rw-r--r--include/linux/mm.h8
-rw-r--r--include/linux/tracepoint.h8
-rw-r--r--include/net/act_api.h1
-rw-r--r--include/net/bluetooth/l2cap.h1
-rw-r--r--include/net/ip_vs.h3
-rw-r--r--include/net/mptcp.h7
-rw-r--r--include/net/tc_act/tc_pedit.h1
-rw-r--r--include/uapi/linux/tee.h1
-rw-r--r--io_uring/net.c3
-rw-r--r--kernel/cgroup/cpuset.c13
-rw-r--r--kernel/sched/ext.c10
-rw-r--r--kernel/trace/trace_probe.c2
-rw-r--r--mm/cma.c7
-rw-r--r--mm/cma_debug.c3
-rw-r--r--mm/damon/ops-common.c4
-rw-r--r--mm/huge_memory.c4
-rw-r--r--mm/hugetlb.c69
-rw-r--r--mm/hugetlb_vmemmap.c36
-rw-r--r--mm/memcontrol.c5
-rw-r--r--mm/memory-failure.c19
-rw-r--r--mm/userfaultfd.c92
-rw-r--r--net/6lowpan/iphc.c4
-rw-r--r--net/802/garp.c2
-rw-r--r--net/802/mrp.c9
-rw-r--r--net/appletalk/aarp.c2
-rw-r--r--net/bluetooth/bnep/core.c57
-rw-r--r--net/bluetooth/hci_sync.c5
-rw-r--r--net/bluetooth/hci_sysfs.c6
-rw-r--r--net/bluetooth/iso.c63
-rw-r--r--net/bluetooth/l2cap_core.c46
-rw-r--r--net/bluetooth/mgmt.c17
-rw-r--r--net/bluetooth/rfcomm/core.c67
-rw-r--r--net/bluetooth/rfcomm/sock.c26
-rw-r--r--net/bluetooth/sco.c20
-rw-r--r--net/bridge/netfilter/ebt_snat.c3
-rw-r--r--net/core/sock.c8
-rw-r--r--net/devlink/core.c2
-rw-r--r--net/hsr/hsr_framereg.c4
-rw-r--r--net/ieee802154/6lowpan/tx.c5
-rw-r--r--net/ipv4/inet_connection_sock.c6
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/udp.c8
-rw-r--r--net/ipv6/anycast.c16
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c3
-rw-r--r--net/l2tp/l2tp_ppp.c82
-rw-r--r--net/mac80211/tx.c4
-rw-r--r--net/mptcp/options.c79
-rw-r--r--net/mptcp/pm.c15
-rw-r--r--net/mptcp/pm_userspace.c14
-rw-r--r--net/mptcp/protocol.c10
-rw-r--r--net/mptcp/protocol.h7
-rw-r--r--net/mptcp/sockopt.c15
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c14
-rw-r--r--net/netfilter/nf_conntrack_irc.c4
-rw-r--r--net/netfilter/nf_synproxy_core.c24
-rw-r--r--net/netfilter/nft_byteorder.c51
-rw-r--r--net/netfilter/nft_ct.c8
-rw-r--r--net/netfilter/nft_ct_fast.c2
-rw-r--r--net/netfilter/nft_tunnel.c2
-rw-r--r--net/netfilter/xt_NFQUEUE.c2
-rw-r--r--net/rds/ib_cm.c1
-rw-r--r--net/sched/act_api.c7
-rw-r--r--net/sched/act_pedit.c77
-rw-r--r--net/sctp/diag.c17
-rw-r--r--net/sctp/sm_make_chunk.c5
-rw-r--r--net/sctp/sm_statefuns.c6
-rw-r--r--net/unix/af_unix.c11
-rw-r--r--net/vmw_vsock/vmci_transport.c4
-rw-r--r--net/wireless/nl80211.c9
-rw-r--r--net/wireless/scan.c9
-rw-r--r--net/xdp/xsk.c11
-rw-r--r--scripts/kconfig/tests/err_repeated_inc/expected_stderr4
-rw-r--r--tools/sched_ext/scx_show_state.py19
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh10
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc2
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c54
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh4
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifilter_notify.sh184
-rw-r--r--virt/kvm/kvm_main.c3
176 files changed, 2070 insertions, 1025 deletions
diff --git a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml
index 876bf90ed96e..ccb6b8dd8e11 100644
--- a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml
+++ b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml
@@ -30,6 +30,16 @@ properties:
maxItems: 1
clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: core
+ - const: iface
+
+ power-domains:
maxItems: 1
operating-points-v2: true
@@ -44,6 +54,25 @@ required:
additionalProperties: false
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,eliza-inline-crypto-engine
+ - qcom,milos-inline-crypto-engine
+
+ then:
+ required:
+ - power-domains
+ - clock-names
+ properties:
+ clocks:
+ minItems: 2
+ clock-names:
+ minItems: 2
+
examples:
- |
#include <dt-bindings/clock/qcom,sm8550-gcc.h>
@@ -52,7 +81,11 @@ examples:
compatible = "qcom,sm8550-inline-crypto-engine",
"qcom,inline-crypto-engine";
reg = <0x01d88000 0x8000>;
- clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>;
+ clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>,
+ <&gcc GCC_UFS_PHY_AHB_CLK>;
+ clock-names = "core",
+ "iface";
+ power-domains = <&gcc UFS_PHY_GDSC>;
operating-points-v2 = <&ice_opp_table>;
diff --git a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
index b66ae6300faf..65882ff79d8d 100644
--- a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
+++ b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
@@ -84,7 +84,8 @@ properties:
This reference is provided for background information only.
$ref: /schemas/types.yaml#/definitions/phandle-array
items:
- - items:
+ - minItems: 4
+ items:
- description: Phandle to HSP(High-Speed Peripheral) device
- description: Offset of phy control register for internal
or external clock selection
diff --git a/MAINTAINERS b/MAINTAINERS
index 9ec290e38b44..e035a3be797c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6535,7 +6535,7 @@ F: include/linux/blk-cgroup.h
CONTROL GROUP - CPUSET
M: Waiman Long <longman@redhat.com>
-R: Chen Ridong <chenridong@huaweicloud.com>
+R: Ridong Chen <ridong.chen@linux.dev>
L: cgroups@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
diff --git a/arch/arm/boot/dts/gemini/gemini-sl93512r.dts b/arch/arm/boot/dts/gemini/gemini-sl93512r.dts
index 4992ec276de9..341dec9b636a 100644
--- a/arch/arm/boot/dts/gemini/gemini-sl93512r.dts
+++ b/arch/arm/boot/dts/gemini/gemini-sl93512r.dts
@@ -146,7 +146,7 @@
partitions {
compatible = "redboot-fis";
/* Eraseblock at 0xfe0000 */
- fis-index-block = <0x1fc>;
+ fis-index-block = <0x7f>;
};
};
diff --git a/arch/arm/boot/dts/gemini/gemini-sq201.dts b/arch/arm/boot/dts/gemini/gemini-sq201.dts
index f8c6f6e5cdea..bfd1e8581ad6 100644
--- a/arch/arm/boot/dts/gemini/gemini-sq201.dts
+++ b/arch/arm/boot/dts/gemini/gemini-sq201.dts
@@ -134,7 +134,7 @@
partitions {
compatible = "redboot-fis";
/* Eraseblock at 0xfe0000 */
- fis-index-block = <0x1fc>;
+ fis-index-block = <0x7f>;
};
};
diff --git a/arch/arm/boot/dts/microchip/sam9x7.dtsi b/arch/arm/boot/dts/microchip/sam9x7.dtsi
index d242d7a934d0..c680a5033b6b 100644
--- a/arch/arm/boot/dts/microchip/sam9x7.dtsi
+++ b/arch/arm/boot/dts/microchip/sam9x7.dtsi
@@ -990,9 +990,9 @@
<62 IRQ_TYPE_LEVEL_HIGH 3>, /* Queue 3 */
<63 IRQ_TYPE_LEVEL_HIGH 3>, /* Queue 4 */
<64 IRQ_TYPE_LEVEL_HIGH 3>; /* Queue 5 */
- clocks = <&pmc PMC_TYPE_PERIPHERAL 24>, <&pmc PMC_TYPE_PERIPHERAL 24>, <&pmc PMC_TYPE_GCK 24>, <&pmc PMC_TYPE_GCK 67>;
- clock-names = "hclk", "pclk", "tx_clk", "tsu_clk";
- assigned-clocks = <&pmc PMC_TYPE_GCK 67>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 24>, <&pmc PMC_TYPE_PERIPHERAL 24>, <&pmc PMC_TYPE_GCK 24>;
+ clock-names = "hclk", "pclk", "tsu_clk";
+ assigned-clocks = <&pmc PMC_TYPE_GCK 24>;
assigned-clock-rates = <266666666>;
status = "disabled";
};
diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c
index 201191cf68f3..349e6c54518e 100644
--- a/arch/arm/mach-socfpga/platsmp.c
+++ b/arch/arm/mach-socfpga/platsmp.c
@@ -78,6 +78,7 @@ static void __init socfpga_smp_prepare_cpus(unsigned int max_cpus)
}
socfpga_scu_base_addr = of_iomap(np, 0);
+ of_node_put(np);
if (!socfpga_scu_base_addr)
return;
scu_enable(socfpga_scu_base_addr);
diff --git a/arch/arm64/boot/dts/qcom/eliza.dtsi b/arch/arm64/boot/dts/qcom/eliza.dtsi
index 4a7a0ac40ce6..7e97361a5dc5 100644
--- a/arch/arm64/boot/dts/qcom/eliza.dtsi
+++ b/arch/arm64/boot/dts/qcom/eliza.dtsi
@@ -843,7 +843,11 @@
"qcom,inline-crypto-engine";
reg = <0x0 0x01d88000 0x0 0x18000>;
- clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>;
+ clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>,
+ <&gcc GCC_UFS_PHY_AHB_CLK>;
+ clock-names = "core",
+ "iface";
+ power-domains = <&gcc GCC_UFS_PHY_GDSC>;
};
tcsr_mutex: hwlock@1f40000 {
diff --git a/arch/arm64/boot/dts/qcom/glymur.dtsi b/arch/arm64/boot/dts/qcom/glymur.dtsi
index f23cf81ddb77..82436984485d 100644
--- a/arch/arm64/boot/dts/qcom/glymur.dtsi
+++ b/arch/arm64/boot/dts/qcom/glymur.dtsi
@@ -2314,11 +2314,9 @@
clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>,
<&tcsr TCSR_USB3_0_CLKREF_EN>,
- <&rpmhcc RPMH_CXO_CLK>,
<&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>,
<&gcc GCC_USB3_MP_PHY_PIPE_0_CLK>;
clock-names = "aux",
- "clkref",
"ref",
"com_aux",
"pipe";
@@ -2343,11 +2341,9 @@
clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>,
<&tcsr TCSR_USB3_1_CLKREF_EN>,
- <&rpmhcc RPMH_CXO_CLK>,
<&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>,
<&gcc GCC_USB3_MP_PHY_PIPE_1_CLK>;
clock-names = "aux",
- "clkref",
"ref",
"com_aux",
"pipe";
@@ -2482,15 +2478,13 @@
reg = <0x0 0x00fde000 0x0 0x8000>;
clocks = <&gcc GCC_USB3_SEC_PHY_AUX_CLK>,
- <&rpmhcc RPMH_CXO_CLK>,
+ <&tcsr TCSR_USB4_1_CLKREF_EN>,
<&gcc GCC_USB3_SEC_PHY_COM_AUX_CLK>,
- <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>,
- <&tcsr TCSR_USB4_1_CLKREF_EN>;
+ <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>;
clock-names = "aux",
"ref",
"com_aux",
- "usb3_pipe",
- "clkref";
+ "usb3_pipe";
power-domains = <&gcc GCC_USB_1_PHY_GDSC>;
@@ -3750,15 +3744,13 @@
reg = <0x0 0x088e1000 0x0 0x8000>;
clocks = <&gcc GCC_USB3_TERT_PHY_AUX_CLK>,
- <&rpmhcc RPMH_CXO_CLK>,
+ <&tcsr TCSR_USB4_2_CLKREF_EN>,
<&gcc GCC_USB3_TERT_PHY_COM_AUX_CLK>,
- <&gcc GCC_USB3_TERT_PHY_PIPE_CLK>,
- <&tcsr TCSR_USB4_2_CLKREF_EN>;
+ <&gcc GCC_USB3_TERT_PHY_PIPE_CLK>;
clock-names = "aux",
"ref",
"com_aux",
- "usb3_pipe",
- "clkref";
+ "usb3_pipe";
power-domains = <&gcc GCC_USB_2_PHY_GDSC>;
diff --git a/arch/arm64/boot/dts/qcom/milos.dtsi b/arch/arm64/boot/dts/qcom/milos.dtsi
index 4a64a98a434b..a6e463f3885d 100644
--- a/arch/arm64/boot/dts/qcom/milos.dtsi
+++ b/arch/arm64/boot/dts/qcom/milos.dtsi
@@ -1275,7 +1275,11 @@
"qcom,inline-crypto-engine";
reg = <0x0 0x01d88000 0x0 0x18000>;
- clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>;
+ clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>,
+ <&gcc GCC_UFS_PHY_AHB_CLK>;
+ clock-names = "core",
+ "iface";
+ power-domains = <&gcc UFS_PHY_GDSC>;
};
tcsr_mutex: hwlock@1f40000 {
diff --git a/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi b/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi
index 0d9a324cc6cc..db291730130c 100644
--- a/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi
@@ -982,12 +982,6 @@
status = "okay";
};
-&i2c20 {
- clock-frequency = <400000>;
-
- status = "okay";
-};
-
&lpass_tlmm {
spkr_01_sd_n_active: spkr-01-sd-n-active-state {
pins = "gpio12";
@@ -1308,6 +1302,7 @@
&tlmm {
gpio-reserved-ranges = <44 4>, /* SPI11 (TPM) */
<76 4>, /* SPI19 (TZ Protected) */
+ <80 2>, /* I2C20 (Battery SMBus) */
<238 1>; /* UFS Reset */
cam_rgb_default: cam-rgb-default-state {
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index d905a0777f93..96ce783f24e7 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -260,6 +260,7 @@ CONFIG_PCI_ENDPOINT=y
CONFIG_PCI_ENDPOINT_CONFIGFS=y
CONFIG_PCI_EPF_TEST=m
CONFIG_PCI_PWRCTRL_GENERIC=m
+CONFIG_POWER_SEQUENCING_PCIE_M2=m
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_FW_LOADER_USER_HELPER=y
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index dc2957662ff2..cdf3e8422ea1 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -132,7 +132,7 @@ static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *t
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
if (!kvm_has_xnx(kvm))
- xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
+ xn &= 0b10;
switch (xn) {
case 0b00:
@@ -148,7 +148,7 @@ static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *t
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
if (!kvm_has_xnx(kvm))
- xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
+ xn &= 0b10;
switch (xn) {
case 0b00:
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 9f8f0ae8e86e..889c2c15d7bd 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1569,7 +1569,8 @@ int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
/* Do the stage-2 translation */
ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
out.esr = 0;
- ret = kvm_walk_nested_s2(vcpu, ipa, &out);
+ scoped_guard(srcu, &vcpu->kvm->srcu)
+ ret = kvm_walk_nested_s2(vcpu, ipa, &out);
if (ret < 0)
return ret;
@@ -1665,7 +1666,8 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
}
/* Walk the guest's PT, looking for a match along the way */
- ret = walk_s1(vcpu, &wi, &wr, va);
+ scoped_guard(srcu, &vcpu->kvm->srcu)
+ ret = walk_s1(vcpu, &wi, &wr, va);
switch (ret) {
case -EINTR:
/* We interrupted the walk on a match, return the level */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 320cd45d49c5..e9b36a3b27bb 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -181,6 +181,8 @@ static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
val |= CPACR_EL1_ZEN;
if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN;
+ if (cpus_have_final_cap(ARM64_HAS_S1POE))
+ val |= CPACR_EL1_E0POE;
write_sysreg(val, cpacr_el1);
}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 0c1defa5fb0f..91a7dfad6686 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -925,7 +925,9 @@ static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
static bool stage2_pte_executable(kvm_pte_t pte)
{
- return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+ enum kvm_pgtable_prot prot = kvm_pgtable_stage2_pte_prot(pte);
+
+ return prot & (KVM_PGTABLE_PROT_UX | KVM_PGTABLE_PROT_PX);
}
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 38f672e94087..6f7bc9a9992e 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -89,21 +89,28 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
* again, and there is no reason to affect the whole VM for this.
*/
num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU;
- tmp = kvrealloc(kvm->arch.nested_mmus,
- size_mul(sizeof(*kvm->arch.nested_mmus), num_mmus),
- GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!tmp)
- return -ENOMEM;
- swap(kvm->arch.nested_mmus, tmp);
+ if (num_mmus > kvm->arch.nested_mmus_size) {
+ tmp = kvcalloc(num_mmus, sizeof(*tmp), GFP_KERNEL_ACCOUNT);
+ if (!tmp)
+ return -ENOMEM;
- /*
- * If we went through a realocation, adjust the MMU back-pointers in
- * the previously initialised kvm_pgtable structures.
- */
- if (kvm->arch.nested_mmus != tmp)
- for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
- kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i];
+ write_lock(&kvm->mmu_lock);
+
+ if (kvm->arch.nested_mmus_size) {
+ memcpy(tmp, kvm->arch.nested_mmus,
+ size_mul(sizeof(*tmp), kvm->arch.nested_mmus_size));
+
+ for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
+ tmp[i].pgt->mmu = &tmp[i];
+ }
+
+ swap(kvm->arch.nested_mmus, tmp);
+
+ write_unlock(&kvm->mmu_lock);
+
+ kvfree(tmp);
+ }
for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++)
ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]);
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 1d7e5d560af4..1e3706ac3b8e 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -597,8 +597,10 @@ static void vgic_its_invalidate_cache(struct vgic_its *its)
unsigned long idx;
xa_for_each(&its->translation_cache, idx, irq) {
- xa_erase(&its->translation_cache, idx);
- vgic_put_irq(kvm, irq);
+ /* Only the context that erases the entry drops its cache ref. */
+ irq = xa_erase(&its->translation_cache, idx);
+ if (irq)
+ vgic_put_irq(kvm, irq);
}
}
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index c28f9a7d0bd8..730c90b4a876 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -56,6 +56,10 @@ CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_IOMMUFD_DRIVER=y
+CONFIG_IOMMUFD_DRIVER_CORE=y
+CONFIG_IOMMUFD=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index d89c988f33ea..dd5fc1426c88 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -54,6 +54,10 @@ CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_IOMMUFD_DRIVER=y
+CONFIG_IOMMUFD_DRIVER_CORE=y
+CONFIG_IOMMUFD=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 59017fd3d935..50a270edb020 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -12,12 +12,11 @@
#if defined(CONFIG_BUG) && defined(CONFIG_CC_HAS_ASM_IMMEDIATE_STRINGS)
#ifdef CONFIG_DEBUG_BUGVERBOSE
-#define __BUG_ENTRY_VERBOSE(format, file, line) \
- " .long " format " - . # bug_entry::format\n" \
+#define __BUG_ENTRY_VERBOSE(file, line) \
" .long " file " - . # bug_entry::file\n" \
" .short " line " # bug_entry::line\n"
#else
-#define __BUG_ENTRY_VERBOSE(format, file, line)
+#define __BUG_ENTRY_VERBOSE(file, line)
#endif
#ifdef CONFIG_DEBUG_BUGVERBOSE_DETAILED
@@ -28,9 +27,10 @@
#define __BUG_ENTRY(format, file, line, flags, size) \
" .section __bug_table,\"aw\"\n" \
- "1: .long 0b - . # bug_entry::bug_addr\n" \
- __BUG_ENTRY_VERBOSE(format, file, line) \
- " .short "flags" # bug_entry::flags\n" \
+ "1: .long 0b - . # bug_entry::bug_addr\n"\
+ " .long " format " - . # bug_entry::format\n" \
+ __BUG_ENTRY_VERBOSE(file, line) \
+ " .short "flags" # bug_entry::flags\n" \
" .org 1b+"size"\n" \
" .previous"
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
index 2d3ae421077e..d2b616604a46 100644
--- a/arch/s390/include/asm/gmap_helpers.h
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -12,5 +12,6 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
int gmap_helper_disable_cow_sharing(void);
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr);
+pte_t *try_get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl);
#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index df3fb7d8227b..1b3ac553a642 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -7,4 +7,6 @@
#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x07
#define __ALIGN_STR __stringify(__ALIGN)
+#define _THIS_IP_ ({ unsigned long __ip; asm volatile("larl %0, ." : "=d" (__ip)); __ip; })
+
#endif
diff --git a/arch/s390/kvm/faultin.c b/arch/s390/kvm/faultin.c
index ddf0ca71f374..fee80047bd94 100644
--- a/arch/s390/kvm/faultin.c
+++ b/arch/s390/kvm/faultin.c
@@ -36,7 +36,8 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa
struct kvm_s390_mmu_cache *mc = NULL;
struct kvm_memory_slot *slot;
unsigned long inv_seq;
- int foll, rc = 0;
+ int rc = -EAGAIN;
+ int foll;
foll = f->write_attempt ? FOLL_WRITE : 0;
foll |= f->attempt_pfault ? FOLL_NOWAIT : 0;
@@ -53,7 +54,14 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa
return 0;
}
- while (1) {
+ if (!mc) {
+ local_mc = kvm_s390_new_mmu_cache();
+ if (!local_mc)
+ return -ENOMEM;
+ mc = local_mc;
+ }
+
+ while (rc == -EAGAIN) {
f->valid = false;
inv_seq = kvm->mmu_invalidate_seq;
/* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
@@ -93,14 +101,7 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa
if (is_error_pfn(f->pfn))
return -EFAULT;
- if (!mc) {
- local_mc = kvm_s390_new_mmu_cache();
- if (!local_mc)
- return -ENOMEM;
- mc = local_mc;
- }
-
- /* Loop, will automatically release the faulted page. */
+ /* Loop, release the faulted page. */
if (mmu_invalidate_retry_gfn_unsafe(kvm, inv_seq, f->gfn)) {
kvm_release_faultin_page(kvm, f->page, true, false);
continue;
@@ -110,20 +111,19 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa
if (!mmu_invalidate_retry_gfn(kvm, inv_seq, f->gfn)) {
f->valid = true;
rc = gmap_link(mc, kvm->arch.gmap, f, slot);
- kvm_release_faultin_page(kvm, f->page, !!rc, f->write_attempt);
- f->page = NULL;
}
+ kvm_release_faultin_page(kvm, f->page, !!rc, f->write_attempt);
}
- kvm_release_faultin_page(kvm, f->page, true, false);
if (rc == -ENOMEM) {
rc = kvm_s390_mmu_cache_topup(mc);
if (rc)
return rc;
- } else if (rc != -EAGAIN) {
- return rc;
+ rc = -EAGAIN;
}
}
+
+ return rc;
}
int kvm_s390_get_guest_page(struct kvm *kvm, struct guest_fault *f, gfn_t gfn, bool w)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4f8d5592c9a9..20e28b183c1a 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1466,15 +1466,17 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
struct guest_fault *f, bool p)
{
union crste newcrste, oldcrste;
- gfn_t gfn;
+ unsigned long mask;
+ gfn_t r_gfn;
int rc;
lockdep_assert_held(&sg->kvm->mmu_lock);
lockdep_assert_held(&sg->parent->children_lock);
- gfn = f->gfn & (is_pmd(*table) ? _SEGMENT_FR_MASK : _REGION3_FR_MASK);
+ mask = is_pmd(*table) ? _SEGMENT_FR_MASK : _REGION3_FR_MASK;
+ r_gfn = gpa_to_gfn(raddr) & mask;
scoped_guard(spinlock, &sg->host_to_rmap_lock)
- rc = gmap_insert_rmap(sg, gfn, gpa_to_gfn(raddr), host->h.tt);
+ rc = gmap_insert_rmap(sg, f->gfn & mask, r_gfn, host->h.tt);
if (rc)
return rc;
@@ -1497,8 +1499,7 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
return -EAGAIN;
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
- gfn = gpa_to_gfn(raddr);
- while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
+ while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, r_gfn, sg->asce))
;
return 0;
}
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 957126ab991c..52d55ddea8d4 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -395,15 +395,28 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct
struct gmap_unmap_priv *priv = walk->priv;
struct folio *folio = NULL;
union crste old = *crstep;
+ bool ok;
if (!old.h.fc)
return 0;
if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
folio = phys_to_folio(crste_origin_large(old));
- /* No races should happen because kvm->mmu_lock is held in write mode */
- KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn),
- priv->gmap->kvm);
+ /*
+ * No races should happen because kvm->mmu_lock is held in write mode,
+ * but the unmap operation could have triggered an unshadow, which
+ * causes gmap_crstep_xchg_atomic() to return false and clear the
+ * vsie_notif bit. Allow the operation to fail once, if the old crste
+ * had the vsie_notif bit set. A second failure is not allowed, for
+ * the reasons above.
+ */
+ ok = gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn);
+ if (!ok) {
+ KVM_BUG_ON(!old.s.fc1.vsie_notif, priv->gmap->kvm);
+ old.s.fc1.vsie_notif = 0;
+ ok = gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn);
+ KVM_BUG_ON(!ok, priv->gmap->kvm);
+ }
if (folio)
uv_convert_from_secure_folio(folio);
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index 742e42a31744..5374f21aaf8d 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -273,11 +273,14 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) {
+ newcrste = oldcrste;
newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
_gmap_handle_vsie_unshadow_event(gmap, gfn);
+ dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce);
+ return false;
}
if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s)
SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e09960c2e6ed..ffb20a64d328 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -999,7 +999,10 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
break;
}
case KVM_S390_VM_MEM_LIMIT_SIZE: {
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *ms;
unsigned long new_limit;
+ int bkt;
if (kvm_is_ucontrol(kvm))
return -EINVAL;
@@ -1007,6 +1010,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (get_user(new_limit, (u64 __user *)attr->addr))
return -EFAULT;
+ guard(mutex)(&kvm->lock);
+
+ new_limit = ALIGN(new_limit, HPAGE_SIZE);
if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
new_limit > kvm->arch.mem_limit)
return -E2BIG;
@@ -1014,12 +1020,27 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (!new_limit)
return -EINVAL;
- ret = -EBUSY;
- if (!kvm->created_vcpus)
- ret = gmap_set_limit(kvm->arch.gmap, gpa_to_gfn(new_limit));
+ if (kvm->created_vcpus)
+ return -EBUSY;
+
+ ret = 0;
+ scoped_guard(mutex, &kvm->slots_lock) {
+ slots = kvm_memslots(kvm);
+ if (slots && !kvm_memslots_empty(slots)) {
+ kvm_for_each_memslot(ms, bkt, slots) {
+ if (gpa_to_gfn(new_limit) < ms->base_gfn + ms->npages) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ }
+ if (!ret)
+ ret = gmap_set_limit(kvm->arch.gmap, gpa_to_gfn(new_limit));
+ }
+ if (ret)
+ break;
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
- VM_EVENT(kvm, 3, "New guest asce: 0x%p",
- (void *)kvm->arch.gmap->asce.val);
+ VM_EVENT(kvm, 3, "New guest asce: 0x%p", (void *)kvm->arch.gmap->asce.val);
break;
}
default:
@@ -5672,6 +5693,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return -EINVAL;
if ((new->base_gfn + new->npages) * PAGE_SIZE > kvm->arch.mem_limit)
return -EINVAL;
+ if (!asce_contains_gfn(kvm->arch.gmap->asce, new->base_gfn + new->npages - 1))
+ return -EINVAL;
}
if (!kvm->arch.migration_mode)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index cc0553da14cb..447ec7ed423d 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1188,6 +1188,7 @@ static void _essa_clear_cbrl(struct kvm_vcpu *vcpu, unsigned long *cbrl, int len
union crste *crstep;
union pgste pgste;
union pte *ptep;
+ hva_t hva;
int i;
lockdep_assert_held(&vcpu->kvm->mmu_lock);
@@ -1199,8 +1200,11 @@ static void _essa_clear_cbrl(struct kvm_vcpu *vcpu, unsigned long *cbrl, int len
if (!ptep || ptep->s.pr)
continue;
pgste = pgste_get_lock(ptep);
- if (pgste.usage == PGSTE_GPS_USAGE_UNUSED || pgste.zero)
- gmap_helper_zap_one_page(vcpu->kvm->mm, cbrl[i]);
+ if (pgste.usage == PGSTE_GPS_USAGE_UNUSED || pgste.zero) {
+ hva = gpa_to_hva(vcpu->kvm, cbrl[i]);
+ if (!kvm_is_error_hva(hva))
+ gmap_helper_zap_one_page(vcpu->kvm->mm, hva);
+ }
pgste_set_unlock(ptep, pgste);
}
}
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index c2dafd812a3b..4b865e75351c 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -17,6 +17,7 @@
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
+#include <asm/gmap_helpers.h>
#include "kvm-s390.h"
#include "dat.h"
#include "gaccess.h"
@@ -73,6 +74,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
struct pv_make_secure {
void *uvcb;
struct folio *folio;
+ struct kvm *kvm;
int rc;
bool needs_export;
};
@@ -103,9 +105,21 @@ static void _kvm_s390_pv_make_secure(struct guest_fault *f)
{
struct pv_make_secure *priv = f->priv;
struct folio *folio;
+ spinlock_t *ptl; /* pte lock from try_get_locked_pte() */
+ pte_t *ptep;
folio = pfn_folio(f->pfn);
priv->rc = -EAGAIN;
+
+ if (!mmap_read_trylock(priv->kvm->mm))
+ return;
+
+ ptep = try_get_locked_pte(priv->kvm->mm, gfn_to_hva(priv->kvm, f->gfn), &ptl);
+ if (IS_ERR_VALUE(ptep)) {
+ priv->rc = PTR_ERR(ptep);
+ goto out;
+ }
+
if (folio_trylock(folio)) {
priv->rc = __kvm_s390_pv_make_secure(f, folio);
if (priv->rc == -E2BIG || priv->rc == -EBUSY) {
@@ -114,6 +128,11 @@ static void _kvm_s390_pv_make_secure(struct guest_fault *f)
}
folio_unlock(folio);
}
+
+ if (ptep)
+ pte_unmap_unlock(ptep, ptl);
+out:
+ mmap_read_unlock(priv->kvm->mm);
}
/**
@@ -127,7 +146,7 @@ static void _kvm_s390_pv_make_secure(struct guest_fault *f)
*/
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
- struct pv_make_secure priv = { .uvcb = uvcb };
+ struct pv_make_secure priv = { .uvcb = uvcb, .kvm = kvm, };
struct guest_fault f = {
.write_attempt = true,
.gfn = gpa_to_gfn(gaddr),
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index f8789ffcc05c..1cfe4724fbe2 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -17,22 +17,68 @@
#include <asm/gmap_helpers.h>
/**
- * ptep_zap_softleaf_entry() - discard a software leaf entry.
+ * try_get_locked_pte() - like get_locked_pte(), but atomic and with trylock
* @mm: the mm
- * @entry: the software leaf entry that needs to be zapped
+ * @vmaddr: the userspace virtual address whose pte is to be found
+ * @ptl: will be set to the pointer to the lock used to lock the pte in case
+ * of success.
*
- * Discards the given software leaf entry. If the leaf entry was an actual
- * swap entry (and not a migration entry, for example), the actual swapped
- * page is also discarded from swap.
+ * This function returns the pointer to the pte corresponding to @addr in @mm,
+ * similarly to get_locked_pte(). Unlike get_locked_pte(), no attempt is made
+ * to allocate missing page tables. If a missing or large entry is found, the
+ * function will return NULL. If the ptl lock is contended, %-EAGAIN is
+ * returned.
+ *
+ * In case of success, *@ptl will point to the locked pte lock for the returned
+ * pte, like get_locked_pte() does.
+ *
+ * Context: mmap_lock or vma lock for read or for write needs to be held.
+ * Return:
+ * * %NULL if the pte cannot be reached.
+ * * %-EAGAIN if the pte can be reached, but cannot be locked.
+ * * the pointer to the pte corresponding to @addr in @mm, if it can be reached
+ * and locked.
*/
-static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
+pte_t *try_get_locked_pte(struct mm_struct *mm, unsigned long vmaddr, spinlock_t **ptl)
{
- if (softleaf_is_swap(entry))
- dec_mm_counter(mm, MM_SWAPENTS);
- else if (softleaf_is_migration(entry))
- dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry)));
- swap_put_entries_direct(entry, 1);
+ pmd_t *pmdp, pmd, pmdval;
+ pud_t *pudp, pud;
+ p4d_t *p4dp, p4d;
+ pgd_t *pgdp, pgd;
+ pte_t *ptep;
+
+ pgdp = pgd_offset(mm, vmaddr);
+ pgd = pgdp_get(pgdp);
+ if (pgd_none(pgd) || !pgd_present(pgd))
+ return NULL;
+ p4dp = p4d_offset(pgdp, vmaddr);
+ p4d = p4dp_get(p4dp);
+ if (p4d_none(p4d) || !p4d_present(p4d))
+ return NULL;
+ pudp = pud_offset(p4dp, vmaddr);
+ pud = pudp_get(pudp);
+ if (pud_none(pud) || pud_leaf(pud) || !pud_present(pud))
+ return NULL;
+ pmdp = pmd_offset(pudp, vmaddr);
+ pmd = pmdp_get_lockless(pmdp);
+ if (pmd_none(pmd) || pmd_leaf(pmd) || !pmd_present(pmd))
+ return NULL;
+ ptep = pte_offset_map_rw_nolock(mm, pmdp, vmaddr, &pmdval, ptl);
+ if (!ptep)
+ return NULL;
+
+ if (spin_trylock(*ptl)) {
+ if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmdp)))) {
+ pte_unmap_unlock(ptep, *ptl);
+ return ERR_PTR(-EAGAIN);
+ }
+ return ptep;
+ }
+
+ pte_unmap(ptep);
+ return ERR_PTR(-EAGAIN);
}
+EXPORT_SYMBOL_GPL(try_get_locked_pte);
/**
* gmap_helper_zap_one_page() - discard a page if it was swapped.
@@ -46,7 +92,8 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
struct vm_area_struct *vma;
- spinlock_t *ptl;
+ spinlock_t *ptl; /* Lock for the host (userspace) page table */
+ softleaf_t sl;
pte_t *ptep;
mmap_assert_locked(mm);
@@ -57,11 +104,13 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
return;
/* Get pointer to the page table entry */
- ptep = get_locked_pte(mm, vmaddr, &ptl);
- if (unlikely(!ptep))
+ ptep = try_get_locked_pte(mm, vmaddr, &ptl);
+ if (IS_ERR_OR_NULL(ptep))
return;
- if (pte_swap(*ptep)) {
- ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
+ sl = softleaf_from_pte(*ptep);
+ if (pte_swap(*ptep) && softleaf_is_swap(sl)) {
+ dec_mm_counter(mm, MM_SWAPENTS);
+ swap_put_entries_direct(sl, 1);
pte_clear(mm, vmaddr, ptep);
}
pte_unmap_unlock(ptep, ptl);
@@ -113,37 +162,9 @@ EXPORT_SYMBOL_GPL(gmap_helper_discard);
*/
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr)
{
- pmd_t *pmdp, pmd, pmdval;
- pud_t *pudp, pud;
- p4d_t *p4dp, p4d;
- pgd_t *pgdp, pgd;
spinlock_t *ptl; /* Lock for the host (userspace) page table */
pte_t *ptep;
- pgdp = pgd_offset(mm, vmaddr);
- pgd = pgdp_get(pgdp);
- if (pgd_none(pgd) || !pgd_present(pgd))
- return;
-
- p4dp = p4d_offset(pgdp, vmaddr);
- p4d = p4dp_get(p4dp);
- if (p4d_none(p4d) || !p4d_present(p4d))
- return;
-
- pudp = pud_offset(p4dp, vmaddr);
- pud = pudp_get(pudp);
- if (pud_none(pud) || pud_leaf(pud) || !pud_present(pud))
- return;
-
- pmdp = pmd_offset(pudp, vmaddr);
- pmd = pmdp_get_lockless(pmdp);
- if (pmd_none(pmd) || pmd_leaf(pmd) || !pmd_present(pmd))
- return;
-
- ptep = pte_offset_map_rw_nolock(mm, pmdp, vmaddr, &pmdval, &ptl);
- if (!ptep)
- return;
-
/*
* Several paths exists that takes the ptl lock and then call the
* mmu_notifier, which takes the mmu_lock. The unmap path, instead,
@@ -156,21 +177,12 @@ void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr)
* If the lock is contended the bit is not set and the deadlock is
* avoided.
*/
- if (spin_trylock(ptl)) {
- /*
- * Make sure the pte we are touching is still the correct
- * one. In theory this check should not be needed, but
- * better safe than sorry.
- * Disabling interrupts or holding the mmap lock is enough to
- * guarantee that no concurrent updates to the page tables
- * are possible.
- */
- if (likely(pmd_same(pmdval, pmdp_get_lockless(pmdp))))
- __atomic64_or(_PAGE_UNUSED, (long *)ptep);
- spin_unlock(ptl);
- }
+ ptep = try_get_locked_pte(mm, vmaddr, &ptl);
+ if (IS_ERR_OR_NULL(ptep))
+ return;
- pte_unmap(ptep);
+ __atomic64_or(_PAGE_UNUSED, (long *)ptep);
+ pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_try_set_pte_unused);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 62b5befe0eed..6c6a6d663e29 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3313,37 +3313,6 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
sev_writeback_caches(kvm);
}
-void sev_free_vcpu(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm;
-
- if (!is_sev_es_guest(vcpu))
- return;
-
- svm = to_svm(vcpu);
-
- /*
- * If it's an SNP guest, then the VMSA was marked in the RMP table as
- * a guest-owned page. Transition the page to hypervisor state before
- * releasing it back to the system.
- */
- if (is_sev_snp_guest(vcpu)) {
- u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
-
- if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K))
- goto skip_vmsa_free;
- }
-
- if (vcpu->arch.guest_state_protected)
- sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
-
- __free_page(virt_to_page(svm->sev_es.vmsa));
-
-skip_vmsa_free:
- if (svm->sev_es.ghcb_sa_free)
- kvfree(svm->sev_es.ghcb_sa);
-}
-
static void dump_ghcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -3583,6 +3552,20 @@ vmgexit_err:
return 1;
}
+static void __sev_es_unmap_ghcb(struct vcpu_svm *svm)
+{
+ if (svm->sev_es.ghcb_sa_free) {
+ kvfree(svm->sev_es.ghcb_sa);
+ svm->sev_es.ghcb_sa = NULL;
+ svm->sev_es.ghcb_sa_free = false;
+ }
+
+ if (svm->sev_es.ghcb) {
+ kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map);
+ svm->sev_es.ghcb = NULL;
+ }
+}
+
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
/* Clear any indication that the vCPU is in a type of AP Reset Hold */
@@ -3591,31 +3574,51 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
if (!svm->sev_es.ghcb)
return;
- if (svm->sev_es.ghcb_sa_free) {
- /*
- * The scratch area lives outside the GHCB, so there is a
- * buffer that, depending on the operation performed, may
- * need to be synced, then freed.
- */
- if (svm->sev_es.ghcb_sa_sync) {
- kvm_write_guest(svm->vcpu.kvm,
- svm->sev_es.sw_scratch,
- svm->sev_es.ghcb_sa,
- svm->sev_es.ghcb_sa_len);
- svm->sev_es.ghcb_sa_sync = false;
- }
-
- kvfree(svm->sev_es.ghcb_sa);
- svm->sev_es.ghcb_sa = NULL;
- svm->sev_es.ghcb_sa_free = false;
+ /*
+ * If the scratch area lives outside the GHCB, there's a buffer that,
+ * depending on the operation performed, may need to be synced.
+ */
+ if (svm->sev_es.ghcb_sa_sync) {
+ kvm_write_guest(svm->vcpu.kvm, svm->sev_es.sw_scratch,
+ svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len);
+ svm->sev_es.ghcb_sa_sync = false;
}
trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
sev_es_sync_to_ghcb(svm);
- kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map);
- svm->sev_es.ghcb = NULL;
+ __sev_es_unmap_ghcb(svm);
+}
+
+void sev_free_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm;
+
+ if (!is_sev_es_guest(vcpu))
+ return;
+
+ svm = to_svm(vcpu);
+
+ /*
+ * If it's an SNP guest, then the VMSA was marked in the RMP table as
+ * a guest-owned page. Transition the page to hypervisor state before
+ * releasing it back to the system.
+ */
+ if (is_sev_snp_guest(vcpu)) {
+ u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
+
+ if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K))
+ goto skip_vmsa_free;
+ }
+
+ if (vcpu->arch.guest_state_protected)
+ sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
+
+ __free_page(virt_to_page(svm->sev_es.vmsa));
+
+skip_vmsa_free:
+ __sev_es_unmap_ghcb(svm);
}
int pre_sev_run(struct vcpu_svm *svm, int cpu)
@@ -3685,6 +3688,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 min_len)
goto e_scratch;
}
+ WARN_ON_ONCE(svm->sev_es.ghcb_sa_sync || svm->sev_es.ghcb_sa_free);
+
if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
/* Scratch area begins within GHCB */
ghcb_scratch_beg = control->ghcb_gpa +
@@ -3706,6 +3711,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 min_len)
scratch_va = (void *)svm->sev_es.ghcb;
scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
+ svm->sev_es.ghcb_sa_sync = false;
+ svm->sev_es.ghcb_sa_free = false;
svm->sev_es.ghcb_sa_len = ghcb_scratch_end - scratch_gpa_beg;
} else {
/* GHCB v2 requires the scratch area to be within the GHCB. */
@@ -3841,13 +3848,11 @@ struct psc_buffer {
struct psc_entry entries[];
} __packed;
-static int snp_begin_psc(struct vcpu_svm *svm);
+static int snp_do_psc(struct vcpu_svm *svm);
static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret)
{
- svm->sev_es.psc_inflight = 0;
- svm->sev_es.psc_idx = 0;
- svm->sev_es.psc_2m = false;
+ memset(&svm->sev_es.psc, 0, sizeof(svm->sev_es.psc));
/*
* PSC requests always get a "no action" response in SW_EXITINFO1, with
@@ -3860,9 +3865,8 @@ static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret)
static void __snp_complete_one_psc(struct vcpu_svm *svm)
{
- struct psc_buffer *psc = svm->sev_es.ghcb_sa;
- struct psc_entry *entries = psc->entries;
- struct psc_hdr *hdr = &psc->hdr;
+ struct vcpu_sev_es_state *sev_es = &svm->sev_es;
+ struct psc_buffer *guest_psc = sev_es->ghcb_sa;
__u16 idx;
/*
@@ -3870,14 +3874,15 @@ static void __snp_complete_one_psc(struct vcpu_svm *svm)
* corresponding entries in the guest's PSC buffer and zero out the
* count of in-flight PSC entries.
*/
- for (idx = svm->sev_es.psc_idx; svm->sev_es.psc_inflight;
- svm->sev_es.psc_inflight--, idx++) {
- struct psc_entry entry = READ_ONCE(entries[idx]);
+ for (idx = sev_es->psc.cur_idx; sev_es->psc.batch_size;
+ sev_es->psc.batch_size--, idx++) {
+ struct psc_entry entry = READ_ONCE(guest_psc->entries[idx]);
- entries[idx].cur_page = entry.pagesize ? 512 : 1;
+ guest_psc->entries[idx].cur_page = entry.pagesize ? 512 : 1;
}
- hdr->cur_entry = idx;
+ sev_es->psc.cur_idx = idx;
+ guest_psc->hdr.cur_entry = idx;
}
static int snp_complete_one_psc(struct kvm_vcpu *vcpu)
@@ -3892,63 +3897,30 @@ static int snp_complete_one_psc(struct kvm_vcpu *vcpu)
__snp_complete_one_psc(svm);
/* Handle the next range (if any). */
- return snp_begin_psc(svm);
+ return snp_do_psc(svm);
}
-static int snp_begin_psc(struct vcpu_svm *svm)
+static int snp_do_psc(struct vcpu_svm *svm)
{
struct vcpu_sev_es_state *sev_es = &svm->sev_es;
- struct psc_buffer *psc = sev_es->ghcb_sa;
- struct psc_entry *entries = psc->entries;
+ struct psc_buffer *guest_psc = sev_es->ghcb_sa;
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct psc_hdr *hdr = &psc->hdr;
struct psc_entry entry_start;
- u16 idx, idx_start, idx_end, max_nr_entries;
int npages;
bool huge;
u64 gfn;
-
- if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) {
- snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
- return 1;
- }
-
- /*
- * GHCB v2 requires the scratch area to reside within the GHCB itself,
- * and PSC requests are only supported for GHCB v2+. Thus it should be
- * impossible to exceed the max PSC entry count (which is derived from
- * the size of the shared GHCB buffer).
- */
- max_nr_entries = (sev_es->ghcb_sa_len - sizeof(struct psc_hdr)) /
- sizeof(struct psc_entry);
- if (WARN_ON_ONCE(max_nr_entries > VMGEXIT_PSC_MAX_COUNT)) {
- snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
- return 1;
- }
+ u16 idx;
next_range:
/* There should be no other PSCs in-flight at this point. */
- if (WARN_ON_ONCE(svm->sev_es.psc_inflight)) {
+ if (WARN_ON_ONCE(svm->sev_es.psc.batch_size)) {
snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
return 1;
}
- /*
- * The PSC descriptor buffer can be modified by a misbehaved guest after
- * validation, so take care to only use validated copies of values used
- * for things like array indexing.
- */
- idx_start = READ_ONCE(hdr->cur_entry);
- idx_end = READ_ONCE(hdr->end_entry);
-
- if (idx_end >= max_nr_entries) {
- snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR);
- return 1;
- }
-
/* Find the start of the next range which needs processing. */
- for (idx = idx_start; idx <= idx_end; idx++, hdr->cur_entry++) {
- entry_start = READ_ONCE(entries[idx]);
+ for (idx = sev_es->psc.cur_idx; idx <= sev_es->psc.end_idx; idx++) {
+ entry_start = READ_ONCE(guest_psc->entries[idx]);
gfn = entry_start.gfn;
huge = entry_start.pagesize;
@@ -3974,32 +3946,40 @@ next_range:
if (npages)
break;
+
+ /*
+ * Increment the guest-visible index to communicate the current
+ * entry back to the guest, e.g. in case of failure. No need
+ * for READ_ONCE() as KVM doesn't consume the field, i.e. a
+ * misbehaving guest can only break itself.
+ */
+ guest_psc->hdr.cur_entry++;
}
- if (idx > idx_end) {
+ if (idx > sev_es->psc.end_idx) {
/* Nothing more to process. */
snp_complete_psc(svm, 0);
return 1;
}
- svm->sev_es.psc_2m = huge;
- svm->sev_es.psc_idx = idx;
- svm->sev_es.psc_inflight = 1;
+ sev_es->psc.is_2m = huge;
+ sev_es->psc.cur_idx = idx;
+ sev_es->psc.batch_size = 1;
/*
* Find all subsequent PSC entries that contain adjacent GPA
* ranges/operations and can be combined into a single
* KVM_HC_MAP_GPA_RANGE exit.
*/
- while (++idx <= idx_end) {
- struct psc_entry entry = READ_ONCE(entries[idx]);
+ while (++idx <= sev_es->psc.end_idx) {
+ struct psc_entry entry = READ_ONCE(guest_psc->entries[idx]);
if (entry.operation != entry_start.operation ||
entry.gfn != entry_start.gfn + npages ||
entry.cur_page || !!entry.pagesize != huge)
break;
- svm->sev_es.psc_inflight++;
+ sev_es->psc.batch_size++;
npages += huge ? 512 : 1;
}
@@ -4041,6 +4021,46 @@ next_range:
BUG();
}
+static int snp_begin_psc(struct vcpu_svm *svm)
+{
+ struct vcpu_sev_es_state *sev_es = &svm->sev_es;
+ struct psc_buffer *guest_psc = sev_es->ghcb_sa;
+ u16 max_nr_entries;
+
+ if (!user_exit_on_hypercall(svm->vcpu.kvm, KVM_HC_MAP_GPA_RANGE)) {
+ snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
+ return 1;
+ }
+
+ /*
+ * GHCB v2 requires the scratch area to reside within the GHCB itself,
+ * and PSC requests are only supported for GHCB v2+. Thus it should be
+ * impossible to exceed the max PSC entry count (which is derived from
+ * the size of the shared GHCB buffer).
+ */
+ max_nr_entries = (sev_es->ghcb_sa_len - sizeof(struct psc_hdr)) /
+ sizeof(struct psc_entry);
+ if (WARN_ON_ONCE(max_nr_entries > VMGEXIT_PSC_MAX_COUNT)) {
+ snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
+ return 1;
+ }
+
+ /*
+ * The PSC descriptor buffer can be modified by a misbehaved guest after
+ * validation, so take care to only use validated copies of values used
+ * for things like array indexing.
+ */
+ sev_es->psc.cur_idx = READ_ONCE(guest_psc->hdr.cur_entry);
+ sev_es->psc.end_idx = READ_ONCE(guest_psc->hdr.end_entry);
+
+ if (sev_es->psc.end_idx >= max_nr_entries) {
+ snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR);
+ return 1;
+ }
+
+ return snp_do_psc(svm);
+}
+
/*
* Invoked as part of svm_vcpu_reset() processing of an init event.
*/
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index a10668d17a16..5137416be593 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -257,9 +257,12 @@ struct vcpu_sev_es_state {
bool ghcb_sa_free;
/* SNP Page-State-Change buffer entries currently being processed */
- u16 psc_idx;
- u16 psc_inflight;
- bool psc_2m;
+ struct {
+ u16 cur_idx;
+ u16 end_idx;
+ u16 batch_size;
+ bool is_2m;
+ } psc;
u64 ghcb_registered_gpa;
diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index bedc6133f970..1ea7c039160c 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -327,7 +327,7 @@ config PANEL_CHANGE_MESSAGE
say 'N' and keep the default message with the version.
config PANEL_BOOT_MESSAGE
- depends on PANEL_CHANGE_MESSAGE="y"
+ depends on PANEL_CHANGE_MESSAGE
string "New initialization message"
default ""
help
diff --git a/drivers/auxdisplay/line-display.c b/drivers/auxdisplay/line-display.c
index fb6d9294140d..915eb5cd96b2 100644
--- a/drivers/auxdisplay/line-display.c
+++ b/drivers/auxdisplay/line-display.c
@@ -173,7 +173,7 @@ static int linedisp_display(struct linedisp *linedisp, const char *msg,
count = strlen(msg);
/* if the string ends with a newline, trim it */
- if (msg[count - 1] == '\n')
+ if (count && msg[count - 1] == '\n')
count--;
if (!count) {
diff --git a/drivers/auxdisplay/max6959.c b/drivers/auxdisplay/max6959.c
index 6bbc8d48fb1b..3bdef099a225 100644
--- a/drivers/auxdisplay/max6959.c
+++ b/drivers/auxdisplay/max6959.c
@@ -86,10 +86,7 @@ static const struct linedisp_ops max6959_linedisp_ops = {
static int max6959_enable(struct max6959_priv *priv, bool enable)
{
- u8 mask = REG_CONFIGURATION_S_BIT;
- u8 value = enable ? mask : 0;
-
- return regmap_update_bits(priv->regmap, REG_CONFIGURATION, mask, value);
+ return regmap_assign_bits(priv->regmap, REG_CONFIGURATION, REG_CONFIGURATION_S_BIT, enable);
}
static void max6959_power_off(void *priv)
diff --git a/drivers/firmware/samsung/exynos-acpm-dvfs.c b/drivers/firmware/samsung/exynos-acpm-dvfs.c
index 06bdf62dea1f..fdea7aa24ca0 100644
--- a/drivers/firmware/samsung/exynos-acpm-dvfs.c
+++ b/drivers/firmware/samsung/exynos-acpm-dvfs.c
@@ -31,6 +31,9 @@ static void acpm_dvfs_set_xfer(struct acpm_xfer *xfer, u32 *cmd, size_t cmdlen,
if (response) {
xfer->rxcnt = cmdlen;
xfer->rxd = cmd;
+ } else {
+ xfer->rxcnt = 0;
+ xfer->rxd = NULL;
}
}
diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c
index 16c46ed60837..19db3674a28f 100644
--- a/drivers/firmware/samsung/exynos-acpm.c
+++ b/drivers/firmware/samsung/exynos-acpm.c
@@ -7,11 +7,12 @@
#include <linux/bitfield.h>
#include <linux/bitmap.h>
-#include <linux/bits.h>
+#include <linux/bitops.h>
#include <linux/cleanup.h>
#include <linux/container_of.h>
#include <linux/delay.h>
#include <linux/device.h>
+#include <linux/find.h>
#include <linux/firmware/samsung/exynos-acpm-protocol.h>
#include <linux/io.h>
#include <linux/iopoll.h>
@@ -104,12 +105,15 @@ struct acpm_queue {
*
* @cmd: pointer to where the data shall be saved.
* @n_cmd: number of 32-bit commands.
- * @response: true if the client expects the RX data.
+ * @rxcnt: expected length of the response in 32-bit words.
+ * @completed: flag indicating if the firmware response has been fully
+ * processed.
*/
struct acpm_rx_data {
u32 *cmd;
size_t n_cmd;
- bool response;
+ size_t rxcnt;
+ bool completed;
};
#define ACPM_SEQNUM_MAX 64
@@ -199,31 +203,33 @@ static void acpm_get_saved_rx(struct acpm_chan *achan,
const struct acpm_rx_data *rx_data = &achan->rx_data[tx_seqnum - 1];
u32 rx_seqnum;
- if (!rx_data->response)
+ if (!rx_data->rxcnt)
return;
rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, rx_data->cmd[0]);
- if (rx_seqnum == tx_seqnum) {
+ if (rx_seqnum == tx_seqnum)
memcpy(xfer->rxd, rx_data->cmd, xfer->rxcnt * sizeof(*xfer->rxd));
- clear_bit(rx_seqnum - 1, achan->bitmap_seqnum);
- }
}
/**
* acpm_get_rx() - get response from RX queue.
* @achan: ACPM channel info.
* @xfer: reference to the transfer to get response for.
+ * @native_match: pointer to a boolean set to true if the thread natively
+ * processed its own sequence number during this call.
*
* Return: 0 on success, -errno otherwise.
*/
-static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer)
+static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer,
+ bool *native_match)
{
u32 rx_front, rx_seqnum, tx_seqnum, seqnum;
const void __iomem *base, *addr;
struct acpm_rx_data *rx_data;
u32 i, val, mlen;
- bool rx_set = false;
+
+ *native_match = false;
guard(mutex)(&achan->rx_lock);
@@ -232,10 +238,8 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer)
tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]);
- if (i == rx_front) {
- acpm_get_saved_rx(achan, xfer, tx_seqnum);
+ if (i == rx_front)
return 0;
- }
base = achan->rx.base;
mlen = achan->mlen;
@@ -256,11 +260,16 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer)
seqnum = rx_seqnum - 1;
rx_data = &achan->rx_data[seqnum];
- if (rx_data->response) {
+ if (rx_data->rxcnt) {
if (rx_seqnum == tx_seqnum) {
__ioread32_copy(xfer->rxd, addr, xfer->rxcnt);
- rx_set = true;
- clear_bit(seqnum, achan->bitmap_seqnum);
+ /*
+ * Signal completion to the polling thread.
+ * Pairs with smp_load_acquire() in polling
+ * loop.
+ */
+ smp_store_release(&rx_data->completed, true);
+ *native_match = true;
} else {
/*
* The RX data corresponds to another request.
@@ -268,10 +277,23 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer)
* clear yet the bitmap. It will be cleared
* after the response is copied to the request.
*/
- __ioread32_copy(rx_data->cmd, addr, xfer->rxcnt);
+ __ioread32_copy(rx_data->cmd, addr,
+ rx_data->rxcnt);
+ /*
+ * Signal completion to the polling thread.
+ * Pairs with smp_load_acquire() in polling
+ * loop.
+ */
+ smp_store_release(&rx_data->completed, true);
}
} else {
- clear_bit(seqnum, achan->bitmap_seqnum);
+ /*
+ * Signal completion to the polling thread.
+ * Pairs with smp_load_acquire() in polling loop.
+ */
+ smp_store_release(&rx_data->completed, true);
+ if (rx_seqnum == tx_seqnum)
+ *native_match = true;
}
i = (i + 1) % achan->qlen;
@@ -280,13 +302,6 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer)
/* We saved all responses, mark RX empty. */
writel(rx_front, achan->rx.rear);
- /*
- * If the response was not in this iteration of the queue, check if the
- * RX data was previously saved.
- */
- if (!rx_set)
- acpm_get_saved_rx(achan, xfer, tx_seqnum);
-
return 0;
}
@@ -301,6 +316,7 @@ static int acpm_dequeue_by_polling(struct acpm_chan *achan,
const struct acpm_xfer *xfer)
{
struct device *dev = achan->acpm->dev;
+ bool native_match;
ktime_t timeout;
u32 seqnum;
int ret;
@@ -309,12 +325,25 @@ static int acpm_dequeue_by_polling(struct acpm_chan *achan,
timeout = ktime_add_us(ktime_get(), ACPM_POLL_TIMEOUT_US);
do {
- ret = acpm_get_rx(achan, xfer);
+ ret = acpm_get_rx(achan, xfer, &native_match);
if (ret)
return ret;
- if (!test_bit(seqnum - 1, achan->bitmap_seqnum))
+ /*
+ * Safely check if our specific transaction has been processed.
+ * smp_load_acquire prevents the CPU from speculatively
+ * executing subsequent instructions before the transaction is
+ * synchronized.
+ */
+ if (smp_load_acquire(&achan->rx_data[seqnum - 1].completed)) {
+ /* Retrieve payload if another thread cached it for us */
+ if (!native_match)
+ acpm_get_saved_rx(achan, xfer, seqnum);
+
+ /* Relinquish ownership of the sequence slot */
+ clear_bit_unlock(seqnum - 1, achan->bitmap_seqnum);
return 0;
+ }
/* Determined experimentally. */
udelay(20);
@@ -362,29 +391,48 @@ static int acpm_wait_for_queue_slots(struct acpm_chan *achan, u32 next_tx_front)
* TX queue.
* @achan: ACPM channel info.
* @xfer: reference to the transfer being prepared.
+ *
+ * Return: 0 on success, -errno otherwise.
*/
-static void acpm_prepare_xfer(struct acpm_chan *achan,
- const struct acpm_xfer *xfer)
+static int acpm_prepare_xfer(struct acpm_chan *achan,
+ const struct acpm_xfer *xfer)
{
struct acpm_rx_data *rx_data;
u32 *txd = (u32 *)xfer->txd;
+ unsigned long size = ACPM_SEQNUM_MAX - 1;
+ unsigned long bit = achan->seqnum;
+
+ bit = find_next_zero_bit(achan->bitmap_seqnum, size, bit);
+ if (bit >= size) {
+ bit = find_first_zero_bit(achan->bitmap_seqnum, size);
+ if (bit >= size) {
+ dev_err_ratelimited(achan->acpm->dev,
+ "ACPM sequence number pool exhausted\n");
+ return -EBUSY;
+ }
+ }
- /* Prevent chan->seqnum from being re-used */
- do {
- if (++achan->seqnum == ACPM_SEQNUM_MAX)
- achan->seqnum = 1;
- } while (test_bit(achan->seqnum - 1, achan->bitmap_seqnum));
+ /*
+ * Execute the atomic set to formally claim the bit and establish
+ * LKMM Acquire semantics against the RX thread's clear_bit_unlock().
+ * A loop is unnecessary because allocations are strictly serialized
+ * by tx_lock.
+ */
+ if (WARN_ON_ONCE(test_and_set_bit_lock(bit, achan->bitmap_seqnum)))
+ return -EIO;
+ /* Flag the index based on seqnum. (seqnum: 1~63, bitmap: 0~62) */
+ achan->seqnum = bit + 1;
txd[0] |= FIELD_PREP(ACPM_PROTOCOL_SEQNUM, achan->seqnum);
/* Clear data for upcoming responses */
- rx_data = &achan->rx_data[achan->seqnum - 1];
+ rx_data = &achan->rx_data[bit];
+ rx_data->completed = false;
memset(rx_data->cmd, 0, sizeof(*rx_data->cmd) * rx_data->n_cmd);
- if (xfer->rxd)
- rx_data->response = true;
+ /* zero means no response expected */
+ rx_data->rxcnt = xfer->rxcnt;
- /* Flag the index based on seqnum. (seqnum: 1~63, bitmap: 0~62) */
- set_bit(achan->seqnum - 1, achan->bitmap_seqnum);
+ return 0;
}
/**
@@ -444,7 +492,9 @@ int acpm_do_xfer(struct acpm_handle *handle, const struct acpm_xfer *xfer)
if (ret)
return ret;
- acpm_prepare_xfer(achan, xfer);
+ ret = acpm_prepare_xfer(achan, xfer);
+ if (ret)
+ return ret;
/* Write TX command. */
__iowrite32_copy(achan->tx.base + achan->mlen * tx_front,
@@ -526,10 +576,11 @@ static int acpm_achan_alloc_cmds(struct acpm_chan *achan)
/**
* acpm_free_mbox_chans() - free mailbox channels.
- * @acpm: pointer to driver data.
+ * @data: pointer to driver data.
*/
-static void acpm_free_mbox_chans(struct acpm_info *acpm)
+static void acpm_free_mbox_chans(void *data)
{
+ struct acpm_info *acpm = data;
int i;
for (i = 0; i < acpm->num_chans; i++)
@@ -557,6 +608,10 @@ static int acpm_channels_init(struct acpm_info *acpm)
if (!acpm->chans)
return -ENOMEM;
+ ret = devm_add_action_or_reset(dev, acpm_free_mbox_chans, acpm);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to add mbox free action.\n");
+
chans_shmem = acpm->sram_base + readl(&shmem->chans);
for (i = 0; i < acpm->num_chans; i++) {
@@ -578,10 +633,8 @@ static int acpm_channels_init(struct acpm_info *acpm)
cl->dev = dev;
achan->chan = mbox_request_channel(cl, 0);
- if (IS_ERR(achan->chan)) {
- acpm_free_mbox_chans(acpm);
+ if (IS_ERR(achan->chan))
return PTR_ERR(achan->chan);
- }
}
return 0;
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index dd77a93fd68d..1ae304c2f573 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1590,18 +1590,22 @@ static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock)
struct smq_policy *mq = to_smq_policy(p);
struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
unsigned long flags;
-
- if (!e->allocated)
- return -ENODATA;
+ int r = 0;
spin_lock_irqsave(&mq->lock, flags);
+ if (!e->allocated) {
+ r = -ENODATA;
+ goto out;
+ }
// FIXME: what if this block has pending background work?
del_queue(mq, e);
h_remove(&mq->table, e);
free_entry(&mq->cache_alloc, e);
+
+out:
spin_unlock_irqrestore(&mq->lock, flags);
- return 0;
+ return r;
}
static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c
index 8db970da9af9..1e8e8aba2542 100644
--- a/drivers/memory/atmel-ebi.c
+++ b/drivers/memory/atmel-ebi.c
@@ -628,10 +628,11 @@ static __maybe_unused int atmel_ebi_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(atmel_ebi_pm_ops, NULL, atmel_ebi_resume);
static struct platform_driver atmel_ebi_driver = {
+ .probe = atmel_ebi_probe,
.driver = {
.name = "atmel-ebi",
.of_match_table = atmel_ebi_id_table,
.pm = &atmel_ebi_pm_ops,
},
};
-builtin_platform_driver_probe(atmel_ebi_driver, atmel_ebi_probe);
+builtin_platform_driver(atmel_ebi_driver);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 8846550a8892..05444ecf3909 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1371,7 +1371,9 @@ static void mmc_select_driver_type(struct mmc_card *card)
card->drive_strength = drive_strength;
- if (drv_type)
+ if (fixed_drv_type >= 0 && drive_strength)
+ mmc_set_driver_type(card->host, drive_strength);
+ else if (drv_type)
mmc_set_driver_type(card->host, drv_type);
}
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index c6eece4ec3fd..75c82ff20f17 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -441,6 +441,22 @@ static int dw_mci_common_parse_dt(struct dw_mci *host)
return 0;
}
+static int dw_mci_rk2928_parse_dt(struct dw_mci *host)
+{
+ struct dw_mci_rockchip_priv_data *priv;
+ int err;
+
+ err = dw_mci_common_parse_dt(host);
+ if (err)
+ return err;
+
+ priv = host->priv;
+
+ priv->internal_phase = false;
+
+ return 0;
+}
+
static int dw_mci_rk3288_parse_dt(struct dw_mci *host)
{
struct dw_mci_rockchip_priv_data *priv;
@@ -514,6 +530,7 @@ static int dw_mci_rockchip_init(struct dw_mci *host)
static const struct dw_mci_drv_data rk2928_drv_data = {
.init = dw_mci_rockchip_init,
+ .parse_dt = dw_mci_rk2928_parse_dt,
};
static const struct dw_mci_drv_data rk3288_drv_data = {
diff --git a/drivers/mmc/host/litex_mmc.c b/drivers/mmc/host/litex_mmc.c
index d2f19c2dc673..3655542ca998 100644
--- a/drivers/mmc/host/litex_mmc.c
+++ b/drivers/mmc/host/litex_mmc.c
@@ -16,6 +16,7 @@
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/litex.h>
+#include <linux/math.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
@@ -68,6 +69,9 @@
#define SD_SLEEP_US 5
#define SD_TIMEOUT_US 20000
+#define SD_INIT_DELAY_US 1000
+#define SD_INIT_CLK_HZ 400000
+
#define SDIRQ_CARD_DETECT 1
#define SDIRQ_SD_TO_MEM_DONE 2
#define SDIRQ_MEM_TO_SD_DONE 4
@@ -436,11 +440,10 @@ static void litex_mmc_setclk(struct litex_mmc_host *host, unsigned int freq)
struct device *dev = mmc_dev(host->mmc);
u32 div;
- div = freq ? host->ref_clk / freq : 256U;
- div = roundup_pow_of_two(div);
+ div = freq ? DIV_ROUND_UP(host->ref_clk, freq) : 256U;
div = clamp(div, 2U, 256U);
dev_dbg(dev, "sd_clk_freq=%d: set to %d via div=%d\n",
- freq, host->ref_clk / div, div);
+ freq, host->ref_clk / ((div + 1) & ~1U), div);
litex_write16(host->sdphy + LITEX_PHY_CLOCKERDIV, div);
host->sd_clk = freq;
}
@@ -450,6 +453,17 @@ static void litex_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
struct litex_mmc_host *host = mmc_priv(mmc);
/*
+ * The SD specification requires at least 74 idle clocks before CMD0.
+ * These dummy cycles is generated by writing LITEX_PHY_INITIALIZE.
+ */
+ if (ios->chip_select == MMC_CS_HIGH) {
+ litex_mmc_setclk(host, SD_INIT_CLK_HZ);
+ litex_write8(host->sdphy + LITEX_PHY_INITIALIZE, 1);
+ fsleep(SD_INIT_DELAY_US);
+ return;
+ }
+
+ /*
* NOTE: Ignore any ios->bus_width updates; they occur right after
* the mmc core sends its own acmd6 bus-width change notification,
* which is redundant since we snoop on the command flow and inject
diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index f6ebb7bc7ede..838248bf8dd6 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -279,6 +279,7 @@ static const struct renesas_sdhi_of_data_with_quirks of_rza2_compatible = {
static const struct of_device_id renesas_sdhi_internal_dmac_of_match[] = {
{ .compatible = "renesas,sdhi-r7s9210", .data = &of_rza2_compatible, },
{ .compatible = "renesas,sdhi-mmc-r8a77470", .data = &of_rcar_gen3_compatible, },
+ { .compatible = "renesas,sdhi-r8a774e1", .data = &of_r8a7795_compatible, },
{ .compatible = "renesas,sdhi-r8a7795", .data = &of_r8a7795_compatible, },
{ .compatible = "renesas,sdhi-r8a77961", .data = &of_r8a77961_compatible, },
{ .compatible = "renesas,sdhi-r8a77965", .data = &of_r8a77965_compatible, },
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 633462c0be5f..0882ce74e0c9 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -1918,14 +1918,14 @@ static int sdhci_msm_ice_init(struct sdhci_msm_host *msm_host,
return 0;
ice = devm_of_qcom_ice_get(dev);
- if (ice == ERR_PTR(-EOPNOTSUPP)) {
+ if (IS_ERR(ice)) {
+ if (ice != ERR_PTR(-EOPNOTSUPP))
+ return PTR_ERR(ice);
+
dev_warn(dev, "Disabling inline encryption support\n");
- ice = NULL;
+ return 0;
}
- if (IS_ERR_OR_NULL(ice))
- return PTR_ERR_OR_ZERO(ice);
-
msm_host->ice = ice;
/* Initialize the blk_crypto_profile */
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index 0b2158a7e409..b9ecd91f44ad 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -277,6 +277,7 @@
#define PHY_DELAY_CODE_MAX 0x7f
#define PHY_DELAY_CODE_EMMC 0x17
#define PHY_DELAY_CODE_SD 0x55
+#define PHY_DELAY_CODE_SDIO 0x29
struct rk35xx_priv {
struct reset_control *reset;
@@ -1433,10 +1434,7 @@ static void sdhci_eic7700_set_clock(struct sdhci_host *host, unsigned int clock)
clk_set_rate(pltfm_host->clk, clock);
clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
- clk |= SDHCI_CLOCK_INT_EN;
- sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
-
- dwcmshc_enable_card_clk(host);
+ sdhci_enable_clk(host, clk);
}
static void sdhci_eic7700_config_phy_delay(struct sdhci_host *host, int delay)
@@ -1497,7 +1495,7 @@ static void sdhci_eic7700_config_phy(struct sdhci_host *host)
static void sdhci_eic7700_reset(struct sdhci_host *host, u8 mask)
{
- sdhci_reset(host, mask);
+ dwcmshc_reset(host, mask);
/* after reset all, the phy's config will be clear */
if (mask == SDHCI_RESET_ALL)
@@ -1594,18 +1592,17 @@ static int sdhci_eic7700_phase_code_tuning(struct sdhci_host *host, u32 opcode)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
- u32 sd_caps = MMC_CAP2_NO_MMC | MMC_CAP2_NO_SDIO;
+ u32 emmc_caps = MMC_CAP2_NO_SD | MMC_CAP2_NO_SDIO;
int phase_code = -1;
int code_range = -1;
- bool is_sd = false;
int code_min = -1;
int code_max = -1;
int cmd_error = 0;
+ bool is_emmc;
int ret = 0;
int i = 0;
- if ((host->mmc->caps2 & sd_caps) == sd_caps)
- is_sd = true;
+ is_emmc = (host->mmc->caps2 & emmc_caps) == emmc_caps;
for (i = 0; i <= MAX_PHASE_CODE; i++) {
/* Centered Phase code */
@@ -1614,8 +1611,8 @@ static int sdhci_eic7700_phase_code_tuning(struct sdhci_host *host, u32 opcode)
host->ops->reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
if (ret) {
- /* SD specific range tracking */
- if (is_sd && code_min != -1 && code_max != -1) {
+ /* SD/SDIO specific range tracking */
+ if (!is_emmc && code_min != -1 && code_max != -1) {
if (code_max - code_min > code_range) {
code_range = code_max - code_min;
phase_code = (code_min + code_max) / 2;
@@ -1626,17 +1623,17 @@ static int sdhci_eic7700_phase_code_tuning(struct sdhci_host *host, u32 opcode)
code_max = -1;
}
/* EMMC breaks after first valid range */
- if (!is_sd && code_min != -1 && code_max != -1)
+ if (is_emmc && code_min != -1 && code_max != -1)
break;
} else {
/* Track valid phase code range */
if (code_min == -1) {
code_min = i;
- if (!is_sd)
+ if (is_emmc)
continue;
}
code_max = i;
- if (is_sd && i == MAX_PHASE_CODE) {
+ if (!is_emmc && i == MAX_PHASE_CODE) {
if (code_max - code_min > code_range) {
code_range = code_max - code_min;
phase_code = (code_min + code_max) / 2;
@@ -1646,19 +1643,19 @@ static int sdhci_eic7700_phase_code_tuning(struct sdhci_host *host, u32 opcode)
}
/* Handle tuning failure case */
- if ((is_sd && phase_code == -1) ||
- (!is_sd && code_min == -1 && code_max == -1)) {
+ if ((!is_emmc && phase_code == -1) ||
+ (is_emmc && code_min == -1 && code_max == -1)) {
pr_err("%s: phase code tuning failed!\n", mmc_hostname(host->mmc));
sdhci_writew(host, 0, priv->vendor_specific_area1 + DWCMSHC_AT_STAT);
return -EIO;
}
- if (!is_sd)
+ if (is_emmc)
phase_code = (code_min + code_max) / 2;
sdhci_writew(host, phase_code, priv->vendor_specific_area1 + DWCMSHC_AT_STAT);
- /* SD specific final verification */
- if (is_sd) {
+ /* SD/SDIO specific final verification */
+ if (!is_emmc) {
ret = mmc_send_tuning(host->mmc, opcode, &cmd_error);
host->ops->reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
if (ret) {
@@ -1756,9 +1753,9 @@ static void sdhci_eic7700_set_uhs_signaling(struct sdhci_host *host, unsigned in
static void sdhci_eic7700_set_uhs_wrapper(struct sdhci_host *host, unsigned int timing)
{
- u32 sd_caps = MMC_CAP2_NO_MMC | MMC_CAP2_NO_SDIO;
+ u32 emmc_caps = MMC_CAP2_NO_SD | MMC_CAP2_NO_SDIO;
- if ((host->mmc->caps2 & sd_caps) == sd_caps)
+ if ((host->mmc->caps2 & emmc_caps) != emmc_caps)
sdhci_set_uhs_signaling(host, timing);
else
sdhci_eic7700_set_uhs_signaling(host, timing);
@@ -1767,6 +1764,7 @@ static void sdhci_eic7700_set_uhs_wrapper(struct sdhci_host *host, unsigned int
static int eic7700_init(struct device *dev, struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
{
u32 emmc_caps = MMC_CAP2_NO_SD | MMC_CAP2_NO_SDIO;
+ u32 sd_caps = MMC_CAP2_NO_MMC | MMC_CAP2_NO_SDIO;
unsigned int val, hsp_int_status, hsp_pwr_ctrl;
static const char * const clk_ids[] = {"axi"};
struct of_phandle_args args;
@@ -1821,8 +1819,10 @@ static int eic7700_init(struct device *dev, struct sdhci_host *host, struct dwcm
if ((host->mmc->caps2 & emmc_caps) == emmc_caps)
dwc_priv->delay_line = PHY_DELAY_CODE_EMMC;
- else
+ else if ((host->mmc->caps2 & sd_caps) == sd_caps)
dwc_priv->delay_line = PHY_DELAY_CODE_SD;
+ else
+ dwc_priv->delay_line = PHY_DELAY_CODE_SDIO;
if (!of_property_read_u32(dev->of_node, "eswin,drive-impedance-ohms", &val))
priv->drive_impedance = eic7700_convert_drive_impedance_ohm(dev, val);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 605be55f8d2d..e3bf901b10aa 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -3836,6 +3836,7 @@ int sdhci_resume_host(struct sdhci_host *host)
host->pwr = 0;
host->clock = 0;
host->reinit_uhs = true;
+ mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios);
mmc->ops->set_ios(mmc, &mmc->ios);
} else {
sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER));
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index f0aa7d2f2171..985ef66dc333 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1386,8 +1386,8 @@ static void ad_churn_machine(struct port *port)
{
if (port->sm_vars & AD_PORT_CHURNED) {
port->sm_vars &= ~AD_PORT_CHURNED;
- port->sm_churn_actor_state = AD_CHURN_MONITOR;
- port->sm_churn_partner_state = AD_CHURN_MONITOR;
+ WRITE_ONCE(port->sm_churn_actor_state, AD_CHURN_MONITOR);
+ WRITE_ONCE(port->sm_churn_partner_state, AD_CHURN_MONITOR);
port->sm_churn_actor_timer_counter =
__ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0);
port->sm_churn_partner_timer_counter =
@@ -1398,20 +1398,22 @@ static void ad_churn_machine(struct port *port)
!(--port->sm_churn_actor_timer_counter) &&
port->sm_churn_actor_state == AD_CHURN_MONITOR) {
if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) {
- port->sm_churn_actor_state = AD_NO_CHURN;
+ WRITE_ONCE(port->sm_churn_actor_state, AD_NO_CHURN);
} else {
- port->churn_actor_count++;
- port->sm_churn_actor_state = AD_CHURN;
+ WRITE_ONCE(port->churn_actor_count,
+ port->churn_actor_count + 1);
+ WRITE_ONCE(port->sm_churn_actor_state, AD_CHURN);
}
}
if (port->sm_churn_partner_timer_counter &&
!(--port->sm_churn_partner_timer_counter) &&
port->sm_churn_partner_state == AD_CHURN_MONITOR) {
if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) {
- port->sm_churn_partner_state = AD_NO_CHURN;
+ WRITE_ONCE(port->sm_churn_partner_state, AD_NO_CHURN);
} else {
- port->churn_partner_count++;
- port->sm_churn_partner_state = AD_CHURN;
+ WRITE_ONCE(port->churn_partner_count,
+ port->churn_partner_count + 1);
+ WRITE_ONCE(port->sm_churn_partner_state, AD_CHURN);
}
}
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 82e779f7916b..8e75453ce0ef 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4621,11 +4621,11 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
slave_dev = __dev_get_by_name(net, ifr->ifr_slave);
- slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev);
-
if (!slave_dev)
return -ENODEV;
+ slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev);
+
switch (cmd) {
case SIOCBONDENSLAVE:
res = bond_enslave(bond_dev, slave_dev, NULL);
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index c7d3e0602c83..90365d3f7ebf 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -82,10 +82,10 @@ static int bond_fill_slave_info(struct sk_buff *skb,
goto nla_put_failure_rcu;
if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE,
- ad_port->sm_churn_actor_state))
+ READ_ONCE(ad_port->sm_churn_actor_state)))
goto nla_put_failure_rcu;
if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE,
- ad_port->sm_churn_partner_state))
+ READ_ONCE(ad_port->sm_churn_partner_state)))
goto nla_put_failure_rcu;
}
rcu_read_unlock();
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 3714aab1a3d9..3607b62f9b63 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -221,13 +221,13 @@ static void bond_info_show_slave(struct seq_file *seq,
seq_printf(seq, "Aggregator ID: %d\n",
agg->aggregator_identifier);
seq_printf(seq, "Actor Churn State: %s\n",
- bond_3ad_churn_desc(port->sm_churn_actor_state));
+ bond_3ad_churn_desc(READ_ONCE(port->sm_churn_actor_state)));
seq_printf(seq, "Partner Churn State: %s\n",
- bond_3ad_churn_desc(port->sm_churn_partner_state));
+ bond_3ad_churn_desc(READ_ONCE(port->sm_churn_partner_state)));
seq_printf(seq, "Actor Churned Count: %d\n",
- port->churn_actor_count);
+ READ_ONCE(port->churn_actor_count));
seq_printf(seq, "Partner Churned Count: %d\n",
- port->churn_partner_count);
+ READ_ONCE(port->churn_partner_count));
if (capable(CAP_NET_ADMIN)) {
seq_puts(seq, "details actor lacp pdu:\n");
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index cecd66251dba..eab6a98d62b9 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -2936,7 +2936,7 @@ static void airoha_metadata_dst_free(struct airoha_gdm_port *port)
if (!port->dsa_meta[i])
continue;
- metadata_dst_free(port->dsa_meta[i]);
+ dst_release(&port->dsa_meta[i]->dst);
}
}
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index 911808ab13a7..4f3076d4ea34 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1407,8 +1407,10 @@ static int pcnet32_poll(struct napi_struct *napi, int budget)
pcnet32_restart(dev, CSR0_START);
netif_wake_queue(dev);
}
+ spin_unlock_irqrestore(&lp->lock, flags);
if (work_done < budget && napi_complete_done(napi, work_done)) {
+ spin_lock_irqsave(&lp->lock, flags);
/* clear interrupt masks */
val = lp->a->read_csr(ioaddr, CSR3);
val &= 0x00ff;
@@ -1416,9 +1418,9 @@ static int pcnet32_poll(struct napi_struct *napi, int budget)
/* Set interrupt enable. */
lp->a->write_csr(ioaddr, CSR0, CSR0_INTEN);
+ spin_unlock_irqrestore(&lp->lock, flags);
}
- spin_unlock_irqrestore(&lp->lock, flags);
return work_done;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 008c34cff7b4..35e1f8f663c7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -14388,13 +14388,28 @@ static void bnxt_unlock_sp(struct bnxt *bp)
netdev_unlock(bp->dev);
}
+/* Same as bnxt_lock_sp() with additional rtnl_lock */
+static void bnxt_rtnl_lock_sp(struct bnxt *bp)
+{
+ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+ rtnl_lock();
+ netdev_lock(bp->dev);
+}
+
+static void bnxt_rtnl_unlock_sp(struct bnxt *bp)
+{
+ set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+ netdev_unlock(bp->dev);
+ rtnl_unlock();
+}
+
/* Only called from bnxt_sp_task() */
static void bnxt_reset(struct bnxt *bp, bool silent)
{
- bnxt_lock_sp(bp);
+ bnxt_rtnl_lock_sp(bp);
if (test_bit(BNXT_STATE_OPEN, &bp->state))
bnxt_reset_task(bp, silent);
- bnxt_unlock_sp(bp);
+ bnxt_rtnl_unlock_sp(bp);
}
/* Only called from bnxt_sp_task() */
@@ -14402,9 +14417,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
{
int i;
- bnxt_lock_sp(bp);
+ bnxt_rtnl_lock_sp(bp);
if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
- bnxt_unlock_sp(bp);
+ bnxt_rtnl_unlock_sp(bp);
return;
}
/* Disable and flush TPA before resetting the RX ring */
@@ -14443,7 +14458,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
}
if (bp->flags & BNXT_FLAG_TPA)
bnxt_set_tpa(bp, true);
- bnxt_unlock_sp(bp);
+ bnxt_rtnl_unlock_sp(bp);
}
static void bnxt_fw_fatal_close(struct bnxt *bp)
@@ -15358,15 +15373,17 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
fallthrough;
case BNXT_FW_RESET_STATE_OPENING:
- while (!netdev_trylock(bp->dev)) {
+ while (!rtnl_trylock()) {
bnxt_queue_fw_reset_work(bp, HZ / 10);
return;
}
+ netdev_lock(bp->dev);
rc = bnxt_open(bp->dev);
if (rc) {
netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
bnxt_fw_reset_abort(bp, rc);
netdev_unlock(bp->dev);
+ rtnl_unlock();
goto ulp_start;
}
@@ -15386,6 +15403,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bnxt_dl_health_fw_status_update(bp, true);
}
netdev_unlock(bp->dev);
+ rtnl_unlock();
bnxt_ulp_start(bp);
bnxt_reenable_sriov(bp);
netdev_lock(bp->dev);
@@ -16379,7 +16397,7 @@ err_reset:
rc);
napi_enable_locked(&bnapi->napi);
bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
- bnxt_reset_task(bp, true);
+ netif_close(dev);
return rc;
}
@@ -17230,6 +17248,7 @@ static int bnxt_resume(struct device *device)
struct bnxt *bp = netdev_priv(dev);
int rc = 0;
+ rtnl_lock();
netdev_lock(dev);
rc = pci_enable_device(bp->pdev);
if (rc) {
@@ -17274,6 +17293,7 @@ static int bnxt_resume(struct device *device)
resume_exit:
netdev_unlock(bp->dev);
+ rtnl_unlock();
if (!rc) {
bnxt_ulp_start(bp);
bnxt_reenable_sriov(bp);
@@ -17445,6 +17465,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
int err;
netdev_info(bp->dev, "PCI Slot Resume\n");
+ rtnl_lock();
netdev_lock(netdev);
err = bnxt_hwrm_func_qcaps(bp);
@@ -17462,6 +17483,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
netif_device_attach(netdev);
netdev_unlock(netdev);
+ rtnl_unlock();
if (!err) {
bnxt_ulp_start(bp);
bnxt_reenable_sriov(bp);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index f89aa94ce020..6ebde65d7f1b 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -5594,6 +5594,7 @@ static int fec_resume(struct device *dev)
if (fep->rpm_active)
pm_runtime_force_resume(dev);
+ pinctrl_pm_select_default_state(&fep->pdev->dev);
ret = fec_enet_clk_enable(ndev, true);
if (ret) {
rtnl_unlock();
@@ -5610,8 +5611,6 @@ static int fec_resume(struct device *dev)
val &= ~(FEC_ECR_MAGICEN | FEC_ECR_SLEEP);
writel(val, fep->hwp + FEC_ECNTRL);
fep->wol_flag &= ~FEC_WOL_FLAG_SLEEP_ON;
- } else {
- pinctrl_pm_select_default_state(&fep->pdev->dev);
}
fec_restart(ndev);
netif_tx_lock_bh(ndev);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index de3fbd3d15d6..65397daae4c2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -1145,6 +1145,7 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf,
int slot);
int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc);
int rvu_cpt_init(struct rvu *rvu);
+u32 rvu_get_cpt_chan_mask(struct rvu *rvu);
#define NDC_AF_BANK_MASK GENMASK_ULL(7, 0)
#define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 607d0cf1a778..d301a3f0f87a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -701,6 +701,19 @@ void npc_set_mcam_action(struct rvu *rvu, struct npc_mcam *mcam,
return rvu_write64(rvu, blkaddr, reg, cfg);
}
+u32 rvu_get_cpt_chan_mask(struct rvu *rvu)
+{
+ /* For cn10k the upper two bits of the channel number are
+ * cpt channel number. with masking out these bits in the
+ * mcam entry, same entry used for NIX will allow packets
+ * received from cpt for parsing.
+ */
+ if (!is_rvu_otx2(rvu))
+ return NIX_CHAN_CPT_X2P_MASK;
+ else
+ return 0xFFFu;
+}
+
void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
int nixlf, u64 chan, u8 *mac_addr)
{
@@ -750,7 +763,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
eth_broadcast_addr((u8 *)&req.mask.dmac);
req.features = BIT_ULL(NPC_DMAC);
req.channel = chan;
- req.chan_mask = 0xFFFU;
+ req.chan_mask = rvu_get_cpt_chan_mask(rvu);
req.intf = pfvf->nix_rx_intf;
req.op = action.op;
req.hdr.pcifunc = 0; /* AF is requester */
@@ -845,11 +858,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
* mcam entry, same entry used for NIX will allow packets
* received from cpt for parsing.
*/
- if (!is_rvu_otx2(rvu)) {
- req.chan_mask = NIX_CHAN_CPT_X2P_MASK;
- } else {
- req.chan_mask = 0xFFFU;
- }
+ req.chan_mask = rvu_get_cpt_chan_mask(rvu);
if (chan_cnt > 1) {
if (!is_power_of_2(chan_cnt)) {
@@ -1053,16 +1062,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
ether_addr_copy(req.mask.dmac, mac_addr);
req.features = BIT_ULL(NPC_DMAC);
- /* For cn10k the upper two bits of the channel number are
- * cpt channel number. with masking out these bits in the
- * mcam entry, same entry used for NIX will allow packets
- * received from cpt for parsing.
- */
- if (!is_rvu_otx2(rvu))
- req.chan_mask = NIX_CHAN_CPT_X2P_MASK;
- else
- req.chan_mask = 0xFFFU;
-
+ req.chan_mask = rvu_get_cpt_chan_mask(rvu);
req.channel = chan;
req.intf = pfvf->nix_rx_intf;
req.entry = index;
@@ -2192,8 +2192,8 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr)
goto free_entry_cntr_map;
/* Alloc memory for saving target device of mcam rule */
- mcam->entry2target_pffunc = kmalloc_array(mcam->total_entries,
- sizeof(u16), GFP_KERNEL);
+ mcam->entry2target_pffunc = kcalloc(mcam->total_entries,
+ sizeof(u16), GFP_KERNEL);
if (!mcam->entry2target_pffunc)
goto free_cntr_refcnt;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 6ae9cdcb608b..34f1e066707b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1820,7 +1820,7 @@ process_flow:
/* ignore chan_mask in case pf func is not AF, revisit later */
if (!is_pffunc_af(req->hdr.pcifunc))
- req->chan_mask = 0xFFF;
+ req->chan_mask = rvu_get_cpt_chan_mask(rvu);
err = npc_check_unsupported_flows(rvu, req->features, req->intf);
if (err) {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index ee623476e5ff..f9fbf0c17648 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -3473,7 +3473,7 @@ static void otx2_ndc_sync(struct otx2_nic *pf)
req->nix_lf_rx_sync = 1;
req->npa_lf_sync = 1;
- if (!otx2_sync_mbox_msg(mbox))
+ if (otx2_sync_mbox_msg(mbox))
dev_err(pf->dev, "NDC sync operation failed\n");
mutex_unlock(&mbox->lock);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 8d225bc9f063..7d771168b990 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -4491,7 +4491,7 @@ static int mtk_free_dev(struct mtk_eth *eth)
for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
if (!eth->dsa_meta[i])
break;
- metadata_dst_free(eth->dsa_meta[i]);
+ dst_release(&eth->dsa_meta[i]->dst);
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index c89417c1a1f9..e2895972cc82 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1002,12 +1002,13 @@ static void cmd_work_handler(struct work_struct *work)
ent->callback(-EBUSY, ent->context);
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
+ complete(&ent->slotted);
cmd_ent_put(ent);
} else {
ent->ret = -EBUSY;
complete(&ent->done);
+ complete(&ent->slotted);
}
- complete(&ent->slotted);
return;
}
alloc_ret = cmd_alloc_index(cmd, ent);
@@ -1017,13 +1018,14 @@ static void cmd_work_handler(struct work_struct *work)
ent->callback(-EAGAIN, ent->context);
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
+ complete(&ent->slotted);
cmd_ent_put(ent);
} else {
ent->ret = -EAGAIN;
complete(&ent->done);
+ complete(&ent->slotted);
}
up(&cmd->vars.sem);
- complete(&ent->slotted);
return;
}
} else {
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index f3332417162e..ffac22883e49 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1219,6 +1219,36 @@ static void lan743x_mac_set_address(struct lan743x_adapter *adapter,
"MAC address set to %pM\n", addr);
}
+static void lan743x_mac_rx_enable_fse(struct lan743x_adapter *adapter)
+{
+ u32 mac_rx;
+ bool rxen;
+
+ mac_rx = lan743x_csr_read(adapter, MAC_RX);
+ if (mac_rx & MAC_RX_FSE_)
+ return;
+
+ rxen = mac_rx & MAC_RX_RXEN_;
+ if (rxen) {
+ mac_rx &= ~MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+ 1, 1000, 20000, 100);
+ }
+
+ /* Per AN2948, hardware prevents modification of the FSE bit while the
+ * MAC receiver is enabled (RXEN bit set). Use separate register write
+ * to assert the FSE bit before enabling the RXEN bit in MAC_RX
+ */
+ mac_rx |= MAC_RX_FSE_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+
+ if (rxen) {
+ mac_rx |= MAC_RX_RXEN_;
+ lan743x_csr_write(adapter, MAC_RX, mac_rx);
+ }
+}
+
static int lan743x_mac_init(struct lan743x_adapter *adapter)
{
bool mac_address_valid = true;
@@ -1258,6 +1288,8 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter)
lan743x_mac_set_address(adapter, adapter->mac_address);
eth_hw_addr_set(netdev, adapter->mac_address);
+ lan743x_mac_rx_enable_fse(adapter);
+
return 0;
}
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index 160d94a7cee6..1573c8f9c993 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -182,6 +182,7 @@
#define MAC_RX (0x104)
#define MAC_RX_MAX_SIZE_SHIFT_ (16)
#define MAC_RX_MAX_SIZE_MASK_ (0x3FFF0000)
+#define MAC_RX_FSE_ BIT(2)
#define MAC_RX_RXD_ BIT(1)
#define MAC_RX_RXEN_ BIT(0)
diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c
index ef13109c49cf..55105d34bc79 100644
--- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
+++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
@@ -239,6 +239,8 @@ static void rtase_tx_clear(struct rtase_private *tp)
rtase_tx_clear_range(ring, ring->dirty_idx, RTASE_NUM_DESC);
ring->cur_idx = 0;
ring->dirty_idx = 0;
+
+ netdev_tx_reset_subqueue(tp->dev, i);
}
}
@@ -1563,8 +1565,9 @@ static void rtase_dump_tally_counter(const struct rtase_private *tp)
rtase_w32(tp, RTASE_DTCCR0, cmd);
rtase_w32(tp, RTASE_DTCCR0, cmd | RTASE_COUNTER_DUMP);
- err = read_poll_timeout(rtase_r32, val, !(val & RTASE_COUNTER_DUMP),
- 10, 250, false, tp, RTASE_DTCCR0);
+ err = read_poll_timeout_atomic(rtase_r32, val,
+ !(val & RTASE_COUNTER_DUMP),
+ 10, 250, false, tp, RTASE_DTCCR0);
if (err == -ETIMEDOUT)
netdev_err(tp->dev, "error occurred in dump tally counter\n");
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index c6563367d382..715180c3a1b3 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -632,7 +632,7 @@ static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb,
uh = udp_hdr(skb);
uh->len = htons(skb->len - gro_hint->nested_tp_offset);
if (uh->check) {
- len = skb->len - gro_hint->nested_nh_offset;
+ len = skb->len - gro_hint->nested_tp_offset;
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
if (gro_hint->nested_is_v6)
uh->check = ~udp_v6_check(len, &ipv6h->saddr,
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index bd970f753beb..b94b9c433a21 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -822,6 +822,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
return -EINVAL;
}
+ sfp->i2c_block_size = sfp->i2c_max_block_size;
return 0;
}
diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c
index 2042369379ff..3e76f4e21094 100644
--- a/drivers/net/vxlan/vxlan_vnifilter.c
+++ b/drivers/net/vxlan/vxlan_vnifilter.c
@@ -661,7 +661,7 @@ static int vxlan_vni_update(struct vxlan_dev *vxlan,
if (ret)
return ret;
- if (changed)
+ if (*changed)
vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
return 0;
@@ -759,8 +759,7 @@ static int vxlan_vni_add(struct vxlan_dev *vxlan,
err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed,
extack);
- if (changed)
- vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
+ vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
return err;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/ap.c b/drivers/net/wireless/intel/iwlwifi/mld/ap.c
index 5c59acc8c4c5..6598d9333333 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/ap.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/ap.c
@@ -9,7 +9,6 @@
#include "ap.h"
#include "hcmd.h"
#include "tx.h"
-#include "power.h"
#include "key.h"
#include "phy.h"
#include "iwl-utils.h"
@@ -273,9 +272,6 @@ int iwl_mld_start_ap_ibss(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx;
int ret;
- if (vif->type == NL80211_IFTYPE_AP)
- iwl_mld_send_ap_tx_power_constraint_cmd(mld, vif, link);
-
ret = iwl_mld_update_beacon_template(mld, vif, link);
if (ret)
return ret;
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
index da6fd7471568..3c8daddc0bcb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
@@ -1150,6 +1150,13 @@ int iwl_mld_assign_vif_chanctx(struct ieee80211_hw *hw,
if (iwl_mld_can_activate_link(mld, vif, link)) {
iwl_mld_tlc_update_phy(mld, vif, link);
+ /* FW requires AP_TX_POWER_CONSTRAINTS_CMD before link
+ * activation for AP and after link activation for STA,
+ * for an unknown reason.
+ */
+ if (vif->type == NL80211_IFTYPE_AP)
+ iwl_mld_send_ap_tx_power_constraint_cmd(mld, vif, link);
+
ret = iwl_mld_activate_link(mld, link);
if (ret)
goto err;
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/power.c b/drivers/net/wireless/intel/iwlwifi/mld/power.c
index 49b0d9f8f865..266fe16bb95d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/power.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/power.c
@@ -366,7 +366,7 @@ iwl_mld_send_ap_tx_power_constraint_cmd(struct iwl_mld *mld,
lockdep_assert_wiphy(mld->wiphy);
- if (!mld_link->active)
+ if (!mld_link->active && vif->type != NL80211_IFTYPE_AP)
return;
if (link->chanreq.oper.chan->band != NL80211_BAND_6GHZ)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index f05df3a3300e..6e507d6dcdd2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2025 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2026 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -459,9 +459,14 @@ static void iwl_mvm_phy_filter_init(struct iwl_mvm *mvm,
static void iwl_mvm_uats_init(struct iwl_mvm *mvm)
{
+ struct iwl_mcc_allowed_ap_type_cmd_v1 *cmd __free(kfree) = NULL;
int cmd_id = WIDE_ID(REGULATORY_AND_NVM_GROUP,
MCC_ALLOWED_AP_TYPE_CMD);
- struct iwl_mcc_allowed_ap_type_cmd_v1 cmd = {};
+ struct iwl_host_cmd hcmd = {
+ .id = cmd_id,
+ .len[0] = sizeof(*cmd),
+ .dataflags[0] = IWL_HCMD_DFL_NOCOPY,
+ };
u8 cmd_ver;
int ret;
@@ -485,14 +490,25 @@ static void iwl_mvm_uats_init(struct iwl_mvm *mvm)
if (!mvm->fwrt.ap_type_cmd_valid)
return;
+ /* Since we free the command immediately after iwl_mvm_send_cmd, we
+ * must send this command in SYNC mode.
+ */
+ lockdep_assert_held(&mvm->mutex);
+
+ cmd = kzalloc_obj(*cmd);
+ if (!cmd)
+ return;
+
BUILD_BUG_ON(sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map) !=
- sizeof(cmd.mcc_to_ap_type_map));
+ sizeof(cmd->mcc_to_ap_type_map));
- memcpy(cmd.mcc_to_ap_type_map,
+ memcpy(cmd->mcc_to_ap_type_map,
mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map,
sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map));
- ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, sizeof(cmd), &cmd);
+ hcmd.data[0] = cmd;
+
+ ret = iwl_mvm_send_cmd(mvm, &hcmd);
if (ret < 0)
IWL_ERR(mvm, "failed to send MCC_ALLOWED_AP_TYPE_CMD (%d)\n",
ret);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index ae177477b201..384bed95835d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -1416,6 +1416,12 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_rf_cfg *cfg,
fw_has_capa(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE);
+ /* Those firmware versions claim to support the fw_reset_handshake
+ * but they are buggy.
+ */
+ if (IWL_UCODE_MAJOR(mvm->fw->ucode_ver) <= 77)
+ trans->conf.fw_reset_handshake = false;
+
trans->conf.queue_alloc_cmd_ver =
iwl_fw_lookup_cmd_ver(mvm->fw,
WIDE_ID(DATA_PATH_GROUP,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index dc99e7ac4726..eb3c5a6dd088 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1225,33 +1225,41 @@ static int _iwl_pci_resume(struct device *device, bool restore)
if (!trans->op_mode)
return 0;
- /*
- * Scratch value was altered, this means the device was powered off, we
- * need to reset it completely.
- * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan,
- * but not bits [15:8]. So if we have bits set in lower word, assume
- * the device is alive.
- * Alternatively, if the scratch value is 0xFFFFFFFF, then we no longer
- * have access to the device and consider it powered off.
- * For older devices, just try silently to grab the NIC.
- */
- if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
- u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH);
-
- if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) ||
- scratch == ~0U)
- device_was_powered_off = true;
- } else {
+ if (test_bit(STATUS_DEVICE_ENABLED, &trans->status)) {
/*
- * bh are re-enabled by iwl_trans_pcie_release_nic_access,
- * so re-enable them if _iwl_trans_pcie_grab_nic_access fails.
+ * Scratch value was altered, this means the device was powered
+ * off, we need to reset it completely.
+ * Note: MAC (bits 0:7) will be cleared upon suspend even with
+ * wowlan, but not bits [15:8]. So if we have bits set in lower
+ * word, assume the device is alive.
+ * Alternatively, if the scratch value is 0xFFFFFFFF, then we
+ * no longer have access to the device and consider it powered
+ * off.
+ * For older devices, just try silently to grab the NIC.
*/
- local_bh_disable();
- if (_iwl_trans_pcie_grab_nic_access(trans, true)) {
- iwl_trans_pcie_release_nic_access(trans);
+ if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+ u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH);
+
+ if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) ||
+ scratch == ~0U) {
+ IWL_DEBUG_WOWLAN(trans,
+ "Scratch 0x%08x indicates device was powered off\n",
+ scratch);
+ device_was_powered_off = true;
+ }
} else {
- device_was_powered_off = true;
- local_bh_enable();
+ /*
+ * bh are re-enabled by iwl_trans_pcie_release_nic_access,
+ * so re-enable them if _iwl_trans_pcie_grab_nic_access
+ * fails.
+ */
+ local_bh_disable();
+ if (_iwl_trans_pcie_grab_nic_access(trans, true)) {
+ iwl_trans_pcie_release_nic_access(trans);
+ } else {
+ device_was_powered_off = true;
+ local_bh_enable();
+ }
}
}
diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c
index 915a4f6defc9..84cb527f59cc 100644
--- a/drivers/ptp/ptp_vclock.c
+++ b/drivers/ptp/ptp_vclock.c
@@ -19,6 +19,8 @@ static DEFINE_SPINLOCK(vclock_hash_lock);
static DEFINE_READ_MOSTLY_HASHTABLE(vclock_hash, 8);
+DEFINE_STATIC_SRCU(vclock_srcu);
+
static void ptp_vclock_hash_add(struct ptp_vclock *vclock)
{
spin_lock(&vclock_hash_lock);
@@ -37,7 +39,7 @@ static void ptp_vclock_hash_del(struct ptp_vclock *vclock)
spin_unlock(&vclock_hash_lock);
- synchronize_rcu();
+ synchronize_srcu(&vclock_srcu);
}
static int ptp_vclock_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
@@ -276,14 +278,16 @@ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index)
{
unsigned int hash = vclock_index % HASH_SIZE(vclock_hash);
struct ptp_vclock *vclock;
- u64 ns;
u64 vclock_ns = 0;
+ int srcu_idx;
+ u64 ns;
ns = ktime_to_ns(*hwtstamp);
- rcu_read_lock();
+ srcu_idx = srcu_read_lock(&vclock_srcu);
- hlist_for_each_entry_rcu(vclock, &vclock_hash[hash], vclock_hash_node) {
+ hlist_for_each_entry_srcu(vclock, &vclock_hash[hash], vclock_hash_node,
+ srcu_read_lock_held(&vclock_srcu)) {
if (vclock->clock->index != vclock_index)
continue;
@@ -294,7 +298,7 @@ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index)
break;
}
- rcu_read_unlock();
+ srcu_read_unlock(&vclock_srcu, srcu_idx);
return ns_to_ktime(vclock_ns);
}
diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c
index 77763a107edb..fc080e56f50d 100644
--- a/drivers/soc/imx/soc-imx8m.c
+++ b/drivers/soc/imx/soc-imx8m.c
@@ -247,7 +247,7 @@ static int imx8m_soc_probe(struct platform_device *pdev)
if (ret)
return ret;
- data = device_get_match_data(dev);
+ data = of_machine_get_match_data(imx8_soc_match);
if (data) {
soc_dev_attr->soc_id = data->name;
ret = imx8m_soc_prepare(pdev, data->ocotp_compatible);
diff --git a/drivers/soc/qcom/ice.c b/drivers/soc/qcom/ice.c
index b203bc685cad..5f20108aa03e 100644
--- a/drivers/soc/qcom/ice.c
+++ b/drivers/soc/qcom/ice.c
@@ -16,6 +16,7 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
+#include <linux/xarray.h>
#include <linux/firmware/qcom/qcom_scm.h>
@@ -108,11 +109,15 @@ struct qcom_ice {
void __iomem *base;
struct clk *core_clk;
+ struct clk *iface_clk;
bool use_hwkm;
bool hwkm_init_complete;
u8 hwkm_version;
};
+static DEFINE_XARRAY(ice_handles);
+static DEFINE_MUTEX(ice_mutex);
+
static bool qcom_ice_check_supported(struct qcom_ice *ice)
{
u32 regval = qcom_ice_readl(ice, QCOM_ICE_REG_VERSION);
@@ -312,8 +317,13 @@ int qcom_ice_resume(struct qcom_ice *ice)
err = clk_prepare_enable(ice->core_clk);
if (err) {
- dev_err(dev, "failed to enable core clock (%d)\n",
- err);
+ dev_err(dev, "Failed to enable core clock: %d\n", err);
+ return err;
+ }
+
+ err = clk_prepare_enable(ice->iface_clk);
+ if (err) {
+ dev_err(dev, "Failed to enable iface clock: %d\n", err);
return err;
}
qcom_ice_hwkm_init(ice);
@@ -323,6 +333,7 @@ EXPORT_SYMBOL_GPL(qcom_ice_resume);
int qcom_ice_suspend(struct qcom_ice *ice)
{
+ clk_disable_unprepare(ice->iface_clk);
clk_disable_unprepare(ice->core_clk);
ice->hwkm_init_complete = false;
@@ -559,7 +570,7 @@ static struct qcom_ice *qcom_ice_create(struct device *dev,
if (!qcom_scm_ice_available()) {
dev_warn(dev, "ICE SCM interface not found\n");
- return NULL;
+ return ERR_PTR(-EOPNOTSUPP);
}
engine = devm_kzalloc(dev, sizeof(*engine), GFP_KERNEL);
@@ -580,10 +591,16 @@ static struct qcom_ice *qcom_ice_create(struct device *dev,
if (!engine->core_clk)
engine->core_clk = devm_clk_get_optional_enabled(dev, "ice");
if (!engine->core_clk)
+ engine->core_clk = devm_clk_get_optional_enabled(dev, "core");
+ if (!engine->core_clk)
engine->core_clk = devm_clk_get_enabled(dev, NULL);
if (IS_ERR(engine->core_clk))
return ERR_CAST(engine->core_clk);
+ engine->iface_clk = devm_clk_get_optional_enabled(dev, "iface");
+ if (IS_ERR(engine->iface_clk))
+ return ERR_CAST(engine->iface_clk);
+
if (!qcom_ice_check_supported(engine))
return ERR_PTR(-EOPNOTSUPP);
@@ -631,6 +648,8 @@ static struct qcom_ice *of_qcom_ice_get(struct device *dev)
return qcom_ice_create(&pdev->dev, base);
}
+ guard(mutex)(&ice_mutex);
+
/*
* If the consumer node does not provider an 'ice' reg range
* (legacy DT binding), then it must at least provide a phandle
@@ -639,20 +658,21 @@ static struct qcom_ice *of_qcom_ice_get(struct device *dev)
struct device_node *node __free(device_node) = of_parse_phandle(dev->of_node,
"qcom,ice", 0);
if (!node)
- return NULL;
+ return ERR_PTR(-EOPNOTSUPP);
pdev = of_find_device_by_node(node);
if (!pdev) {
dev_err(dev, "Cannot find device node %s\n", node->name);
- return ERR_PTR(-EPROBE_DEFER);
+ return ERR_PTR(-ENODEV);
}
- ice = platform_get_drvdata(pdev);
- if (!ice) {
- dev_err(dev, "Cannot get ice instance from %s\n",
- dev_name(&pdev->dev));
+ ice = xa_load(&ice_handles, pdev->dev.of_node->phandle);
+ if (IS_ERR_OR_NULL(ice)) {
platform_device_put(pdev);
- return ERR_PTR(-EPROBE_DEFER);
+ if (!ice)
+ return ERR_PTR(-EPROBE_DEFER);
+ else
+ return ice;
}
link = device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_SUPPLIER);
@@ -691,8 +711,7 @@ static void devm_of_qcom_ice_put(struct device *dev, void *res)
* phandle via 'qcom,ice' property to an ICE DT, the ICE instance will already
* be created and so this function will return that instead.
*
- * Return: ICE pointer on success, NULL if there is no ICE data provided by the
- * consumer or ERR_PTR() on error.
+ * Return: ICE pointer on success, ERR_PTR() on error.
*/
struct qcom_ice *devm_of_qcom_ice_get(struct device *dev)
{
@@ -703,7 +722,7 @@ struct qcom_ice *devm_of_qcom_ice_get(struct device *dev)
return ERR_PTR(-ENOMEM);
ice = of_qcom_ice_get(dev);
- if (!IS_ERR_OR_NULL(ice)) {
+ if (!IS_ERR(ice)) {
*dr = ice;
devres_add(dev, dr);
} else {
@@ -716,24 +735,40 @@ EXPORT_SYMBOL_GPL(devm_of_qcom_ice_get);
static int qcom_ice_probe(struct platform_device *pdev)
{
+ unsigned long phandle = pdev->dev.of_node->phandle;
struct qcom_ice *engine;
void __iomem *base;
+ guard(mutex)(&ice_mutex);
+
base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(base)) {
dev_warn(&pdev->dev, "ICE registers not found\n");
+ /* Store the error pointer for devm_of_qcom_ice_get() */
+ xa_store(&ice_handles, phandle, (__force void *)base, GFP_KERNEL);
return PTR_ERR(base);
}
engine = qcom_ice_create(&pdev->dev, base);
- if (IS_ERR(engine))
+ if (IS_ERR(engine)) {
+ /* Store the error pointer for devm_of_qcom_ice_get() */
+ xa_store(&ice_handles, phandle, engine, GFP_KERNEL);
return PTR_ERR(engine);
+ }
- platform_set_drvdata(pdev, engine);
+ xa_store(&ice_handles, phandle, engine, GFP_KERNEL);
return 0;
}
+static void qcom_ice_remove(struct platform_device *pdev)
+{
+ unsigned long phandle = pdev->dev.of_node->phandle;
+
+ guard(mutex)(&ice_mutex);
+ xa_store(&ice_handles, phandle, NULL, GFP_KERNEL);
+}
+
static const struct of_device_id qcom_ice_of_match_table[] = {
{ .compatible = "qcom,inline-crypto-engine" },
{ },
@@ -742,6 +777,7 @@ MODULE_DEVICE_TABLE(of, qcom_ice_of_match_table);
static struct platform_driver qcom_ice_driver = {
.probe = qcom_ice_probe,
+ .remove = qcom_ice_remove,
.driver = {
.name = "qcom-ice",
.of_match_table = qcom_ice_of_match_table,
diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c
index a3d11b1f90fa..06747e90c230 100644
--- a/drivers/tee/optee/supp.c
+++ b/drivers/tee/optee/supp.c
@@ -10,7 +10,11 @@
struct optee_supp_req {
struct list_head link;
+ int id;
+
bool in_queue;
+ bool processed;
+
u32 func;
u32 ret;
size_t num_params;
@@ -19,6 +23,9 @@ struct optee_supp_req {
struct completion c;
};
+/* It is temporary request used for revoked pending request in supp->idr. */
+#define INVALID_REQ_PTR ((struct optee_supp_req *)ERR_PTR(-EBADF))
+
void optee_supp_init(struct optee_supp *supp)
{
memset(supp, 0, sizeof(*supp));
@@ -39,21 +46,23 @@ void optee_supp_release(struct optee_supp *supp)
{
int id;
struct optee_supp_req *req;
- struct optee_supp_req *req_tmp;
mutex_lock(&supp->mutex);
- /* Abort all request retrieved by supplicant */
+ /* Abort all request */
idr_for_each_entry(&supp->idr, req, id) {
idr_remove(&supp->idr, id);
- req->ret = TEEC_ERROR_COMMUNICATION;
- complete(&req->c);
- }
+ /* Skip if request was already marked invalid */
+ if (IS_ERR(req))
+ continue;
- /* Abort all queued requests */
- list_for_each_entry_safe(req, req_tmp, &supp->reqs, link) {
- list_del(&req->link);
- req->in_queue = false;
+ /* For queued requests where supplicant has not seen it */
+ if (req->in_queue) {
+ list_del(&req->link);
+ req->in_queue = false;
+ }
+
+ req->processed = true;
req->ret = TEEC_ERROR_COMMUNICATION;
complete(&req->c);
}
@@ -100,8 +109,16 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
/* Insert the request in the request list */
mutex_lock(&supp->mutex);
+ req->id = idr_alloc(&supp->idr, req, 1, 0, GFP_KERNEL);
+ if (req->id < 0) {
+ mutex_unlock(&supp->mutex);
+ kfree(req);
+ return TEEC_ERROR_OUT_OF_MEMORY;
+ }
+
list_add_tail(&req->link, &supp->reqs);
req->in_queue = true;
+ req->processed = false;
mutex_unlock(&supp->mutex);
/* Tell an eventual waiter there's a new request */
@@ -117,21 +134,43 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
if (wait_for_completion_killable(&req->c)) {
mutex_lock(&supp->mutex);
if (req->in_queue) {
+ /* Supplicant has not seen this request yet. */
+ idr_remove(&supp->idr, req->id);
list_del(&req->link);
req->in_queue = false;
+
+ ret = TEEC_ERROR_COMMUNICATION;
+ } else if (req->processed) {
+ /*
+ * Supplicant has processed this request. Ignore the
+ * kill signal for now and submit the result. req is not
+ * in supp->reqs (removed by supp_pop_entry()) nor in
+ * supp->idr (removed by supp_pop_req()).
+ */
+ ret = req->ret;
+ } else {
+ /*
+ * Supplicant is in the middle of processing this
+ * request. Replace req with INVALID_REQ_PTR so that
+ * the ID remains busy, causing optee_supp_send() to
+ * fail on the next call to supp_pop_req() with this ID.
+ */
+ idr_replace(&supp->idr, INVALID_REQ_PTR, req->id);
+ ret = TEEC_ERROR_COMMUNICATION;
}
+
mutex_unlock(&supp->mutex);
- req->ret = TEEC_ERROR_COMMUNICATION;
+ } else {
+ ret = req->ret;
}
- ret = req->ret;
kfree(req);
return ret;
}
static struct optee_supp_req *supp_pop_entry(struct optee_supp *supp,
- int num_params, int *id)
+ int num_params)
{
struct optee_supp_req *req;
@@ -153,10 +192,6 @@ static struct optee_supp_req *supp_pop_entry(struct optee_supp *supp,
return ERR_PTR(-EINVAL);
}
- *id = idr_alloc(&supp->idr, req, 1, 0, GFP_KERNEL);
- if (*id < 0)
- return ERR_PTR(-ENOMEM);
-
list_del(&req->link);
req->in_queue = false;
@@ -214,7 +249,6 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
struct optee *optee = tee_get_drvdata(teedev);
struct optee_supp *supp = &optee->supp;
struct optee_supp_req *req = NULL;
- int id;
size_t num_meta;
int rc;
@@ -224,15 +258,11 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
while (true) {
mutex_lock(&supp->mutex);
- req = supp_pop_entry(supp, *num_params - num_meta, &id);
+ req = supp_pop_entry(supp, *num_params - num_meta);
+ if (req)
+ break; /* Keep mutex held. */
mutex_unlock(&supp->mutex);
- if (req) {
- if (IS_ERR(req))
- return PTR_ERR(req);
- break;
- }
-
/*
* If we didn't get a request we'll block in
* wait_for_completion() to avoid needless spinning.
@@ -245,6 +275,13 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
return -ERESTARTSYS;
}
+ /* supp->mutex held and req != NULL. */
+
+ if (IS_ERR(req)) {
+ mutex_unlock(&supp->mutex);
+ return PTR_ERR(req);
+ }
+
if (num_meta) {
/*
* tee-supplicant support meta parameters -> requsts can be
@@ -252,13 +289,11 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
*/
param->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT |
TEE_IOCTL_PARAM_ATTR_META;
- param->u.value.a = id;
+ param->u.value.a = req->id;
param->u.value.b = 0;
param->u.value.c = 0;
} else {
- mutex_lock(&supp->mutex);
- supp->req_id = id;
- mutex_unlock(&supp->mutex);
+ supp->req_id = req->id;
}
*func = req->func;
@@ -266,6 +301,7 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
memcpy(param + num_meta, req->param,
sizeof(struct tee_param) * req->num_params);
+ mutex_unlock(&supp->mutex);
return 0;
}
@@ -297,12 +333,17 @@ static struct optee_supp_req *supp_pop_req(struct optee_supp *supp,
if (!req)
return ERR_PTR(-ENOENT);
+ /* optee_supp_thrd_req() already returned to optee. */
+ if (IS_ERR(req))
+ goto failed_req;
+
if ((num_params - nm) != req->num_params)
return ERR_PTR(-EINVAL);
+ *num_meta = nm;
+failed_req:
idr_remove(&supp->idr, id);
supp->req_id = -1;
- *num_meta = nm;
return req;
}
@@ -328,10 +369,9 @@ int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params,
mutex_lock(&supp->mutex);
req = supp_pop_req(supp, num_params, param, &num_meta);
- mutex_unlock(&supp->mutex);
-
if (IS_ERR(req)) {
- /* Something is wrong, let supplicant restart. */
+ mutex_unlock(&supp->mutex);
+ /* Something is wrong, let supplicant handel it. */
return PTR_ERR(req);
}
@@ -355,9 +395,10 @@ int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params,
}
}
req->ret = ret;
-
+ req->processed = true;
/* Let the requesting thread continue */
complete(&req->c);
+ mutex_unlock(&supp->mutex);
return 0;
}
diff --git a/drivers/tee/qcomtee/core.c b/drivers/tee/qcomtee/core.c
index b1cb50e434f0..60fe3b5776e3 100644
--- a/drivers/tee/qcomtee/core.c
+++ b/drivers/tee/qcomtee/core.c
@@ -306,8 +306,10 @@ int qcomtee_object_user_init(struct qcomtee_object *object,
break;
case QCOMTEE_OBJECT_TYPE_CB:
object->ops = ops;
- if (!object->ops->dispatch)
- return -EINVAL;
+ if (!object->ops->dispatch) {
+ ret = -EINVAL;
+ break;
+ }
/* If failed, "no-name". */
object->name = kvasprintf_const(GFP_KERNEL, fmt, ap);
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index ef9642d72672..1aac50c7c1de 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -530,11 +530,24 @@ static int params_to_user(struct tee_ioctl_param __user *uparams,
return 0;
}
+static void free_params(struct tee_param *params, size_t num_params)
+{
+ size_t n;
+
+ if (!params)
+ return;
+
+ for (n = 0; n < num_params; n++)
+ if (tee_param_is_memref(params + n) && params[n].u.memref.shm)
+ tee_shm_put(params[n].u.memref.shm);
+
+ kfree(params);
+}
+
static int tee_ioctl_open_session(struct tee_context *ctx,
struct tee_ioctl_buf_data __user *ubuf)
{
int rc;
- size_t n;
struct tee_ioctl_buf_data buf;
struct tee_ioctl_open_session_arg __user *uarg;
struct tee_ioctl_open_session_arg arg;
@@ -595,16 +608,7 @@ out:
*/
if (rc && have_session && ctx->teedev->desc->ops->close_session)
ctx->teedev->desc->ops->close_session(ctx, arg.session);
-
- if (params) {
- /* Decrease ref count for all valid shared memory pointers */
- for (n = 0; n < arg.num_params; n++)
- if (tee_param_is_memref(params + n) &&
- params[n].u.memref.shm)
- tee_shm_put(params[n].u.memref.shm);
- kfree(params);
- }
-
+ free_params(params, arg.num_params);
return rc;
}
@@ -612,7 +616,6 @@ static int tee_ioctl_invoke(struct tee_context *ctx,
struct tee_ioctl_buf_data __user *ubuf)
{
int rc;
- size_t n;
struct tee_ioctl_buf_data buf;
struct tee_ioctl_invoke_arg __user *uarg;
struct tee_ioctl_invoke_arg arg;
@@ -657,14 +660,7 @@ static int tee_ioctl_invoke(struct tee_context *ctx,
}
rc = params_to_user(uparams, arg.num_params, params);
out:
- if (params) {
- /* Decrease ref count for all valid shared memory pointers */
- for (n = 0; n < arg.num_params; n++)
- if (tee_param_is_memref(params + n) &&
- params[n].u.memref.shm)
- tee_shm_put(params[n].u.memref.shm);
- kfree(params);
- }
+ free_params(params, arg.num_params);
return rc;
}
@@ -672,7 +668,6 @@ static int tee_ioctl_object_invoke(struct tee_context *ctx,
struct tee_ioctl_buf_data __user *ubuf)
{
int rc;
- size_t n;
struct tee_ioctl_buf_data buf;
struct tee_ioctl_object_invoke_arg __user *uarg;
struct tee_ioctl_object_invoke_arg arg;
@@ -716,14 +711,7 @@ static int tee_ioctl_object_invoke(struct tee_context *ctx,
}
rc = params_to_user(uparams, arg.num_params, params);
out:
- if (params) {
- /* Decrease ref count for all valid shared memory pointers */
- for (n = 0; n < arg.num_params; n++)
- if (tee_param_is_memref(params + n) &&
- params[n].u.memref.shm)
- tee_shm_put(params[n].u.memref.shm);
- kfree(params);
- }
+ free_params(params, arg.num_params);
return rc;
}
@@ -846,9 +834,15 @@ static int tee_ioctl_supp_recv(struct tee_context *ctx,
return -ENOMEM;
rc = params_from_user(ctx, params, num_params, uarg->params);
- if (rc)
- goto out;
+ if (rc) {
+ free_params(params, num_params);
+ return rc;
+ }
+ /*
+ * supp_recv() may consume and replace the supplied parameters, so the
+ * final cleanup cannot use free_params() like the other ioctl paths.
+ */
rc = ctx->teedev->desc->ops->supp_recv(ctx, &func, &num_params, params);
if (rc)
goto out;
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index e9ea9f80cfd9..6742b3579c86 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -435,7 +435,7 @@ register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
num_pages = iov_iter_npages(iter, INT_MAX);
if (!num_pages) {
ret = ERR_PTR(-ENOMEM);
- goto err_ctx_put;
+ goto err_free_shm;
}
shm->pages = kzalloc_objs(*shm->pages, num_pages);
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index bc037db46624..9c0973a7ffc3 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -177,14 +177,14 @@ static int ufs_qcom_ice_init(struct ufs_qcom_host *host)
int i;
ice = devm_of_qcom_ice_get(dev);
- if (ice == ERR_PTR(-EOPNOTSUPP)) {
+ if (IS_ERR(ice)) {
+ if (ice != ERR_PTR(-EOPNOTSUPP))
+ return PTR_ERR(ice);
+
dev_warn(dev, "Disabling inline encryption support\n");
- ice = NULL;
+ return 0;
}
- if (IS_ERR_OR_NULL(ice))
- return PTR_ERR_OR_ZERO(ice);
-
host->ice = ice;
/* Initialize the blk_crypto_profile */
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 27ab7bd844ec..c6240dccbb0f 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1455,6 +1455,9 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
if (atomic_add_return(bios, &io->pending_bios))
return;
if (z_erofs_in_atomic()) {
+ /* See `sync_decompress` in sysfs-fs-erofs for more details */
+ if (sbi->sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
+ sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD
struct kthread_worker *worker;
@@ -1471,9 +1474,6 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
#else
queue_work(z_erofs_workqueue, &io->u.work);
#endif
- /* See `sync_decompress` in sysfs-fs-erofs for more details */
- if (sbi->sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
- sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
return;
}
gfp_flag = memalloc_noio_save();
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index a72db36096ca..e1a02a2c8406 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -716,7 +716,7 @@ static int z_erofs_map_sanity_check(struct inode *inode,
}
if (map->m_algorithmformat < Z_EROFS_COMPRESSION_MAX) {
- if (sbi->available_compr_algs ^ BIT(map->m_algorithmformat)) {
+ if (!(sbi->available_compr_algs & BIT(map->m_algorithmformat))) {
erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu",
map->m_algorithmformat, EROFS_I(inode)->nid);
return -EFSCORRUPTED;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3134bb17f3e3..d7c399763ad9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -927,7 +927,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
}
if (nfs_write_need_commit(hdr)) {
struct nfs_open_context *ctx =
- hdr->req->wb_lock_context->open_context;
+ req->wb_lock_context->open_context;
/* Reset wb_nio, since the write was successful. */
req->wb_nio = 0;
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 0f5c18520eff..b193dde4810d 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -711,11 +711,16 @@ out:
*/
static int smb2_oplock_break_noti(struct oplock_info *opinfo)
{
- struct ksmbd_conn *conn = opinfo->conn;
+ struct ksmbd_conn *conn;
struct oplock_break_info *br_info;
int ret = 0;
- struct ksmbd_work *work = ksmbd_alloc_work_struct();
+ struct ksmbd_work *work;
+
+ conn = READ_ONCE(opinfo->conn);
+ if (!conn)
+ return 0;
+ work = ksmbd_alloc_work_struct();
if (!work)
return -ENOMEM;
@@ -815,11 +820,15 @@ out:
*/
static int smb2_lease_break_noti(struct oplock_info *opinfo)
{
- struct ksmbd_conn *conn = opinfo->conn;
+ struct ksmbd_conn *conn;
struct ksmbd_work *work;
struct lease_break_info *br_info;
struct lease *lease = opinfo->o_lease;
+ conn = READ_ONCE(opinfo->conn);
+ if (!conn)
+ return 0;
+
work = ksmbd_alloc_work_struct();
if (!work)
return -ENOMEM;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 620bcfbbfd92..3eb3b1711acb 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -7322,6 +7322,17 @@ int smb2_cancel(struct ksmbd_work *work)
le64_to_cpu(hdr->Id.AsyncId))
continue;
+ /*
+ * A cancelled deferred byte-range lock frees its
+ * file_lock and takes the smb2_lock() early-exit that
+ * skips release_async_work(), so the work stays on
+ * conn->async_requests with a live cancel_fn pointing
+ * at the freed file_lock. Re-firing it on a second
+ * SMB2_CANCEL is a use-after-free.
+ */
+ if (iter->state == KSMBD_WORK_CANCELLED)
+ break;
+
ksmbd_debug(SMB,
"smb2 with AsyncId %llu cancelled command = 0x%x\n",
le64_to_cpu(hdr->Id.AsyncId),
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 4d2d33df6231..ba3355a6057a 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -1390,19 +1390,19 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp)
struct ksmbd_lock *smb_lock;
unsigned int old_f_state;
+ write_lock(&global_ft.lock);
if (!fp->is_durable || fp->conn || fp->tcon) {
+ write_unlock(&global_ft.lock);
pr_err("Invalid durable fd [%p:%p]\n", fp->conn, fp->tcon);
return -EBADF;
}
if (has_file_id(fp->volatile_id)) {
+ write_unlock(&global_ft.lock);
pr_err("Still in use durable fd: %llu\n", fp->volatile_id);
return -EBADF;
}
- old_f_state = fp->f_state;
- fp->f_state = FP_NEW;
-
/*
* Initialize fp's connection binding before publishing fp into the
* session's file table. If __open_id() is ordered first, a
@@ -1413,11 +1413,17 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp)
*/
fp->conn = ksmbd_conn_get(conn);
fp->tcon = work->tcon;
+ write_unlock(&global_ft.lock);
+
+ old_f_state = fp->f_state;
+ fp->f_state = FP_NEW;
__open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID);
if (!has_file_id(fp->volatile_id)) {
+ write_lock(&global_ft.lock);
fp->conn = NULL;
fp->tcon = NULL;
+ write_unlock(&global_ft.lock);
ksmbd_conn_put(conn);
fp->f_state = old_f_state;
return -EBADF;
diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c
index bffc4666ce60..c25716fc4fee 100644
--- a/fs/xfs/scrub/cow_repair.c
+++ b/fs/xfs/scrub/cow_repair.c
@@ -300,18 +300,15 @@ xrep_cow_find_bad(
* on the debugging knob, replace everything in the CoW fork.
*/
if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
- XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
+ XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
xc->irec.br_blockcount);
- if (error)
- return error;
- }
out_sa:
xchk_ag_free(sc, &sc->sa);
out_pag:
xfs_perag_put(pag);
- return 0;
+ return error;
}
/*
@@ -385,12 +382,9 @@ xrep_cow_find_bad_rt(
* CoW fork and then scan for staging extents in the refcountbt.
*/
if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
- XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
+ XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
xc->irec.br_blockcount);
- if (error)
- goto out_rtg;
- }
out_sr:
xchk_rtgroup_btcur_free(&sc->sr);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 46e234863644..96af6b62ce39 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -409,6 +409,26 @@ xfs_ioc_ag_geometry(
return 0;
}
+static void
+xfs_rtgroup_report_write_pointer(
+ struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo)
+{
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ if (rtg->rtg_open_zone) {
+ rgeo->rg_writepointer = rtg->rtg_open_zone->oz_allocated;
+ } else {
+ xfs_rgblock_t highest_rgbno = xfs_rtrmap_highest_rgbno(rtg);
+
+ if (highest_rgbno == NULLRGBLOCK)
+ rgeo->rg_writepointer = 0;
+ else
+ rgeo->rg_writepointer = highest_rgbno + 1;
+ }
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
+ rgeo->rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER;
+}
+
STATIC int
xfs_ioc_rtgroup_geometry(
struct xfs_mount *mp,
@@ -416,7 +436,6 @@ xfs_ioc_rtgroup_geometry(
{
struct xfs_rtgroup *rtg;
struct xfs_rtgroup_geometry rgeo;
- xfs_rgblock_t highest_rgbno;
int error;
if (copy_from_user(&rgeo, arg, sizeof(rgeo)))
@@ -433,28 +452,16 @@ xfs_ioc_rtgroup_geometry(
return -EINVAL;
error = xfs_rtgroup_get_geometry(rtg, &rgeo);
- xfs_rtgroup_put(rtg);
if (error)
- return error;
-
- if (xfs_has_zoned(mp)) {
- xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
- if (rtg->rtg_open_zone) {
- rgeo.rg_writepointer = rtg->rtg_open_zone->oz_allocated;
- } else {
- highest_rgbno = xfs_rtrmap_highest_rgbno(rtg);
- if (highest_rgbno == NULLRGBLOCK)
- rgeo.rg_writepointer = 0;
- else
- rgeo.rg_writepointer = highest_rgbno + 1;
- }
- xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
- rgeo.rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER;
- }
+ goto out_put_rtg;
+ if (xfs_has_zoned(mp))
+ xfs_rtgroup_report_write_pointer(rtg, &rgeo);
if (copy_to_user(arg, &rgeo, sizeof(rgeo)))
- return -EFAULT;
- return 0;
+ error = -EFAULT;
+out_put_rtg:
+ xfs_rtgroup_put(rtg);
+ return error;
}
/*
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b24195f570cd..7aa51826b1ca 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1149,9 +1149,12 @@ xfs_mountfs(
* blocks.
*/
error = xfs_fs_reserve_ag_blocks(mp);
- if (error && error == -ENOSPC)
+ if (error) {
+ if (error != -ENOSPC)
+ goto out_rtunmount;
xfs_warn(mp,
- "ENOSPC reserving per-AG metadata pool, log recovery may fail.");
+"ENOSPC reserving per-AG metadata pool, log recovery may fail.");
+ }
error = xfs_log_mount_finish(mp);
xfs_fs_unreserve_ag_blocks(mp);
if (error) {
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 221e55887a2a..d92993367ab6 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -118,7 +118,6 @@ xfs_fs_map_blocks(
struct xfs_bmbt_irec imap;
xfs_fileoff_t offset_fsb, end_fsb;
loff_t limit;
- int bmapi_flags = XFS_BMAPI_ENTIRE;
int nimaps = 1;
uint lock_flags;
int error = 0;
@@ -172,14 +171,18 @@ xfs_fs_map_blocks(
offset_fsb = XFS_B_TO_FSBT(mp, offset);
lock_flags = xfs_ilock_data_map_shared(ip);
+ /* request mappings for the specified range only */
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
- &imap, &nimaps, bmapi_flags);
+ &imap, &nimaps, 0);
+ if (error) {
+ xfs_iunlock(ip, lock_flags);
+ goto out_unlock;
+ }
seq = xfs_iomap_inode_sequence(ip, 0);
ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK);
- if (!error && write &&
- (!nimaps || imap.br_startblock == HOLESTARTBLOCK)) {
+ if (write && (!nimaps || imap.br_startblock == HOLESTARTBLOCK)) {
if (offset + length > XFS_ISIZE(ip))
end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb);
else if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index db23a0f231d6..251dec48f0e3 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -949,16 +949,16 @@ xfs_reflink_end_cow(
* repeatedly cycles the ILOCK to allocate one transaction per remapped
* extent.
*
- * If we're being called by writeback then the pages will still
- * have PageWriteback set, which prevents races with reflink remapping
- * and truncate. Reflink remapping prevents races with writeback by
- * taking the iolock and mmaplock before flushing the pages and
- * remapping, which means there won't be any further writeback or page
- * cache dirtying until the reflink completes.
+ * If we're being called by writeback then the folios will still
+ * have the writeback flag set, which prevents races with reflink
+ * remapping and truncate. Reflink remapping prevents races with
+ * writeback by taking the iolock and mmaplock before flushing
+ * the folios and remapping, which means there won't be any further
+ * writeback or page cache dirtying until the reflink completes.
*
* We should never have two threads issuing writeback for the same file
* region. There are also have post-eof checks in the writeback
- * preparation code so that we don't bother writing out pages that are
+ * preparation code so that we don't bother writing out folios that are
* about to be truncated.
*
* If we're being called as part of directio write completion, the dio
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index c8a1d5c0332c..f03211e4354a 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -400,7 +400,7 @@ retry:
/*
* If the inode was already deleted, skip over it.
*/
- if (error == -ENOENT) {
+ if (error == -ENOENT || error == -EINVAL) {
iter->rec_idx++;
goto retry;
}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5957bc25efa8..2abaf99321e9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -153,8 +153,6 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared);
void folio_putback_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void hugetlb_fix_reserve_counts(struct inode *inode);
@@ -421,12 +419,6 @@ static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb,
return 0;
}
-static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared)
-{
- return 0;
-}
-
static inline void folio_putback_hugetlb(struct folio *folio)
{
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 06bbe9eba636..fc2acedf0b76 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4975,8 +4975,6 @@ extern int soft_offline_page(unsigned long pfn, int flags);
*/
extern const struct attribute_group memory_failure_attr_group;
extern void memory_failure_queue(unsigned long pfn, int flags);
-extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared);
void num_poisoned_pages_inc(unsigned long pfn);
void num_poisoned_pages_sub(unsigned long pfn, long i);
#else
@@ -4984,12 +4982,6 @@ static inline void memory_failure_queue(unsigned long pfn, int flags)
{
}
-static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared)
-{
- return 0;
-}
-
static inline void num_poisoned_pages_inc(unsigned long pfn)
{
}
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 763eea4d80d8..2d2b9f8cdda4 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -20,6 +20,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/tracepoint-defs.h>
#include <linux/static_call.h>
+#include <linux/cfi.h>
struct module;
struct tracepoint;
@@ -389,6 +390,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
void __probestub_##_name(void *__data, proto) \
{ \
} \
+ /* \
+ * Annotate the probestub 'CFI_NOSEAL' to stop objtool from \
+ * requesting the kernel remove the ENDBR, because the only \
+ * references to the function are in the __tracepoint section, \
+ * that objtool doesn't scan. \
+ */ \
+ CFI_NOSEAL(__probestub_##_name); \
DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); \
DEFINE_RUST_DO_TRACE(_name, TP_PROTO(proto), TP_ARGS(args))
diff --git a/include/net/act_api.h b/include/net/act_api.h
index d11b79107930..fd2967ee08f7 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -45,6 +45,7 @@ struct tc_action {
struct tc_cookie __rcu *user_cookie;
struct tcf_chain __rcu *goto_chain;
u32 tcfa_flags;
+ struct rcu_head tcfa_rcu;
u8 hw_stats;
u8 used_hw_stats;
bool used_hw_stats_valid;
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 5172afee5494..e0a1f2293679 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -33,6 +33,7 @@
/* L2CAP defaults */
#define L2CAP_DEFAULT_MTU 672
#define L2CAP_DEFAULT_MIN_MTU 48
+#define L2CAP_SIG_MTU 48 /* BR/EDR signaling MTU */
#define L2CAP_DEFAULT_FLUSH_TO 0xFFFF
#define L2CAP_EFS_DEFAULT_FLUSH_TO 0xFFFFFFFF
#define L2CAP_DEFAULT_TX_WINDOW 63
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a02e569813d2..e517eaaa177b 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1824,8 +1824,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int ip_vs_bind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *scheduler);
-void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
- struct ip_vs_scheduler *sched);
+void ip_vs_unbind_scheduler(struct ip_vs_service *svc);
struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
struct ip_vs_conn *
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index f7263fe2a2e4..ee70f597a4de 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -27,7 +27,9 @@ struct mptcp_ext {
u32 subflow_seq;
u16 data_len;
__sum16 csum;
- u8 use_map:1,
+
+ struct_group(flags,
+ u8 use_map:1,
dsn64:1,
data_fin:1,
use_ack:1,
@@ -35,9 +37,10 @@ struct mptcp_ext {
mpc_map:1,
frozen:1,
reset_transient:1;
- u8 reset_reason:4,
+ u8 reset_reason:4,
csum_reqd:1,
infinite_map:1;
+ ); /* end of flags group */
};
#define MPTCPOPT_HMAC_LEN 20
diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index f58ee15cd858..cb7b82f2cbc7 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -15,7 +15,6 @@ struct tcf_pedit_parms {
struct tc_pedit_key *tcfp_keys;
struct tcf_pedit_key_ex *tcfp_keys_ex;
int action;
- u32 tcfp_off_max_hint;
unsigned char tcfp_nkeys;
unsigned char tcfp_flags;
struct rcu_head rcu;
diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h
index cab5cadca8ef..5203977ed35d 100644
--- a/include/uapi/linux/tee.h
+++ b/include/uapi/linux/tee.h
@@ -470,6 +470,7 @@ struct tee_ioctl_object_invoke_arg {
__u32 op;
__u32 ret;
__u32 num_params;
+ __u32 :32;
/* num_params tells the actual number of element in params */
struct tee_ioctl_param params[];
};
diff --git a/io_uring/net.c b/io_uring/net.c
index 8df15b639358..ee848eb65ec9 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -842,7 +842,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
/* bits to clear in old and inherit in new cflags on bundle retry */
-#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
+#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE|\
+ IORING_CQE_F_BUF_MORE)
/*
* Finishes io_recv and io_recvmsg.
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 5c33ab20cc20..c9e14fda3d6f 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1811,9 +1811,9 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
* Compute add/delete mask to/from effective_cpus
*
* For valid partition:
- * addmask = exclusive_cpus & ~newmask
+ * addmask = effective_xcpus & ~newmask
* & parent->effective_xcpus
- * delmask = newmask & ~exclusive_cpus
+ * delmask = newmask & ~effective_xcpus
* & parent->effective_xcpus
*
* For invalid partition:
@@ -1825,11 +1825,11 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
deleting = cpumask_and(tmp->delmask,
newmask, parent->effective_xcpus);
} else {
- cpumask_andnot(tmp->addmask, xcpus, newmask);
+ cpumask_andnot(tmp->addmask, cs->effective_xcpus, newmask);
adding = cpumask_and(tmp->addmask, tmp->addmask,
parent->effective_xcpus);
- cpumask_andnot(tmp->delmask, newmask, xcpus);
+ cpumask_andnot(tmp->delmask, newmask, cs->effective_xcpus);
deleting = cpumask_and(tmp->delmask, tmp->delmask,
parent->effective_xcpus);
}
@@ -1868,7 +1868,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
part_error = PERR_NOCPUS;
deleting = false;
adding = cpumask_and(tmp->addmask,
- xcpus, parent->effective_xcpus);
+ cs->effective_xcpus, parent->effective_xcpus);
}
} else {
/*
@@ -1890,7 +1890,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
part_error = PERR_NOCPUS;
if (is_partition_valid(cs))
adding = cpumask_and(tmp->addmask,
- xcpus, parent->effective_xcpus);
+ cs->effective_xcpus,
+ parent->effective_xcpus);
} else if (is_partition_invalid(cs) && !cpumask_empty(xcpus) &&
cpumask_subset(xcpus, parent->effective_xcpus)) {
struct cgroup_subsys_state *css;
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 65631e577ee9..5d2d19473a82 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4402,11 +4402,13 @@ void scx_cgroup_move_task(struct task_struct *p)
return;
/*
- * @p must have ops.cgroup_prep_move() called on it and thus
- * cgrp_moving_from set.
+ * scx_cgroup_can_attach() sets cgrp_moving_from only when the task's
+ * cgroup changes. Migration keys off css rather than cgroup identity,
+ * so it can hand an unchanged-cgroup task here with cgrp_moving_from
+ * NULL. Nothing to report to the BPF scheduler then, so skip it and
+ * keep prep_move and move paired.
*/
- if (SCX_HAS_OP(sch, cgroup_move) &&
- !WARN_ON_ONCE(!p->scx.cgrp_moving_from))
+ if (SCX_HAS_OP(sch, cgroup_move) && p->scx.cgrp_moving_from)
SCX_CALL_OP_TASK(sch, cgroup_move, task_rq(p),
p, p->scx.cgrp_moving_from,
tg_cgrp(task_group(p)));
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index e0d3a0da26af..44c22d4e7881 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -962,8 +962,6 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t,
code->op = FETCH_OP_COMM;
return 0;
}
- /* backward compatibility */
- ctx->offset = 0;
goto inval;
}
diff --git a/mm/cma.c b/mm/cma.c
index c7ca567f4c5c..a13ce4999b39 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -188,10 +188,13 @@ cleanup:
/* Expose all pages to the buddy, they are useless for CMA. */
if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) {
- for (r = 0; r < allocrange; r++) {
+ for (r = 0; r < cma->nranges; r++) {
+ unsigned long start_pfn;
+
cmr = &cma->ranges[r];
+ start_pfn = r <= allocrange ? early_pfn[r] : cmr->early_pfn;
end_pfn = cmr->base_pfn + cmr->count;
- for (pfn = early_pfn[r]; pfn < end_pfn; pfn++)
+ for (pfn = start_pfn; pfn < end_pfn; pfn++)
free_reserved_page(pfn_to_page(pfn));
}
}
diff --git a/mm/cma_debug.c b/mm/cma_debug.c
index 5ae38f5abbcc..523ba4a0f9f7 100644
--- a/mm/cma_debug.c
+++ b/mm/cma_debug.c
@@ -205,7 +205,8 @@ static int __init cma_debugfs_init(void)
cma_debugfs_root = debugfs_create_dir("cma", NULL);
for (i = 0; i < cma_area_count; i++)
- cma_debugfs_add_one(&cma_areas[i], cma_debugfs_root);
+ if (test_bit(CMA_ACTIVATED, &cma_areas[i].flags))
+ cma_debugfs_add_one(&cma_areas[i], cma_debugfs_root);
return 0;
}
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index 8c6d613425c1..c3e4c871b0bb 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -32,9 +32,9 @@ struct folio *damon_get_folio(unsigned long pfn)
return NULL;
folio = page_folio(page);
- if (!folio_test_lru(folio) || !folio_try_get(folio))
+ if (!folio_try_get(folio))
return NULL;
- if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) {
+ if (unlikely(page_folio(page) != folio) || !folio_test_lru(folio)) {
folio_put(folio);
folio = NULL;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 970e077019b7..653f2dc03403 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3015,9 +3015,9 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud,
if (!folio_test_referenced(folio) && pud_young(old_pud))
folio_set_referenced(folio);
folio_remove_rmap_pud(folio, page, vma);
- folio_put(folio);
add_mm_counter(vma->vm_mm, mm_counter_file(folio),
-HPAGE_PUD_NR);
+ folio_put(folio);
}
void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
@@ -3133,7 +3133,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (!folio_test_referenced(folio) && pmd_young(old_pmd))
folio_set_referenced(folio);
folio_remove_rmap_pmd(folio, page, vma);
+ add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR);
folio_put(folio);
+ return;
}
add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR);
return;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4b80b167cc9c..c921287489de 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -118,6 +118,9 @@ static int hugetlb_acct_memory(struct hstate *h, long delta);
static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
+static int __huge_pmd_unshare(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ bool check_locks);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start, unsigned long end, bool take_locks);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
@@ -4974,6 +4977,7 @@ again:
addr, dst_vma);
folio_put(pte_folio);
if (ret) {
+ restore_reserve_on_error(h, dst_vma, addr, new_folio);
folio_put(new_folio);
break;
}
@@ -6270,6 +6274,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
folio_put(*foliop);
*foliop = NULL;
if (ret) {
+ restore_reserve_on_error(h, dst_vma, dst_addr, folio);
folio_put(folio);
goto out;
}
@@ -6891,6 +6896,31 @@ out:
return pte;
}
+static int __huge_pmd_unshare(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ bool check_locks)
+{
+ unsigned long sz = huge_page_size(hstate_vma(vma));
+ struct mm_struct *mm = vma->vm_mm;
+ pgd_t *pgd = pgd_offset(mm, addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ pud_t *pud = pud_offset(p4d, addr);
+
+ if (sz != PMD_SIZE)
+ return 0;
+ if (!ptdesc_pmd_is_shared(virt_to_ptdesc(ptep)))
+ return 0;
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+ if (check_locks)
+ hugetlb_vma_assert_locked(vma);
+ pud_clear(pud);
+
+ tlb_unshare_pmd_ptdesc(tlb, virt_to_ptdesc(ptep), addr);
+
+ mm_dec_nr_pmds(mm);
+ return 1;
+}
+
/**
* huge_pmd_unshare - Unmap a pmd table if it is shared by multiple users
* @tlb: the current mmu_gather.
@@ -6910,24 +6940,7 @@ out:
int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- unsigned long sz = huge_page_size(hstate_vma(vma));
- struct mm_struct *mm = vma->vm_mm;
- pgd_t *pgd = pgd_offset(mm, addr);
- p4d_t *p4d = p4d_offset(pgd, addr);
- pud_t *pud = pud_offset(p4d, addr);
-
- if (sz != PMD_SIZE)
- return 0;
- if (!ptdesc_pmd_is_shared(virt_to_ptdesc(ptep)))
- return 0;
- i_mmap_assert_write_locked(vma->vm_file->f_mapping);
- hugetlb_vma_assert_locked(vma);
- pud_clear(pud);
-
- tlb_unshare_pmd_ptdesc(tlb, virt_to_ptdesc(ptep), addr);
-
- mm_dec_nr_pmds(mm);
- return 1;
+ return __huge_pmd_unshare(tlb, vma, addr, ptep, /*check_locks=*/true);
}
/*
@@ -6961,6 +6974,13 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
return NULL;
}
+static int __huge_pmd_unshare(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ bool check_locks)
+{
+ return 0;
+}
+
int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
@@ -7141,17 +7161,6 @@ int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison
return ret;
}
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared)
-{
- int ret;
-
- spin_lock_irq(&hugetlb_lock);
- ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
- spin_unlock_irq(&hugetlb_lock);
- return ret;
-}
-
/**
* folio_putback_hugetlb - unisolate a hugetlb folio
* @folio: the isolated hugetlb folio
@@ -7269,7 +7278,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
- huge_pmd_unshare(&tlb, vma, address, ptep);
+ __huge_pmd_unshare(&tlb, vma, address, ptep, take_locks);
spin_unlock(ptl);
}
huge_pmd_unshare_flush(&tlb, vma);
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 4a077d231d3a..133b46dfb09f 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -207,6 +207,8 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
/* Remapping the head page requires r/w */
if (unlikely(walk->nr_walked == 0 && walk->vmemmap_head)) {
+ VM_WARN_ON_ONCE(!PageHead((const struct page *)addr));
+
list_del(&walk->vmemmap_head->lru);
/*
@@ -218,6 +220,8 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
entry = mk_pte(walk->vmemmap_head, PAGE_KERNEL);
} else {
+ VM_WARN_ON_ONCE(!PageTail((const struct page *)addr));
+
/*
* Remap the tail pages as read-only to catch illegal write
* operation to the tail pages.
@@ -232,33 +236,28 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
struct vmemmap_remap_walk *walk)
{
- struct page *page;
- struct page *from, *to;
-
- page = list_first_entry(walk->vmemmap_pages, struct page, lru);
- list_del(&page->lru);
+ struct page *src = pte_page(ptep_get(pte)), *dst;
/*
- * Initialize tail pages in the newly allocated vmemmap page.
- *
- * There is folio-scope metadata that is encoded in the first few
- * tail pages.
- *
- * Use the value last tail page in the page with the head page
- * to initialize the rest of tail pages.
+ * When rolling back vmemmap_remap_free(), keep the copied head page
+ * mapping and restore only PTEs currently pointing at the shared tail
+ * page.
*/
- from = compound_head((struct page *)addr) +
- PAGE_SIZE / sizeof(struct page) - 1;
- to = page_to_virt(page);
- for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++, to++)
- *to = *from;
+ if (walk->vmemmap_tail && walk->vmemmap_tail != src)
+ return;
+
+ VM_WARN_ON_ONCE(PageHead((const struct page *)addr));
+
+ dst = list_first_entry(walk->vmemmap_pages, struct page, lru);
+ list_del(&dst->lru);
+ copy_page(page_to_virt(dst), page_to_virt(src));
/*
* Makes sure that preceding stores to the page contents become visible
* before the set_pte_at() write.
*/
smp_wmb();
- set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL));
+ set_pte_at(&init_mm, addr, pte, mk_pte(dst, PAGE_KERNEL));
}
/**
@@ -324,6 +323,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
*/
walk = (struct vmemmap_remap_walk) {
.remap_pte = vmemmap_restore_pte,
+ .vmemmap_tail = vmemmap_tail,
.vmemmap_pages = vmemmap_pages,
.flags = 0,
};
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 177732fef010..1a4fd2504bcd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2011,6 +2011,7 @@ struct memcg_stock_pcp {
struct work_struct work;
unsigned long flags;
+ uint8_t drain_idx;
};
static DEFINE_PER_CPU_ALIGNED(struct memcg_stock_pcp, memcg_stock) = {
@@ -2194,7 +2195,9 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
if (!success) {
i = empty_slot;
if (i == -1) {
- i = get_random_u32_below(NR_MEMCG_STOCK);
+ i = stock->drain_idx++;
+ if (stock->drain_idx == NR_MEMCG_STOCK)
+ stock->drain_idx = 0;
drain_stock(stock, i);
}
css_get(&memcg->css);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ee42d4361309..d47aef256a32 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1966,20 +1966,19 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
folio_free_raw_hwp(folio, true);
}
-/*
- * Called from hugetlb code with hugetlb_lock held.
- */
-int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+static int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared)
{
struct page *page = pfn_to_page(pfn);
- struct folio *folio = page_folio(page);
+ struct folio *folio;
bool count_increased = false;
int ret, rc;
+ spin_lock_irq(&hugetlb_lock);
+ folio = page_folio(page);
if (!folio_test_hugetlb(folio)) {
ret = MF_HUGETLB_NON_HUGEPAGE;
- goto out;
+ goto out_unlock;
} else if (flags & MF_COUNT_INCREASED) {
ret = MF_HUGETLB_IN_USED;
count_increased = true;
@@ -1995,13 +1994,13 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
} else {
ret = MF_HUGETLB_RETRY;
if (!(flags & MF_NO_RETRY))
- goto out;
+ goto out_unlock;
}
rc = hugetlb_update_hwpoison(folio, page);
if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) {
ret = rc;
- goto out;
+ goto out_unlock;
}
/*
@@ -2013,8 +2012,10 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
*migratable_cleared = true;
}
+ spin_unlock_irq(&hugetlb_lock);
return ret;
-out:
+out_unlock:
+ spin_unlock_irq(&hugetlb_lock);
if (count_increased)
folio_put(folio);
return ret;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 180bad42fc79..80cc8be5725f 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -14,6 +14,8 @@
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
+#include <linux/file.h>
+#include <linux/cleanup.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include "internal.h"
@@ -66,7 +68,7 @@ static const struct vm_uffd_ops *vma_uffd_ops(struct vm_area_struct *vma)
{
if (vma_is_anonymous(vma))
return &anon_uffd_ops;
- return vma->vm_ops ? vma->vm_ops->uffd_ops : NULL;
+ return vma->vm_ops->uffd_ops;
}
static __always_inline
@@ -443,16 +445,80 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
return ret;
}
-static int mfill_copy_folio_retry(struct mfill_state *state,
+#define MFILL_RETRY_STATE_VMA_FLAGS \
+ append_vma_flags(__VMA_UFFD_FLAGS, VMA_SHARED_BIT)
+
+/*
+ * VMA state saved before dropping the locks in mfill_copy_folio_retry().
+ * Used to detect VMA replacement or incompatible changes after reacquiring the
+ * locks.
+ */
+struct mfill_retry_state {
+ const struct vm_uffd_ops *ops;
+ struct file *file;
+ vma_flags_t flags;
+ pgoff_t pgoff;
+};
+
+static void mfill_retry_state_save(struct mfill_retry_state *s,
+ struct vm_area_struct *vma)
+{
+ s->flags = vma_flags_and_mask(&vma->flags, MFILL_RETRY_STATE_VMA_FLAGS);
+ s->ops = vma_uffd_ops(vma);
+ s->pgoff = vma->vm_pgoff;
+
+ if (vma->vm_file)
+ s->file = get_file(vma->vm_file);
+}
+
+static bool mfill_retry_state_changed(struct mfill_retry_state *state,
+ struct vm_area_struct *vma)
+{
+ vma_flags_t flags = vma_flags_and_mask(&vma->flags,
+ MFILL_RETRY_STATE_VMA_FLAGS);
+
+ /* Have any UFFD flags (missing, WP, minor) changed? */
+ if (!vma_flags_same_pair(&state->flags, &flags))
+ return true;
+
+ /* VMA type or effective uffd_ops changed while the lock was dropped */
+ if (state->ops != vma_uffd_ops(vma))
+ return true;
+
+ /* VMA was anonymous before; changed only if it no longer is */
+ if (!state->file)
+ return !vma_is_anonymous(vma);
+
+ /* VMA was file backed, but file, inode or offset has changed */
+ if (!vma->vm_file || vma->vm_file->f_inode != state->file->f_inode ||
+ state->file != vma->vm_file || vma->vm_pgoff != state->pgoff)
+ return true;
+
+ return false;
+}
+
+static void mfill_retry_state_put(struct mfill_retry_state *s)
+{
+ if (s->file)
+ fput(s->file);
+}
+
+DEFINE_FREE(retry_put, struct mfill_retry_state *,
+ if (_T) mfill_retry_state_put(_T));
+
+static int mfill_copy_folio_retry(struct mfill_state *mfill_state,
struct folio *folio)
{
- const struct vm_uffd_ops *orig_ops = vma_uffd_ops(state->vma);
- unsigned long src_addr = state->src_addr;
+ struct mfill_retry_state retry_state = { 0 };
+ struct mfill_retry_state *for_free __free(retry_put) = &retry_state;
+ unsigned long src_addr = mfill_state->src_addr;
void *kaddr;
int err;
+ mfill_retry_state_save(&retry_state, mfill_state->vma);
+
/* retry copying with mm_lock dropped */
- mfill_put_vma(state);
+ mfill_put_vma(mfill_state);
kaddr = kmap_local_folio(folio, 0);
err = copy_from_user(kaddr, (const void __user *) src_addr, PAGE_SIZE);
@@ -463,19 +529,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state,
flush_dcache_folio(folio);
/* reget VMA and PMD, they could change underneath us */
- err = mfill_get_vma(state);
+ err = mfill_get_vma(mfill_state);
if (err)
return err;
- /*
- * The VMA type may have changed while the lock was dropped
- * (e.g. replaced with a hugetlb mapping), making the caller's
- * ops pointer stale.
- */
- if (vma_uffd_ops(state->vma) != orig_ops)
+ if (mfill_retry_state_changed(&retry_state, mfill_state->vma))
return -EAGAIN;
- err = mfill_establish_pmd(state);
+ err = mfill_establish_pmd(mfill_state);
if (err)
return err;
@@ -491,6 +552,11 @@ static int __mfill_atomic_pte(struct mfill_state *state,
struct folio *folio;
int ret;
+ if (!ops) {
+ VM_WARN_ONCE(1, "UFFDIO_COPY for unsupported VMA");
+ return -EOPNOTSUPP;
+ }
+
folio = ops->alloc_folio(state->vma, state->dst_addr);
if (!folio)
return -ENOMEM;
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index e116d308a8df..37eaff3f7b69 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -1086,12 +1086,12 @@ static u8 lowpan_iphc_mcast_ctx_addr_compress(u8 **hc_ptr,
const struct lowpan_iphc_ctx *ctx,
const struct in6_addr *ipaddr)
{
- u8 data[6];
+ u8 data[6] = {};
/* flags/scope, reserved (RIID) */
memcpy(data, &ipaddr->s6_addr[1], 2);
/* group ID */
- memcpy(&data[1], &ipaddr->s6_addr[11], 4);
+ memcpy(&data[2], &ipaddr->s6_addr[12], 4);
lowpan_push_hc_data(hc_ptr, data, 6);
return LOWPAN_IPHC_DAM_00;
diff --git a/net/802/garp.c b/net/802/garp.c
index 6f563b6797d9..c7a39f298ad6 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -453,7 +453,7 @@ static int garp_pdu_parse_attr(struct garp_applicant *app, struct sk_buff *skb,
if (!pskb_may_pull(skb, ga->len))
return -1;
skb_pull(skb, ga->len);
- dlen = sizeof(*ga) - ga->len;
+ dlen = ga->len - sizeof(*ga);
if (attrtype > app->app->maxattr)
return 0;
diff --git a/net/802/mrp.c b/net/802/mrp.c
index ff0e80574e6b..160a3b14569c 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -703,6 +703,12 @@ static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) &
MRP_VECATTR_HDR_LEN_MASK);
+ /* If valen is 0, only a LeaveAllEvent is present; FirstValue and
+ * Vector fields are absent per IEEE 802.1ak.
+ */
+ if (valen == 0)
+ return 0;
+
/* The VectorAttribute structure in a PDU carries event information
* about one or more attributes having consecutive values. Only the
* value for the first attribute is contained in the structure. So
@@ -753,6 +759,9 @@ static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
vaevents %= __MRP_VECATTR_EVENT_MAX;
vaevent = vaevents;
mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+ valen--;
+ mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+ mrp_cb(skb)->mh->attrlen);
}
return 0;
}
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 30493ea3c010..078fb7a6efa5 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -393,7 +393,7 @@ static void aarp_purge(void)
*/
static struct aarp_entry *aarp_alloc(void)
{
- struct aarp_entry *a = kmalloc_obj(*a, GFP_ATOMIC);
+ struct aarp_entry *a = kzalloc_obj(*a, GFP_ATOMIC);
if (!a)
return NULL;
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 0de5df690bd0..5c5f53ff30e8 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -206,14 +206,11 @@ static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len)
return 0;
}
-static int bnep_rx_control(struct bnep_session *s, void *data, int len)
+static int bnep_rx_control_cmd(struct bnep_session *s, u8 cmd, void *data,
+ int len)
{
- u8 cmd = *(u8 *)data;
int err = 0;
- data++;
- len--;
-
switch (cmd) {
case BNEP_CMD_NOT_UNDERSTOOD:
case BNEP_SETUP_CONN_RSP:
@@ -254,6 +251,14 @@ static int bnep_rx_control(struct bnep_session *s, void *data, int len)
return err;
}
+static int bnep_rx_control(struct bnep_session *s, void *data, int len)
+{
+ if (len < 1)
+ return -EILSEQ;
+
+ return bnep_rx_control_cmd(s, *(u8 *)data, data + 1, len - 1);
+}
+
static int bnep_rx_extension(struct bnep_session *s, struct sk_buff *skb)
{
struct bnep_ext_hdr *h;
@@ -299,19 +304,26 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
{
struct net_device *dev = s->dev;
struct sk_buff *nskb;
+ u8 *data;
u8 type, ctrl_type;
dev->stats.rx_bytes += skb->len;
- type = *(u8 *) skb->data;
- skb_pull(skb, 1);
- ctrl_type = *(u8 *)skb->data;
+ data = skb_pull_data(skb, sizeof(type));
+ if (!data)
+ goto badframe;
+ type = *data;
if ((type & BNEP_TYPE_MASK) >= sizeof(__bnep_rx_hlen))
goto badframe;
if ((type & BNEP_TYPE_MASK) == BNEP_CONTROL) {
- if (bnep_rx_control(s, skb->data, skb->len) < 0) {
+ data = skb_pull_data(skb, sizeof(ctrl_type));
+ if (!data)
+ goto badframe;
+ ctrl_type = *data;
+
+ if (bnep_rx_control_cmd(s, ctrl_type, skb->data, skb->len) < 0) {
dev->stats.tx_errors++;
kfree_skb(skb);
return 0;
@@ -324,24 +336,27 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
/* Verify and pull ctrl message since it's already processed */
switch (ctrl_type) {
- case BNEP_SETUP_CONN_REQ:
- /* Pull: ctrl type (1 b), len (1 b), data (len bytes) */
- if (!skb_pull(skb, 2 + *(u8 *)(skb->data + 1) * 2))
+ case BNEP_SETUP_CONN_REQ: {
+ u8 uuid_size;
+
+ /* Pull uuid_size and the dst/src service UUIDs. */
+ data = skb_pull_data(skb, sizeof(uuid_size));
+ if (!data)
+ goto badframe;
+ uuid_size = *data;
+ if (!skb_pull(skb, uuid_size + uuid_size))
goto badframe;
break;
+ }
case BNEP_FILTER_MULTI_ADDR_SET:
- case BNEP_FILTER_NET_TYPE_SET: {
- u8 *hdr;
-
- /* Pull ctrl type (1 b) + len (2 b) */
- hdr = skb_pull_data(skb, 3);
- if (!hdr)
+ case BNEP_FILTER_NET_TYPE_SET:
+ /* Pull: len (2 b), data (len bytes) */
+ data = skb_pull_data(skb, sizeof(u16));
+ if (!data)
goto badframe;
- /* Pull data (len bytes); length is big-endian */
- if (!skb_pull(skb, get_unaligned_be16(&hdr[1])))
+ if (!skb_pull(skb, get_unaligned_be16(data)))
goto badframe;
break;
- }
default:
kfree_skb(skb);
return 0;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index aeccd8084cba..df23245d6ccd 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1725,6 +1725,11 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
/* Generate Broadcast ID */
get_random_bytes(bid, sizeof(bid));
len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
+ if (adv->adv_data_len > sizeof(ad) - len) {
+ bt_dev_err(hdev, "No room for Broadcast Announcement");
+ return -EINVAL;
+ }
+
memcpy(ad + len, adv->adv_data, adv->adv_data_len);
hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len,
ad, 0, NULL);
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 041ce9adc378..8957ce7c21b7 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -83,10 +83,12 @@ static void bt_host_release(struct device *dev)
{
struct hci_dev *hdev = to_hci_dev(dev);
- if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
hci_release_dev(hdev);
- else
+ } else {
+ cleanup_srcu_struct(&hdev->srcu);
kfree(hdev);
+ }
module_put(THIS_MODULE);
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 876649556d3c..3abd8111dda8 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -337,12 +337,20 @@ static int iso_connect_bis(struct sock *sk)
struct iso_conn *conn;
struct hci_conn *hcon;
struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ u8 src_type, bc_sid;
int err;
- BT_DBG("%pMR (SID 0x%2.2x)", &iso_pi(sk)->src, iso_pi(sk)->bc_sid);
+ lock_sock(sk);
+ bacpy(&src, &iso_pi(sk)->src);
+ bacpy(&dst, &iso_pi(sk)->dst);
+ src_type = iso_pi(sk)->src_type;
+ bc_sid = iso_pi(sk)->bc_sid;
+ release_sock(sk);
+
+ BT_DBG("%pMR (SID 0x%2.2x)", &src, bc_sid);
- hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
- iso_pi(sk)->src_type);
+ hdev = hci_get_route(&dst, &src, src_type);
if (!hdev)
return -EHOSTUNREACH;
@@ -430,12 +438,19 @@ static int iso_connect_cis(struct sock *sk)
struct iso_conn *conn;
struct hci_conn *hcon;
struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ u8 src_type;
int err;
- BT_DBG("%pMR -> %pMR", &iso_pi(sk)->src, &iso_pi(sk)->dst);
+ lock_sock(sk);
+ bacpy(&src, &iso_pi(sk)->src);
+ bacpy(&dst, &iso_pi(sk)->dst);
+ src_type = iso_pi(sk)->src_type;
+ release_sock(sk);
- hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
- iso_pi(sk)->src_type);
+ BT_DBG("%pMR -> %pMR", &src, &dst);
+
+ hdev = hci_get_route(&dst, &src, src_type);
if (!hdev)
return -EHOSTUNREACH;
@@ -1082,7 +1097,7 @@ static int iso_sock_rebind_bc(struct sock *sk, struct sockaddr_iso *sa,
* ordering.
*/
release_sock(sk);
- hci_dev_lock(bis->hdev);
+ hci_dev_lock(hdev);
lock_sock(sk);
if (!iso_pi(sk)->conn || iso_pi(sk)->conn->hcon != bis) {
@@ -1212,18 +1227,25 @@ static int iso_sock_connect(struct socket *sock, struct sockaddr_unsized *addr,
static int iso_listen_bis(struct sock *sk)
{
- struct hci_dev *hdev;
- int err = 0;
struct iso_conn *conn;
struct hci_conn *hcon;
+ struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ u8 src_type, bc_sid;
+ int err = 0;
+
+ lock_sock(sk);
+ bacpy(&src, &iso_pi(sk)->src);
+ bacpy(&dst, &iso_pi(sk)->dst);
+ src_type = iso_pi(sk)->src_type;
+ bc_sid = iso_pi(sk)->bc_sid;
+ release_sock(sk);
- BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &iso_pi(sk)->src,
- &iso_pi(sk)->dst, iso_pi(sk)->bc_sid);
+ BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &src, &dst, bc_sid);
write_lock(&iso_sk_list.lock);
- if (__iso_get_sock_listen_by_sid(&iso_pi(sk)->src, &iso_pi(sk)->dst,
- iso_pi(sk)->bc_sid))
+ if (__iso_get_sock_listen_by_sid(&src, &dst, bc_sid))
err = -EADDRINUSE;
write_unlock(&iso_sk_list.lock);
@@ -1231,8 +1253,7 @@ static int iso_listen_bis(struct sock *sk)
if (err)
return err;
- hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
- iso_pi(sk)->src_type);
+ hdev = hci_get_route(&dst, &src, src_type);
if (!hdev)
return -EHOSTUNREACH;
@@ -1568,9 +1589,16 @@ static void iso_conn_big_sync(struct sock *sk)
{
int err;
struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ u8 src_type;
- hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
- iso_pi(sk)->src_type);
+ lock_sock(sk);
+ bacpy(&src, &iso_pi(sk)->src);
+ bacpy(&dst, &iso_pi(sk)->dst);
+ src_type = iso_pi(sk)->src_type;
+ release_sock(sk);
+
+ hdev = hci_get_route(&dst, &src, src_type);
if (!hdev)
return;
@@ -1595,6 +1623,7 @@ static void iso_conn_big_sync(struct sock *sk)
release_sock(sk);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
}
static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 45b175399e8d..c4ccfbda9d78 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5643,6 +5643,15 @@ static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident)
l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
}
+static inline void l2cap_sig_send_mtu_rej(struct l2cap_conn *conn, u8 ident)
+{
+ struct l2cap_cmd_rej_mtu rej;
+
+ rej.reason = cpu_to_le16(L2CAP_REJ_MTU_EXCEEDED);
+ rej.max_mtu = cpu_to_le16(L2CAP_SIG_MTU);
+ l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
+}
+
static inline void l2cap_sig_channel(struct l2cap_conn *conn,
struct sk_buff *skb)
{
@@ -5655,6 +5664,43 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn,
if (hcon->type != ACL_LINK)
goto drop;
+ /*
+ * Bluetooth Core v5.4, Vol 3, Part A, Section 4: the BR/EDR
+ * signaling channel has a fixed signaling MTU (MTUsig) whose
+ * minimum and default is 48 octets. Section 4.1 says that on
+ * an MTUExceeded command reject the identifier "shall match
+ * the first request command in the L2CAP packet" and that
+ * packets containing only response commands "shall be
+ * silently discarded".
+ *
+ * Linux intentionally deviates from that prescription:
+ *
+ * 1. Silently discarding desynchronizes the peer. The
+ * remote stack never learns its responses were dropped,
+ * so any state machine waiting on a paired response
+ * stalls until its own timer fires.
+ *
+ * 2. Locating "the first request command" requires walking
+ * command headers past MTUsig, i.e. processing bytes
+ * from a packet we have already decided is too large to
+ * process.
+ *
+ * Reject every over-MTUsig signaling packet with one
+ * L2CAP_REJ_MTU_EXCEEDED command reject. The reject's
+ * reason field is what tells the peer that the whole packet
+ * was discarded; the identifier value is informational, so
+ * we use the identifier from the first command header, a
+ * single fixed-offset byte read.
+ */
+ if (skb->len > L2CAP_SIG_MTU) {
+ u8 ident = skb->data[1];
+
+ BT_DBG("signaling packet exceeds MTU: %u > %u",
+ skb->len, L2CAP_SIG_MTU);
+ l2cap_sig_send_mtu_rej(conn, ident);
+ goto drop;
+ }
+
while (skb->len >= L2CAP_CMD_HDR_SIZE) {
u16 len;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index de5bd6b637b2..f4aa814a0397 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -8638,6 +8638,12 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
if (!cur_len)
continue;
+ /* If the current field length would exceed the total data
+ * length, then it's invalid.
+ */
+ if (i + cur_len >= len)
+ return false;
+
if (data[i + 1] == EIR_FLAGS &&
(!is_adv_data || flags_managed(adv_flags)))
return false;
@@ -8654,12 +8660,6 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
if (data[i + 1] == EIR_APPEARANCE &&
appearance_managed(adv_flags))
return false;
-
- /* If the current field length would exceed the total data
- * length, then it's invalid.
- */
- if (i + cur_len >= len)
- return false;
}
return true;
@@ -9114,8 +9114,9 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data,
BT_DBG("%s", hdev->name);
- expected_len = struct_size(cp, data, cp->adv_data_len + cp->scan_rsp_len);
- if (expected_len != data_len)
+ expected_len = struct_size(cp, data, cp->adv_data_len +
+ cp->scan_rsp_len);
+ if (expected_len > data_len)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA,
MGMT_STATUS_INVALID_PARAMS);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index d11bd5337d57..364b9381c2dc 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1431,10 +1431,15 @@ static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn)
static int rfcomm_recv_pn(struct rfcomm_session *s, int cr, struct sk_buff *skb)
{
- struct rfcomm_pn *pn = (void *) skb->data;
+ struct rfcomm_pn *pn;
struct rfcomm_dlc *d;
- u8 dlci = pn->dlci;
+ u8 dlci;
+
+ pn = skb_pull_data(skb, sizeof(*pn));
+ if (!pn)
+ return -EILSEQ;
+ dlci = pn->dlci;
BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
if (!dlci)
@@ -1483,8 +1488,8 @@ static int rfcomm_recv_pn(struct rfcomm_session *s, int cr, struct sk_buff *skb)
static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_buff *skb)
{
- struct rfcomm_rpn *rpn = (void *) skb->data;
- u8 dlci = __get_dlci(rpn->dlci);
+ struct rfcomm_rpn *rpn;
+ u8 dlci;
u8 bit_rate = 0;
u8 data_bits = 0;
@@ -1495,15 +1500,16 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
u8 xoff_char = 0;
u16 rpn_mask = RFCOMM_RPN_PM_ALL;
- BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x",
- dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl,
- rpn->xon_char, rpn->xoff_char, rpn->param_mask);
+ if (len == 1) {
+ rpn = skb_pull_data(skb, 1);
+ if (!rpn)
+ return -EILSEQ;
- if (!cr)
- return 0;
+ dlci = __get_dlci(rpn->dlci);
+
+ if (!cr)
+ return 0;
- if (len == 1) {
- /* This is a request, return default (according to ETSI TS 07.10) settings */
bit_rate = RFCOMM_RPN_BR_9600;
data_bits = RFCOMM_RPN_DATA_8;
stop_bits = RFCOMM_RPN_STOP_1;
@@ -1514,6 +1520,19 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
goto rpn_out;
}
+ rpn = skb_pull_data(skb, sizeof(*rpn));
+ if (!rpn)
+ return -EILSEQ;
+
+ dlci = __get_dlci(rpn->dlci);
+
+ BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x",
+ dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl,
+ rpn->xon_char, rpn->xoff_char, rpn->param_mask);
+
+ if (!cr)
+ return 0;
+
/* Check for sane values, ignore/accept bit_rate, 8 bits, 1 stop bit,
* no parity, no flow control lines, normal XON/XOFF chars */
@@ -1589,9 +1608,14 @@ rpn_out:
static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb)
{
- struct rfcomm_rls *rls = (void *) skb->data;
- u8 dlci = __get_dlci(rls->dlci);
+ struct rfcomm_rls *rls;
+ u8 dlci;
+ rls = skb_pull_data(skb, sizeof(*rls));
+ if (!rls)
+ return -EILSEQ;
+
+ dlci = __get_dlci(rls->dlci);
BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status);
if (!cr)
@@ -1608,10 +1632,15 @@ static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb
static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb)
{
- struct rfcomm_msc *msc = (void *) skb->data;
+ struct rfcomm_msc *msc;
struct rfcomm_dlc *d;
- u8 dlci = __get_dlci(msc->dlci);
+ u8 dlci;
+
+ msc = skb_pull_data(skb, sizeof(*msc));
+ if (!msc)
+ return -EILSEQ;
+ dlci = __get_dlci(msc->dlci);
BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig);
d = rfcomm_dlc_get(s, dlci);
@@ -1644,17 +1673,19 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb
static int rfcomm_recv_mcc(struct rfcomm_session *s, struct sk_buff *skb)
{
- struct rfcomm_mcc *mcc = (void *) skb->data;
+ struct rfcomm_mcc *mcc;
u8 type, cr, len;
+ mcc = skb_pull_data(skb, sizeof(*mcc));
+ if (!mcc)
+ return -EILSEQ;
+
cr = __test_cr(mcc->type);
type = __get_mcc_type(mcc->type);
len = __get_mcc_len(mcc->len);
BT_DBG("%p type 0x%x cr %d", s, type, cr);
- skb_pull(skb, 2);
-
switch (type) {
case RFCOMM_PN:
rfcomm_recv_pn(s, cr, skb);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index bd7d959c6e9e..805ed5d28ed6 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -122,7 +122,7 @@ static struct sock *__rfcomm_get_listen_sock_by_addr(u8 channel, bdaddr_t *src)
}
/* Find socket with channel and source bdaddr.
- * Returns closest match.
+ * Returns closest match with an extra reference held.
*/
static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src)
{
@@ -136,15 +136,25 @@ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *
if (rfcomm_pi(sk)->channel == channel) {
/* Exact match. */
- if (!bacmp(&rfcomm_pi(sk)->src, src))
+ if (!bacmp(&rfcomm_pi(sk)->src, src)) {
+ sock_hold(sk);
break;
+ }
/* Closest match */
- if (!bacmp(&rfcomm_pi(sk)->src, BDADDR_ANY))
+ if (!bacmp(&rfcomm_pi(sk)->src, BDADDR_ANY)) {
+ if (sk1)
+ sock_put(sk1);
+
sk1 = sk;
+ sock_hold(sk1);
+ }
}
}
+ if (sk && sk1)
+ sock_put(sk1);
+
read_unlock(&rfcomm_sk_list.lock);
return sk ? sk : sk1;
@@ -941,6 +951,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
{
struct sock *sk, *parent;
bdaddr_t src, dst;
+ bool defer_setup = false;
int result = 0;
BT_DBG("session %p channel %d", s, channel);
@@ -954,6 +965,11 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
lock_sock(parent);
+ if (parent->sk_state != BT_LISTEN)
+ goto done;
+
+ defer_setup = test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags);
+
/* Check for backlog size */
if (sk_acceptq_is_full(parent)) {
BT_DBG("backlog full %d", parent->sk_ack_backlog);
@@ -981,9 +997,11 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
done:
release_sock(parent);
- if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
+ if (defer_setup)
parent->sk_state_change(parent);
+ sock_put(parent);
+
return result;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index f1799c6a6f87..140869e5b2df 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -312,11 +312,21 @@ static int sco_connect(struct sock *sk)
struct sco_conn *conn;
struct hci_conn *hcon;
struct hci_dev *hdev;
+ bdaddr_t src, dst;
+ struct bt_codec codec;
+ __u16 setting;
int err, type;
- BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
+ lock_sock(sk);
+ bacpy(&src, &sco_pi(sk)->src);
+ bacpy(&dst, &sco_pi(sk)->dst);
+ setting = sco_pi(sk)->setting;
+ codec = sco_pi(sk)->codec;
+ release_sock(sk);
+
+ BT_DBG("%pMR -> %pMR", &src, &dst);
- hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
+ hdev = hci_get_route(&dst, &src, BDADDR_BREDR);
if (!hdev)
return -EHOSTUNREACH;
@@ -327,7 +337,7 @@ static int sco_connect(struct sock *sk)
else
type = SCO_LINK;
- switch (sco_pi(sk)->setting & SCO_AIRMODE_MASK) {
+ switch (setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_TRANSP:
if (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)) {
err = -EOPNOTSUPP;
@@ -336,8 +346,8 @@ static int sco_connect(struct sock *sk)
break;
}
- hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
- sco_pi(sk)->setting, &sco_pi(sk)->codec,
+ hcon = hci_connect_sco(hdev, type, &dst,
+ setting, &codec,
READ_ONCE(sk->sk_sndtimeo));
if (IS_ERR(hcon)) {
err = PTR_ERR(hcon);
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 7dfbcdfc30e5..c9e229af0366 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -31,6 +31,9 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par)
const struct arphdr *ap;
struct arphdr _ah;
+ if (skb_ensure_writable(skb, sizeof(_ah) + ETH_ALEN))
+ return EBT_DROP;
+
ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
if (ap == NULL)
return EBT_DROP;
diff --git a/net/core/sock.c b/net/core/sock.c
index b37b664b6eb9..d097025c116a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2676,8 +2676,12 @@ void sock_wfree(struct sk_buff *skb)
int old;
if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+ void (*sk_write_space)(struct sock *sk);
+
+ sk_write_space = READ_ONCE(sk->sk_write_space);
+
if (sock_flag(sk, SOCK_RCU_FREE) &&
- sk->sk_write_space == sock_def_write_space) {
+ sk_write_space == sock_def_write_space) {
rcu_read_lock();
free = __refcount_sub_and_test(len, &sk->sk_wmem_alloc,
&old);
@@ -2693,7 +2697,7 @@ void sock_wfree(struct sk_buff *skb)
* after sk_write_space() call
*/
WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
- sk->sk_write_space(sk);
+ sk_write_space(sk);
len = 1;
}
/*
diff --git a/net/devlink/core.c b/net/devlink/core.c
index eeb6a71f5f56..fe9f6a0a67d5 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -518,6 +518,8 @@ void devlink_free(struct devlink *devlink)
{
ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ devlink_rel_put(devlink);
+
WARN_ON(!list_empty(&devlink->trap_policer_list));
WARN_ON(!list_empty(&devlink->trap_group_list));
WARN_ON(!list_empty(&devlink->trap_list));
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index b514e43766ef..a28dfd8490c5 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -35,10 +35,8 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
rcu_read_lock();
sn = rcu_dereference(hsr->self_node);
- if (!sn) {
- WARN_ONCE(1, "HSR: No self node\n");
+ if (!sn)
goto out;
- }
if (ether_addr_equal(addr, sn->macaddress_A) ||
ether_addr_equal(addr, sn->macaddress_B))
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index 0c07662b44c0..4df76ff50699 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -255,6 +255,11 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
pr_debug("package xmit\n");
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
+
WARN_ON_ONCE(skb->len > IPV6_MIN_MTU);
/* We must take a copy of the skb before we modify/replace the ipv6
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index dbcd37dfdc15..5b934ce8d98a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1148,6 +1148,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req)
/* The timer needs to be setup after a successful insertion. */
req->timeout = tcp_timeout_init((struct sock *)req);
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
+
+ preempt_disable_nested();
+
mod_timer(&req->rsk_timer, jiffies + req->timeout);
/* before letting lookups find us, make sure all req fields
@@ -1155,6 +1158,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req)
*/
smp_wmb();
refcount_set(&req->rsk_refcnt, 2 + 1);
+
+ preempt_enable_nested();
+
return true;
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index be8815ce3ac2..09d745112c15 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -530,6 +530,10 @@ int ip_options_get(struct net *net, struct ip_options_rcu **optp,
kfree(opt);
return -EINVAL;
}
+ if (opt->opt.srr && !ns_capable(net->user_ns, CAP_NET_RAW)) {
+ kfree(opt);
+ return -EPERM;
+ }
kfree(*optp);
*optp = opt;
return 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0ac2bf4f8759..70f6cbd4ef73 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2011,6 +2011,14 @@ try_again:
}
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
+
+ /*
+ * skb->dev still aliases the UDP rx dev_scratch (its charge was freed
+ * on dequeue above); a sockmap verdict program may deref it via
+ * bpf_sk_lookup_*(), so clear it -> bpf_skc_lookup() uses skb->sk
+ */
+ skb->dev = NULL;
+
return recv_actor(sk, skb);
}
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 67a42e01dfc3..be6dac8a8566 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -243,16 +243,16 @@ static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
{
unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
- spin_lock(&acaddr_hash_lock);
+ spin_lock_bh(&acaddr_hash_lock);
hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
- spin_unlock(&acaddr_hash_lock);
+ spin_unlock_bh(&acaddr_hash_lock);
}
static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
{
- spin_lock(&acaddr_hash_lock);
+ spin_lock_bh(&acaddr_hash_lock);
hlist_del_init_rcu(&aca->aca_addr_lst);
- spin_unlock(&acaddr_hash_lock);
+ spin_unlock_bh(&acaddr_hash_lock);
}
static void aca_get(struct ifacaddr6 *aca)
@@ -371,10 +371,10 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
aca->aca_next = idev->ac_list;
rcu_assign_pointer(idev->ac_list, aca);
- write_unlock_bh(&idev->lock);
-
ipv6_add_acaddr_hash(net, aca);
+ write_unlock_bh(&idev->lock);
+
ip6_ins_rt(net, f6i);
addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -649,8 +649,8 @@ void ipv6_anycast_cleanup(void)
{
int i;
- spin_lock(&acaddr_hash_lock);
+ spin_lock_bh(&acaddr_hash_lock);
for (i = 0; i < IN6_ADDR_HSIZE; i++)
WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
- spin_unlock(&acaddr_hash_lock);
+ spin_unlock_bh(&acaddr_hash_lock);
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3330adcf26db..d9b855d5191b 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1424,9 +1424,9 @@ out:
static void __mld_query_work(struct sk_buff *skb)
{
struct mld2_query *mlh2 = NULL;
- const struct in6_addr *group;
unsigned long max_delay;
struct inet6_dev *idev;
+ struct in6_addr group;
struct ifmcaddr6 *ma;
struct mld_msg *mld;
int group_type;
@@ -1458,8 +1458,8 @@ static void __mld_query_work(struct sk_buff *skb)
goto kfree_skb;
mld = (struct mld_msg *)icmp6_hdr(skb);
- group = &mld->mld_mca;
- group_type = ipv6_addr_type(group);
+ group = mld->mld_mca;
+ group_type = ipv6_addr_type(&group);
if (group_type != IPV6_ADDR_ANY &&
!(group_type&IPV6_ADDR_MULTICAST))
@@ -1509,7 +1509,7 @@ static void __mld_query_work(struct sk_buff *skb)
}
} else {
for_each_mc_mclock(idev, ma) {
- if (!ipv6_addr_equal(group, &ma->mca_addr))
+ if (!ipv6_addr_equal(&group, &ma->mca_addr))
continue;
if (ma->mca_flags & MAF_TIMER_RUNNING) {
/* gsquery <- gsquery && mark */
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index c0a0075e2590..2dbe44715df3 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -191,6 +191,9 @@ static bool nft_fib6_info_nh_uses_dev(struct fib6_info *rt,
if (nft_fib6_info_nh_dev_match(nh_dev, dev))
return true;
+
+ if (!READ_ONCE(rt->fib6_nsiblings))
+ return false;
}
return false;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 99d6582f41de..e0b1915be1a6 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1045,64 +1045,76 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
{
struct pppol2tp_ioc_stats stats;
struct l2tp_session *session;
+ int err = 0;
+
+ session = pppol2tp_sock_to_session(sock->sk);
+ /* Validate session presence and magic integrity ONLY for commands
+ * that belong to L2TP and require a valid session.
+ */
switch (cmd) {
case PPPIOCGMRU:
case PPPIOCGFLAGS:
- session = sock->sk->sk_user_data;
+ case PPPIOCSMRU:
+ case PPPIOCSFLAGS:
+ case PPPIOCGL2TPSTATS:
if (!session)
return -ENOTCONN;
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
+ if (session->magic != L2TP_SESSION_MAGIC) {
+ l2tp_session_put(session);
return -EBADF;
+ }
+ break;
+ default:
+ break;
+ }
+ switch (cmd) {
+ case PPPIOCGMRU:
+ case PPPIOCGFLAGS:
/* Not defined for tunnels */
- if (!session->session_id && !session->peer_session_id)
- return -ENOSYS;
+ if (!session->session_id && !session->peer_session_id) {
+ err = -ENOSYS;
+ break;
+ }
- if (put_user(0, (int __user *)arg))
- return -EFAULT;
+ if (put_user(0, (int __user *)arg)) {
+ err = -EFAULT;
+ break;
+ }
break;
case PPPIOCSMRU:
case PPPIOCSFLAGS:
- session = sock->sk->sk_user_data;
- if (!session)
- return -ENOTCONN;
-
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
- return -EBADF;
-
/* Not defined for tunnels */
- if (!session->session_id && !session->peer_session_id)
- return -ENOSYS;
+ if (!session->session_id && !session->peer_session_id) {
+ err = -ENOSYS;
+ break;
+ }
- if (!access_ok((int __user *)arg, sizeof(int)))
- return -EFAULT;
+ if (!access_ok((int __user *)arg, sizeof(int))) {
+ err = -EFAULT;
+ break;
+ }
break;
case PPPIOCGL2TPSTATS:
- session = sock->sk->sk_user_data;
- if (!session)
- return -ENOTCONN;
-
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
- return -EBADF;
-
/* Session 0 represents the parent tunnel */
if (!session->session_id && !session->peer_session_id) {
u32 session_id;
- int err;
if (copy_from_user(&stats, (void __user *)arg,
- sizeof(stats)))
- return -EFAULT;
+ sizeof(stats))) {
+ err = -EFAULT;
+ break;
+ }
session_id = stats.session_id;
err = pppol2tp_tunnel_copy_stats(&stats,
session->tunnel);
if (err < 0)
- return err;
+ break;
stats.session_id = session_id;
} else {
@@ -1112,15 +1124,21 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
stats.tunnel_id = session->tunnel->tunnel_id;
stats.using_ipsec = l2tp_tunnel_uses_xfrm(session->tunnel);
- if (copy_to_user((void __user *)arg, &stats, sizeof(stats)))
- return -EFAULT;
+ if (copy_to_user((void __user *)arg, &stats, sizeof(stats))) {
+ err = -EFAULT;
+ break;
+ }
break;
default:
- return -ENOIOCTLCMD;
+ err = -ENOIOCTLCMD;
+ break;
}
- return 0;
+ if (session)
+ l2tp_session_put(session);
+
+ return err;
}
/*****************************************************************************
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b487d2330f25..ea7f63e1fc17 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2181,7 +2181,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
case IEEE80211_RADIOTAP_ANTENNA:
/* this can appear multiple times, keep a bitmap */
- info->control.antennas |= BIT(*iterator.this_arg);
+ /* control.antennas is only a 2-bit bitmap */
+ if (*iterator.this_arg < 2)
+ info->control.antennas |= BIT(*iterator.this_arg);
break;
case IEEE80211_RADIOTAP_DATA_RETRIES:
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 8a1c5698983c..b3ea7854818f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -566,12 +566,17 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct tcp_sock *tp = tcp_sk(sk);
unsigned int dss_size = 0;
struct mptcp_ext *mpext;
unsigned int ack_size;
bool ret = false;
- u64 ack_seq;
+ /* Zero `use_ack` and `use_map` flags with one shot. */
+ BUILD_BUG_ON(sizeof_field(struct mptcp_ext, flags) != sizeof(u16));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_ext, flags),
+ sizeof(u16)));
+ *(u16 *)&opts->ext_copy.flags = 0;
opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL;
@@ -595,20 +600,16 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
/* passive sockets msk will set the 'can_ack' after accept(), even
* if the first subflow may have the already the remote key handy
*/
- opts->ext_copy.use_ack = 0;
if (!READ_ONCE(msk->can_ack)) {
*size = ALIGN(dss_size, 4);
return ret;
}
- ack_seq = READ_ONCE(msk->ack_seq);
if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
- opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
} else {
ack_size = TCPOLEN_MPTCP_DSS_ACK32;
- opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
@@ -618,6 +619,12 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
if (dss_size == 0)
ack_size += TCPOLEN_MPTCP_DSS_BASE;
+ /* The caller is __tcp_transmit_skb(), and will compute the new rcv
+ * wnd soon: ensure that the window can shrink.
+ */
+ if (skb)
+ tp->rcv_wnd = tp->rcv_nxt - tp->rcv_wup;
+
dss_size += ack_size;
*size = ALIGN(dss_size, 4);
@@ -658,7 +665,6 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- bool drop_other_suboptions = false;
unsigned int opt_size = *size;
struct mptcp_addr_info addr;
bool echo;
@@ -669,36 +675,20 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
*/
if (!mptcp_pm_should_add_signal(msk) ||
(opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
- !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr,
- &echo, &drop_other_suboptions))
+ !skb || !skb_is_tcp_pure_ack(skb) ||
+ !mptcp_pm_add_addr_signal(msk, opt_size, remaining, &addr, &echo))
return false;
- /*
- * Later on, mptcp_write_options() will enforce mutually exclusion with
- * DSS, bail out if such option is set and we can't drop it.
- */
- if (drop_other_suboptions)
- remaining += opt_size;
- else if (opts->suboptions & OPTION_MPTCP_DSS)
- return false;
+ remaining += opt_size;
len = mptcp_add_addr_len(addr.family, echo, !!addr.port);
if (remaining < len)
return false;
*size = len;
- if (drop_other_suboptions) {
- pr_debug("drop other suboptions\n");
- opts->suboptions = 0;
-
- /* note that e.g. DSS could have written into the memory
- * aliased by ahmac, we must reset the field here
- * to avoid appending the hmac even for ADD_ADDR echo
- * options
- */
- opts->ahmac = 0;
- *size -= opt_size;
- }
+ pr_debug("drop other suboptions\n");
+ opts->suboptions = 0;
+ *size -= opt_size;
opts->addr = addr;
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
@@ -708,6 +698,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
&opts->addr);
} else {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
+ opts->ahmac = 0;
}
pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n",
opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
@@ -1297,19 +1288,14 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}
-static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
+static u64 mptcp_set_rwin(struct mptcp_sock *msk, struct tcp_sock *tp,
+ struct tcphdr *th, u64 ack_seq)
{
const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
- u64 ack_seq, rcv_wnd_old, rcv_wnd_new;
- struct mptcp_sock *msk;
+ u64 rcv_wnd_old, rcv_wnd_new;
u32 new_win;
u64 win;
- subflow = mptcp_subflow_ctx(ssk);
- msk = mptcp_sk(subflow->conn);
-
- ack_seq = READ_ONCE(msk->ack_seq);
rcv_wnd_new = ack_seq + tp->rcv_wnd;
rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent);
@@ -1362,7 +1348,7 @@ raise_win:
update_wspace:
WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
- subflow->rcv_wnd_sent = rcv_wnd_new;
+ return rcv_wnd_new;
}
static void mptcp_track_rwin(struct tcp_sock *tp)
@@ -1474,13 +1460,25 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
if (mpext->use_ack) {
+ struct mptcp_sock *msk;
+ u64 ack_seq;
+
+ /* DSS option is set only by mptcp_established_options,
+ * the caller is __tcp_transmit_skb() and ssk is always
+ * not NULL.
+ */
+ subflow = mptcp_subflow_ctx(ssk);
+ msk = mptcp_sk(subflow->conn);
+ ack_seq = READ_ONCE(msk->ack_seq);
if (mpext->ack64) {
- put_unaligned_be64(mpext->data_ack, ptr);
+ put_unaligned_be64(ack_seq, ptr);
ptr += 2;
} else {
- put_unaligned_be32(mpext->data_ack32, ptr);
+ put_unaligned_be32(ack_seq, ptr);
ptr += 1;
}
+ subflow->rcv_wnd_sent = mptcp_set_rwin(msk, tp, th,
+ ack_seq);
}
if (mpext->use_map) {
@@ -1708,9 +1706,6 @@ mp_capable_done:
i += 4;
}
}
-
- if (tp)
- mptcp_set_rwin(tp, th);
}
__be32 mptcp_get_reset_option(const struct sk_buff *skb)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 3e770c7407e1..470501470fe5 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -887,10 +887,9 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
}
}
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
- unsigned int opt_size, unsigned int remaining,
- struct mptcp_addr_info *addr, bool *echo,
- bool *drop_other_suboptions)
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int opt_size,
+ unsigned int remaining,
+ struct mptcp_addr_info *addr, bool *echo)
{
bool skip_add_addr = false;
int ret = false;
@@ -908,10 +907,7 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
* plain dup-ack from TCP perspective. The other MPTCP-relevant info,
* if any, will be carried by the 'original' TCP ack
*/
- if (skb && skb_is_tcp_pure_ack(skb)) {
- remaining += opt_size;
- *drop_other_suboptions = true;
- }
+ remaining += opt_size;
*echo = mptcp_pm_should_add_signal_echo(msk);
if (*echo) {
@@ -929,9 +925,6 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
if (remaining < mptcp_add_addr_len(family, *echo, port)) {
struct net *net = sock_net((struct sock *)msk);
- if (!*drop_other_suboptions)
- goto out_unlock;
-
if (*echo) {
MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP);
} else {
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 8cbc1920afb4..0d3a95e676f1 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -408,19 +408,21 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
local.flags = entry.flags;
local.ifindex = entry.ifindex;
+ spin_lock_bh(&msk->pm.lock);
+ msk->pm.extra_subflows++;
+ spin_unlock_bh(&msk->pm.lock);
+
lock_sock(sk);
err = __mptcp_subflow_connect(sk, &local, &addr_r);
release_sock(sk);
- if (err)
+ if (err) {
GENL_SET_ERR_MSG_FMT(info, "connect error: %d", err);
- spin_lock_bh(&msk->pm.lock);
- if (err)
+ spin_lock_bh(&msk->pm.lock);
mptcp_userspace_pm_delete_local_addr(msk, &entry);
- else
- msk->pm.extra_subflows++;
- spin_unlock_bh(&msk->pm.lock);
+ spin_unlock_bh(&msk->pm.lock);
+ }
create_err:
sock_put(sk);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a72a6ad6ee8b..cb9515f505aa 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2276,6 +2276,10 @@ static bool mptcp_move_skbs(struct sock *sk)
mptcp_backlog_spooled(sk, moved, &skbs);
}
mptcp_data_unlock(sk);
+
+ if (enqueued && mptcp_epollin_ready(sk))
+ sk->sk_data_ready(sk);
+
return enqueued;
}
@@ -2865,6 +2869,10 @@ static void __mptcp_retrans(struct sock *sk)
msk->bytes_retrans += len;
dfrag->already_sent = max(dfrag->already_sent, len);
+ /* With csum enabled retransmission can send new data. */
+ if (after64(dfrag->already_sent + dfrag->data_seq, msk->snd_nxt))
+ WRITE_ONCE(msk->snd_nxt, dfrag->already_sent + dfrag->data_seq);
+
reset_timer:
mptcp_check_and_set_pending(sk);
@@ -4420,6 +4428,8 @@ static int __mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
}
mptcp_eat_recv_skb(sk, skb);
+ if (!desc->count)
+ break;
}
if (noack)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e4f5aba24da7..b93b878478d2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -1229,10 +1229,9 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
}
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
- unsigned int opt_size, unsigned int remaining,
- struct mptcp_addr_info *addr, bool *echo,
- bool *drop_other_suboptions);
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int opt_size,
+ unsigned int remaining,
+ struct mptcp_addr_info *addr, bool *echo);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 87b5796d0135..fcf6feb2a9eb 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -241,15 +241,19 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int err;
lock_sock(ssk);
- sock_set_timestamping(ssk, optname, timestamping);
+ err = sock_set_timestamping(ssk, optname, timestamping);
release_sock(ssk);
+
+ if (err < 0 && ret == 0)
+ ret = err;
}
release_sock(sk);
- return 0;
+ return ret;
}
static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
@@ -813,10 +817,11 @@ static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level,
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int err;
- ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
- if (ret)
- break;
+ err = tcp_setsockopt(ssk, level, optname, optval, optlen);
+ if (err < 0 && ret == 0)
+ ret = err;
}
if (!ret)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index bd9cae44d214..16daba8cac83 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1898,7 +1898,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
if (ret_hooks >= 0)
ip_vs_unregister_hooks(ipvs, u->af);
if (svc != NULL) {
- ip_vs_unbind_scheduler(svc, sched);
+ ip_vs_unbind_scheduler(svc);
ip_vs_service_free(svc);
}
ip_vs_scheduler_put(sched);
@@ -1962,9 +1962,8 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
old_sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched != old_sched) {
if (old_sched) {
- ip_vs_unbind_scheduler(svc, old_sched);
- RCU_INIT_POINTER(svc->scheduler, NULL);
- /* Wait all svc->sched_data users */
+ ip_vs_unbind_scheduler(svc);
+ /* Wait all svc->scheduler/sched_data users */
synchronize_rcu();
}
/* Bind the new scheduler */
@@ -1972,6 +1971,10 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
ret = ip_vs_bind_scheduler(svc, sched);
if (ret) {
ip_vs_scheduler_put(sched);
+ /* Try to restore the old_sched */
+ if (old_sched &&
+ !ip_vs_bind_scheduler(svc, old_sched))
+ old_sched = NULL;
goto out;
}
}
@@ -2027,7 +2030,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
/* Unbind scheduler */
old_sched = rcu_dereference_protected(svc->scheduler, 1);
- ip_vs_unbind_scheduler(svc, old_sched);
+ ip_vs_unbind_scheduler(svc);
ip_vs_scheduler_put(old_sched);
/* Unbind persistence engine, keep svc->pe */
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index c6e421c4e299..24adc38942a0 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -56,19 +56,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
/*
* Unbind a service with its scheduler
*/
-void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
- struct ip_vs_scheduler *sched)
+void ip_vs_unbind_scheduler(struct ip_vs_service *svc)
{
- struct ip_vs_scheduler *cur_sched;
+ struct ip_vs_scheduler *sched;
- cur_sched = rcu_dereference_protected(svc->scheduler, 1);
- /* This check proves that old 'sched' was installed */
- if (!cur_sched)
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (!sched)
return;
+ /* Reset the scheduler before initiating any RCU callbacks */
+ rcu_assign_pointer(svc->scheduler, NULL);
+ smp_wmb(); /* paired with smp_rmb() in ip_vs_schedule() */
if (sched->done_service)
sched->done_service(svc);
- /* svc->scheduler can be set to NULL only by caller */
}
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 522183b9a604..2ebe4cb47cf6 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -203,7 +203,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
if (parse_dcc(data, data_limit, &dcc_ip,
&dcc_port, &addr_beg_p, &addr_end_p)) {
pr_debug("unable to parse dcc command\n");
- continue;
+ goto out;
}
pr_debug("DCC bound ip/port: %pI4:%u\n",
@@ -217,7 +217,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n",
&tuple->src.u3.ip,
&dcc_ip, dcc_port);
- continue;
+ goto out;
}
exp = nf_ct_expect_alloc(ct);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 036c8586f49b..ed00114f65f3 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -22,6 +22,8 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_synproxy.h>
+static DEFINE_MUTEX(synproxy_mutex);
+
unsigned int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);
@@ -769,26 +771,31 @@ static const struct nf_hook_ops ipv4_synproxy_ops[] = {
int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net)
{
- int err;
+ int err = 0;
+ mutex_lock(&synproxy_mutex);
if (snet->hook_ref4 == 0) {
err = nf_register_net_hooks(net, ipv4_synproxy_ops,
ARRAY_SIZE(ipv4_synproxy_ops));
if (err)
- return err;
+ goto out;
}
snet->hook_ref4++;
- return 0;
+out:
+ mutex_unlock(&synproxy_mutex);
+ return err;
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_init);
void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net)
{
+ mutex_lock(&synproxy_mutex);
snet->hook_ref4--;
if (snet->hook_ref4 == 0)
nf_unregister_net_hooks(net, ipv4_synproxy_ops,
ARRAY_SIZE(ipv4_synproxy_ops));
+ mutex_unlock(&synproxy_mutex);
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_fini);
@@ -1193,27 +1200,32 @@ static const struct nf_hook_ops ipv6_synproxy_ops[] = {
int
nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net)
{
- int err;
+ int err = 0;
+ mutex_lock(&synproxy_mutex);
if (snet->hook_ref6 == 0) {
err = nf_register_net_hooks(net, ipv6_synproxy_ops,
ARRAY_SIZE(ipv6_synproxy_ops));
if (err)
- return err;
+ goto out;
}
snet->hook_ref6++;
- return 0;
+out:
+ mutex_unlock(&synproxy_mutex);
+ return err;
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_init);
void
nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net)
{
+ mutex_lock(&synproxy_mutex);
snet->hook_ref6--;
if (snet->hook_ref6 == 0)
nf_unregister_net_hooks(net, ipv6_synproxy_ops,
ARRAY_SIZE(ipv6_synproxy_ops));
+ mutex_unlock(&synproxy_mutex);
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini);
#endif /* CONFIG_IPV6 */
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 2316c77f4228..dfd41fc8d9b8 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -19,7 +19,6 @@ struct nft_byteorder {
u8 sreg;
u8 dreg;
enum nft_byteorder_ops op:8;
- u8 len;
u8 size;
};
@@ -28,13 +27,8 @@ void nft_byteorder_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
const struct nft_byteorder *priv = nft_expr_priv(expr);
- u32 *src = &regs->data[priv->sreg];
+ const u32 *src = &regs->data[priv->sreg];
u32 *dst = &regs->data[priv->dreg];
- u16 *s16, *d16;
- unsigned int i;
-
- s16 = (void *)src;
- d16 = (void *)dst;
switch (priv->size) {
case 8: {
@@ -43,18 +37,14 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
- for (i = 0; i < priv->len / 8; i++) {
- src64 = nft_reg_load64(&src[i]);
- nft_reg_store64(&dst64[i],
- be64_to_cpu((__force __be64)src64));
- }
+ src64 = nft_reg_load64(src);
+
+ nft_reg_store64(dst64, be64_to_cpu((__force __be64)src64));
break;
case NFT_BYTEORDER_HTON:
- for (i = 0; i < priv->len / 8; i++) {
- src64 = (__force __u64)
- cpu_to_be64(nft_reg_load64(&src[i]));
- nft_reg_store64(&dst64[i], src64);
- }
+ src64 = (__force __u64)cpu_to_be64(nft_reg_load64(src));
+
+ nft_reg_store64(dst64, src64);
break;
}
break;
@@ -62,24 +52,20 @@ void nft_byteorder_eval(const struct nft_expr *expr,
case 4:
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
- for (i = 0; i < priv->len / 4; i++)
- dst[i] = ntohl((__force __be32)src[i]);
+ *dst = ntohl((__force __be32)*src);
break;
case NFT_BYTEORDER_HTON:
- for (i = 0; i < priv->len / 4; i++)
- dst[i] = (__force __u32)htonl(src[i]);
+ *dst = (__force __u32)htonl(*src);
break;
}
break;
case 2:
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
- for (i = 0; i < priv->len / 2; i++)
- d16[i] = ntohs((__force __be16)s16[i]);
+ nft_reg_store16(dst, ntohs(nft_reg_load_be16(src)));
break;
case NFT_BYTEORDER_HTON:
- for (i = 0; i < priv->len / 2; i++)
- d16[i] = (__force __u16)htons(s16[i]);
+ nft_reg_store_be16(dst, htons(nft_reg_load16(src)));
break;
}
break;
@@ -137,20 +123,22 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- priv->len = len;
+ /* no longer support multi-reg conversions */
+ if (len != size)
+ return -EOPNOTSUPP;
err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg,
- priv->len);
+ len);
if (err < 0)
return err;
err = nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG],
&priv->dreg, NULL, NFT_DATA_VALUE,
- priv->len);
+ len);
if (err < 0)
return err;
- if (nft_reg_overlap(priv->sreg, priv->dreg, priv->len))
+ if (nft_reg_overlap(priv->sreg, priv->dreg, len))
return -EINVAL;
return 0;
@@ -167,10 +155,11 @@ static int nft_byteorder_dump(struct sk_buff *skb,
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len)))
- goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size)))
goto nla_put_failure;
+ /* compatibility for old userspace which permitted size != len */
+ if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->size)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index fa2cc556331c..357513c6dcea 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -78,7 +78,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
break;
}
- if (ct == NULL)
+ if (!ct || nf_ct_is_template(ct))
goto err;
switch (priv->key) {
@@ -180,12 +180,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
tuple = &ct->tuplehash[priv->dir].tuple;
switch (priv->key) {
case NFT_CT_SRC:
- memcpy(dest, tuple->src.u3.all,
- nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+ memcpy(dest, tuple->src.u3.all, priv->len);
return;
case NFT_CT_DST:
- memcpy(dest, tuple->dst.u3.all,
- nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+ memcpy(dest, tuple->dst.u3.all, priv->len);
return;
case NFT_CT_PROTO_SRC:
nft_reg_store16(dest, (__force u16)tuple->src.u.all);
diff --git a/net/netfilter/nft_ct_fast.c b/net/netfilter/nft_ct_fast.c
index e684c8a91848..ecf7b3a404be 100644
--- a/net/netfilter/nft_ct_fast.c
+++ b/net/netfilter/nft_ct_fast.c
@@ -30,7 +30,7 @@ void nft_ct_get_fast_eval(const struct nft_expr *expr,
break;
}
- if (!ct) {
+ if (!ct || nf_ct_is_template(ct)) {
regs->verdict.code = NFT_BREAK;
return;
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 0b987bc2132a..68f7cfbbee06 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -676,7 +676,7 @@ static void nft_tunnel_obj_destroy(const struct nft_ctx *ctx,
{
struct nft_tunnel_obj *priv = nft_obj_data(obj);
- metadata_dst_free(priv->md);
+ dst_release(&priv->md->dst);
}
static struct nft_object_type nft_tunnel_obj_type;
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 466da23e36ff..b32d153e3a18 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -91,7 +91,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
if (info->queues_total > 1) {
if (info->flags & NFQ_FLAG_CPU_FANOUT) {
- int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
queue = info->queuenum + cpu % info->queues_total;
} else {
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0c64c504f79d..4001de0c4959 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -656,6 +656,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
sends_out:
vfree(ic->i_sends);
+ ic->i_sends = NULL;
ack_dma_out:
rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma,
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 332fd9695e54..04ea11c90e03 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -112,11 +112,6 @@ struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
}
EXPORT_SYMBOL(tcf_action_set_ctrlact);
-/* XXX: For standalone actions, we don't need a RCU grace period either, because
- * actions are always connected to filters and filters are already destroyed in
- * RCU callbacks, so after a RCU grace period actions are already disconnected
- * from filters. Readers later can not find us.
- */
static void free_tcf(struct tc_action *p)
{
struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
@@ -129,7 +124,7 @@ static void free_tcf(struct tc_action *p)
if (chain)
tcf_chain_put_by_act(chain);
- kfree(p);
+ kfree_rcu(p, tcfa_rcu);
}
static void offload_action_hw_count_set(struct tc_action *act,
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index bc20f08a2789..bd3b1da3cd63 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -16,6 +16,8 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
+#include <linux/overflow.h>
+#include <linux/unaligned.h>
#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
@@ -242,7 +244,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_free_ex;
}
- nparms->tcfp_off_max_hint = 0;
nparms->tcfp_flags = parm->flags;
nparms->tcfp_nkeys = parm->nkeys;
@@ -268,14 +269,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
BITS_PER_TYPE(int) - 1,
nparms->tcfp_keys[i].shift);
- /* The AT option can read a single byte, we can bound the actual
- * value with uchar max.
- */
- cur += (0xff & offmask) >> nparms->tcfp_keys[i].shift;
-
- /* Each key touches 4 bytes starting from the computed offset */
- nparms->tcfp_off_max_hint =
- max(nparms->tcfp_off_max_hint, cur + 4);
}
p = to_pedit(*a);
@@ -318,15 +311,12 @@ static void tcf_pedit_cleanup(struct tc_action *a)
call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu);
}
-static bool offset_valid(struct sk_buff *skb, int offset)
+static bool offset_valid(struct sk_buff *skb, int offset, int len)
{
- if (offset > 0 && offset > skb->len)
- return false;
-
- if (offset < 0 && -offset > skb_headroom(skb))
+ if (offset < -(int)skb_headroom(skb))
return false;
- return true;
+ return offset <= (int)skb->len - len;
}
static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type)
@@ -393,18 +383,10 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
struct tcf_pedit_key_ex *tkey_ex;
struct tcf_pedit_parms *parms;
struct tc_pedit_key *tkey;
- u32 max_offset;
int i;
parms = rcu_dereference_bh(p->parms);
- max_offset = (skb_transport_header_was_set(skb) ?
- skb_transport_offset(skb) :
- skb_network_offset(skb)) +
- parms->tcfp_off_max_hint;
- if (skb_ensure_writable(skb, min(skb->len, max_offset)))
- goto done;
-
tcf_lastuse_update(&p->tcf_tm);
tcf_action_update_bstats(&p->common, skb);
@@ -412,10 +394,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
tkey_ex = parms->tcfp_keys_ex;
for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
+ int write_offset, write_len;
int offset = tkey->off;
int hoffset = 0;
- u32 *ptr, hdata;
- u32 val;
+ u32 cur_val, val;
+ u32 *ptr;
int rc;
if (tkey_ex) {
@@ -433,13 +416,15 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
if (tkey->offmask) {
u8 *d, _d;
+ int at_offset;
- if (!offset_valid(skb, hoffset + tkey->at)) {
+ if (check_add_overflow(hoffset, (int)tkey->at, &at_offset) ||
+ !offset_valid(skb, at_offset, sizeof(_d))) {
pr_info_ratelimited("tc action pedit 'at' offset %d out of bounds\n",
hoffset + tkey->at);
goto bad;
}
- d = skb_header_pointer(skb, hoffset + tkey->at,
+ d = skb_header_pointer(skb, at_offset,
sizeof(_d), &_d);
if (!d)
goto bad;
@@ -451,31 +436,51 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
}
}
- if (!offset_valid(skb, hoffset + offset)) {
- pr_info_ratelimited("tc action pedit offset %d out of bounds\n", hoffset + offset);
+ if (check_add_overflow(hoffset, offset, &write_offset)) {
+ pr_info_ratelimited("tc action pedit offset overflow\n");
goto bad;
}
- ptr = skb_header_pointer(skb, hoffset + offset,
- sizeof(hdata), &hdata);
- if (!ptr)
+ if (!offset_valid(skb, write_offset, sizeof(*ptr))) {
+ pr_info_ratelimited("tc action pedit offset %d out of bounds\n",
+ write_offset);
goto bad;
+ }
+
+ if (write_offset < 0) {
+ if (skb_cow(skb, -write_offset))
+ goto bad;
+ if (write_offset + (int)sizeof(*ptr) > 0) {
+ if (skb_ensure_writable(skb,
+ min_t(int, skb->len,
+ write_offset + (int)sizeof(*ptr))))
+ goto bad;
+ }
+ } else {
+ if (check_add_overflow(write_offset, (int)sizeof(*ptr),
+ &write_len))
+ goto bad;
+ if (skb_ensure_writable(skb, min_t(int, skb->len,
+ write_len)))
+ goto bad;
+ }
+
+ ptr = (u32 *)(skb->data + write_offset);
+ cur_val = get_unaligned(ptr);
/* just do it, baby */
switch (cmd) {
case TCA_PEDIT_KEY_EX_CMD_SET:
val = tkey->val;
break;
case TCA_PEDIT_KEY_EX_CMD_ADD:
- val = (*ptr + tkey->val) & ~tkey->mask;
+ val = (cur_val + tkey->val) & ~tkey->mask;
break;
default:
pr_info_ratelimited("tc action pedit bad command (%d)\n", cmd);
goto bad;
}
- *ptr = ((*ptr & tkey->mask) ^ val);
- if (ptr == &hdata)
- skb_store_bits(skb, hoffset + offset, ptr, 4);
+ put_unaligned((cur_val & tkey->mask) ^ val, ptr);
}
goto done;
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 2afb376299fe..d758f5c3e06e 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -266,15 +266,15 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t
lock_sock(sk);
- rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL);
- if (!rep) {
- release_sock(sk);
- return -ENOMEM;
+ if (ep != assoc->ep || assoc->base.dead) {
+ err = -ESTALE;
+ goto out_unlock;
}
- if (ep != assoc->ep) {
- err = -EAGAIN;
- goto out;
+ rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL);
+ if (!rep) {
+ err = -ENOMEM;
+ goto out_unlock;
}
err = inet_sctp_diag_fill(sk, assoc, rep, req, sk_user_ns(NETLINK_CB(skb).sk),
@@ -289,8 +289,9 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t
return nlmsg_unicast(sock_net(skb->sk)->diag_nlsk, rep, NETLINK_CB(skb).portid);
out:
- release_sock(sk);
kfree_skb(rep);
+out_unlock:
+ release_sock(sk);
return err;
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index de86ac088289..85264862fb6b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1730,6 +1730,7 @@ struct sctp_association *sctp_unpack_cookie(
struct sctp_signed_cookie *cookie;
struct sk_buff *skb = chunk->skb;
struct sctp_cookie *bear_cookie;
+ struct sctp_chunkhdr *ch;
enum sctp_scope scope;
unsigned int len;
ktime_t kt;
@@ -1759,6 +1760,10 @@ struct sctp_association *sctp_unpack_cookie(
cookie = chunk->subh.cookie_hdr;
bear_cookie = &cookie->c;
+ ch = (struct sctp_chunkhdr *)(bear_cookie + 1);
+ if (ntohs(ch->length) > len - fixed_size)
+ goto malformed;
+
/* Verify the cookie's MAC, if cookie authentication is enabled. */
if (sctp_sk(ep->base.sk)->cookie_auth_enable) {
u8 mac[SHA256_DIGEST_SIZE];
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8e89a870780c..9b23c11cbb9e 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2598,11 +2598,7 @@ static enum sctp_disposition sctp_sf_do_5_2_6_stale(
*/
sctp_add_cmd_sf(commands, SCTP_CMD_DEL_NON_PRIMARY, SCTP_NULL());
- /* If we've sent any data bundled with COOKIE-ECHO we will need to
- * resend
- */
- sctp_add_cmd_sf(commands, SCTP_CMD_T1_RETRAN,
- SCTP_TRANSPORT(asoc->peer.primary_path));
+ sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL());
/* Cast away the const modifier, as we want to just
* rerun it through as a sideffect.
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dc71ed79be4a..0d9cd977c7b7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2886,7 +2886,7 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
return -EAGAIN;
}
- WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
+ WRITE_ONCE(u->inq_len, u->inq_len - unix_skb_len(skb));
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (skb == u->oob_skb) {
@@ -3063,11 +3063,12 @@ unlock:
unix_detach_fds(&scm, skb);
}
- if (unix_skb_len(skb))
- break;
-
spin_lock(&sk->sk_receive_queue.lock);
- WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
+ WRITE_ONCE(u->inq_len, u->inq_len - chunk);
+ if (unix_skb_len(skb)) {
+ spin_unlock(&sk->sk_receive_queue.lock);
+ break;
+ }
__skb_unlink(skb, &sk->sk_receive_queue);
spin_unlock(&sk->sk_receive_queue.lock);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 5c1ecd5bfdbc..91516488a742 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -980,8 +980,10 @@ static int vmci_transport_recv_listen(struct sock *sk,
err = -EINVAL;
}
- if (err < 0)
+ if (err < 0) {
vsock_remove_pending(sk, pending);
+ sk_acceptq_removed(sk);
+ }
release_sock(pending);
vmci_transport_release_pending(pending);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7db9cd433801..76c537a6e8b5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6366,6 +6366,9 @@ nl80211_parse_rnr_elems(struct wiphy *wiphy, struct nlattr *attrs,
if (ret)
return ERR_PTR(ret);
+ if (num_elems >= 255)
+ return ERR_PTR(-EINVAL);
+
num_elems++;
}
@@ -6711,6 +6714,12 @@ static int nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
return -EINVAL;
}
+ if (!!params->he_cap != !!params->he_oper)
+ return -EINVAL;
+
+ if (!!params->eht_cap != !!params->eht_oper)
+ return -EINVAL;
+
return 0;
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 358cbc9e43d8..27a56ee2e8f0 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1071,6 +1071,7 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev)
struct cfg80211_scan_request_int *request;
struct cfg80211_scan_request_int *rdev_req = rdev->scan_req;
u32 n_channels = 0, idx, i;
+ int err;
if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ)) {
rdev_req->req.first_part = true;
@@ -1100,8 +1101,14 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev)
rdev_req->req.scan_6ghz = false;
rdev_req->req.first_part = true;
+ err = rdev_scan(rdev, request);
+ if (err) {
+ kfree(request);
+ return err;
+ }
+
rdev->int_scan_req = request;
- return rdev_scan(rdev, request);
+ return 0;
}
void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5e5786cd9af5..f8c8a8c9dfba 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -802,6 +802,7 @@ static int xsk_skb_metadata(struct sk_buff *skb, void *buffer,
u32 hr)
{
struct xsk_tx_metadata *meta = NULL;
+ u16 csum_start, csum_offset;
if (unlikely(pool->tx_metadata_len == 0))
return -EINVAL;
@@ -811,13 +812,15 @@ static int xsk_skb_metadata(struct sk_buff *skb, void *buffer,
return -EINVAL;
if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) {
- if (unlikely(meta->request.csum_start +
- meta->request.csum_offset +
+ csum_start = READ_ONCE(meta->request.csum_start);
+ csum_offset = READ_ONCE(meta->request.csum_offset);
+
+ if (unlikely(csum_start + csum_offset +
sizeof(__sum16) > desc->len))
return -EINVAL;
- skb->csum_start = hr + meta->request.csum_start;
- skb->csum_offset = meta->request.csum_offset;
+ skb->csum_start = hr + csum_start;
+ skb->csum_offset = csum_offset;
skb->ip_summed = CHECKSUM_PARTIAL;
if (unlikely(pool->tx_sw_csum)) {
diff --git a/scripts/kconfig/tests/err_repeated_inc/expected_stderr b/scripts/kconfig/tests/err_repeated_inc/expected_stderr
index 95d90d6a93c5..53071430ea7d 100644
--- a/scripts/kconfig/tests/err_repeated_inc/expected_stderr
+++ b/scripts/kconfig/tests/err_repeated_inc/expected_stderr
@@ -1,2 +1,2 @@
-Kconfig.inc1:4: error: Repeated inclusion of Kconfig.inc3
-Kconfig.inc2:3: note: Location of first inclusion of Kconfig.inc3
+Kconfig.inc1:4: error: repeated inclusion of Kconfig.inc3
+Kconfig.inc2:3: note: location of first inclusion of Kconfig.inc3
diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py
index 02e43c184d43..446d82807f90 100644
--- a/tools/sched_ext/scx_show_state.py
+++ b/tools/sched_ext/scx_show_state.py
@@ -27,18 +27,25 @@ def read_static_key(name):
def state_str(state):
return prog['scx_enable_state_str'][state].string_().decode()
+def read_root_ops_name():
+ if root:
+ return root.ops.name.string_().decode()
+ return ''
+
+def read_root_field(name, default):
+ if root:
+ return getattr(root, name).value_()
+ return default
+
root = prog['scx_root']
enable_state = read_atomic("scx_enable_state_var")
-if root:
- print(f'ops : {root.ops.name.string_().decode()}')
-else:
- print('ops : ')
+print(f'ops : {read_root_ops_name()}')
print(f'enabled : {read_static_key("__scx_enabled")}')
print(f'switching_all : {read_int("scx_switching_all")}')
print(f'switched_all : {read_static_key("__scx_switched_all")}')
print(f'enable_state : {state_str(enable_state)} ({enable_state})')
-print(f'aborting : {prog["scx_aborting"].value_()}')
-print(f'bypass_depth : {prog["scx_bypass_depth"].value_()}')
+print(f'aborting : {read_root_field("aborting", False)}')
+print(f'bypass_depth : {read_root_field("bypass_depth", 0)}')
print(f'nr_rejected : {read_atomic("scx_nr_rejected")}')
print(f'enable_seq : {read_atomic("scx_enable_seq")}')
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index a56f4153c64d..683b05062810 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -492,6 +492,16 @@ REMOTE_TEST_MATRIX=(
" C1-5:P1 . C1-4:P1 C2-3 . . \
. . . P1 . . p1:5|c11:1-4|c12:5 \
p1:P1|c11:P1|c12:P-1"
+ # Narrowing cpuset.cpus to previously sibling-excluded CPUs should
+ # not return CPUs that were never actually owned.
+ " C1-4:P1 . C1-2:P1 C1-3:P2 . . \
+ . . . C3 . . p1:4|c11:1-2|c12:3 \
+ p1:P1|c11:P1|c12:P2 3"
+ # Expanding cpuset.cpus to include a previously sibling-excluded CPU
+ # after the sibling has become a member should correctly request it.
+ " C1-4:P1 . C1-2:P1 C1-3:P2 . . \
+ . . P0 C2-3 . . p1:1,4|c11:1|c12:2-3 \
+ p1:P1|c11:P0|c12:P2 2-3"
)
#
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
index 4f5e8c665156..2a680c086047 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
@@ -20,7 +20,7 @@ check_error 'e:foo/^123456789012345678901234567890123456789012345678901234567890
check_error 'e:foo/^bar.1 syscalls/sys_enter_openat' # BAD_EVENT_NAME
check_error 'e:foo/bar syscalls/sys_enter_openat arg=^dfd' # BAD_FETCH_ARG
-check_error 'e:foo/bar syscalls/sys_enter_openat ^arg=$foo' # BAD_ATTACH_ARG
+check_error 'e:foo/bar syscalls/sys_enter_openat arg=^$foo' # BAD_ATTACH_ARG
if grep -q '<attached-group>\.<attached-event>.*\[if <filter>\]' README; then
check_error 'e:foo/bar syscalls/sys_enter_openat if ^' # NO_EP_FILTER
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f3da38c54d27..2ed7d803eb54 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -109,6 +109,7 @@ TEST_PROGS := \
test_vxlan_nh.sh \
test_vxlan_nolocalbypass.sh \
test_vxlan_under_vrf.sh \
+ test_vxlan_vnifilter_notify.sh \
test_vxlan_vnifiltering.sh \
tfo_passive.sh \
traceroute.sh \
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
index 3a86be9bda17..6268b5bf50be 100644
--- a/tools/testing/selftests/net/af_unix/scm_inq.c
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -8,8 +8,9 @@
#include "kselftest_harness.h"
-#define NR_CHUNKS 100
-#define MSG_LEN 256
+#define NR_CHUNKS 100
+#define MSG_LEN 256
+#define NR_PARTIAL_READS 3
FIXTURE(scm_inq)
{
@@ -120,4 +121,53 @@ TEST_F(scm_inq, basic)
recv_chunks(_metadata, self);
}
+TEST_F(scm_inq, partial_read)
+{
+ char buf[MSG_LEN * NR_PARTIAL_READS] = {};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+ struct msghdr msg = {};
+ struct iovec iov = {};
+ struct cmsghdr *cmsg;
+ int err, inq, ret, i;
+ int remain;
+
+ err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int));
+ if (variant->type != SOCK_STREAM) {
+ ASSERT_EQ(-ENOPROTOOPT, -errno);
+ return;
+ }
+ ASSERT_EQ(0, err);
+
+ ret = send(self->fd[0], buf, sizeof(buf), 0);
+ ASSERT_EQ(sizeof(buf), ret);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ iov.iov_base = buf;
+ iov.iov_len = MSG_LEN;
+
+ for (i = 0; i < NR_PARTIAL_READS; i++) {
+ remain = MSG_LEN * (NR_PARTIAL_READS - 1 - i);
+
+ memset(buf, 0, MSG_LEN);
+ memset(cmsg_buf, 0, sizeof(cmsg_buf));
+ ret = recvmsg(self->fd[1], &msg, 0);
+ ASSERT_EQ(MSG_LEN, ret);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(NULL, cmsg);
+ ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len);
+ ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level);
+ ASSERT_EQ(SCM_INQ, cmsg->cmsg_type);
+ ASSERT_EQ(remain, *(int *)CMSG_DATA(cmsg));
+
+ ret = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, ret);
+ ASSERT_EQ(remain, inq);
+ }
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 5acd12021e6e..4b3f71e66609 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -4100,6 +4100,10 @@ userspace_tests()
chk_rm_nr 0 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
+ # check counters are not affected by errors at creation time
+ userspace_pm_add_sf $ns2 10.0.12.2 10 2>/dev/null
+ chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
kill_events_pids
mptcp_lib_kill_group_wait $tests_pid
fi
diff --git a/tools/testing/selftests/net/test_vxlan_vnifilter_notify.sh b/tools/testing/selftests/net/test_vxlan_vnifilter_notify.sh
new file mode 100755
index 000000000000..9d51a9e02ae0
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_vnifilter_notify.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2034,SC2154,SC2317,SC2329
+#
+# Test for VXLAN vnifilter netlink notifications (RTM_NEWTUNNEL /
+# RTM_DELTUNNEL).
+#
+# Verifies that:
+# - Adding a new VNI sends a notification
+# - Adding a new VNI with a remote sends a notification
+# - Deleting a VNI sends a notification
+# - Re-adding an existing VNI with the same attributes does not send
+# a spurious notification
+# - Updating an existing VNI's remote sends a notification
+# - Deleting a non-existent VNI does not send a notification
+
+source lib.sh
+
+require_command bridge
+
+VXLAN_DEV=vxlan100
+
+ALL_TESTS="
+ test_vni_add_notify
+ test_vni_add_remote_notify
+ test_vni_del_notify
+ test_vni_readd_no_notify
+ test_vni_update_remote_notify
+ test_vni_del_nonexistent_no_notify
+"
+
+setup_prepare()
+{
+ setup_ns NS1
+ defer cleanup_all_ns
+
+ ip -n "$NS1" link add $VXLAN_DEV type vxlan dstport 4789 \
+ local 10.0.0.1 nolearning external vnifilter
+ ip -n "$NS1" link set $VXLAN_DEV up
+}
+
+# Run bridge monitor in the background, execute a command, then count
+# the notification lines.
+# Usage: vni_notify_check <command> [args...]
+# Sets: NOTIFY_COUNT with the number of notifications observed.
+vni_notify_check()
+{
+ local tmpf cmd_ret monitor_pid
+
+ tmpf=$(mktemp)
+ defer rm "$tmpf"
+
+ defer_scope_push
+ ip netns exec "$NS1" bridge monitor vni > "$tmpf" 2>/dev/null &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+
+ sleep 0.5
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "iproute2 'bridge monitor vni' not supported"
+ return "$RET"
+ fi
+
+ "$@"
+ cmd_ret=$?
+ sleep 0.2
+ defer_scope_pop
+
+ NOTIFY_COUNT=$(grep -c "$VXLAN_DEV" "$tmpf")
+ NOTIFY_COUNT=${NOTIFY_COUNT:-0}
+ return "$cmd_ret"
+}
+
+# Adding a brand new VNI should produce a notification.
+test_vni_add_notify()
+{
+ RET=0
+
+ vni_notify_check \
+ bridge -n "$NS1" vni add vni 1000 dev "$VXLAN_DEV"
+ check_err $? "Failed to add VNI"
+
+ [ "$NOTIFY_COUNT" -eq 1 ]
+ check_err $? "Expected 1 notification for VNI add, got $NOTIFY_COUNT"
+
+ bridge -n "$NS1" vni delete vni 1000 dev "$VXLAN_DEV" 2>/dev/null
+
+ log_test "VNI add sends notification"
+}
+
+# Adding a VNI with a remote should produce a notification.
+test_vni_add_remote_notify()
+{
+ RET=0
+
+ vni_notify_check \
+ bridge -n "$NS1" vni add vni 4000 remote 10.0.0.2 dev "$VXLAN_DEV"
+ check_err $? "Failed to add VNI with remote"
+
+ [ "$NOTIFY_COUNT" -eq 1 ]
+ check_err $? "Expected 1 notification for VNI add with remote, got $NOTIFY_COUNT"
+
+ bridge -n "$NS1" vni delete vni 4000 dev "$VXLAN_DEV"
+
+ log_test "VNI add with remote sends notification"
+}
+
+# Deleting a VNI should produce a notification.
+test_vni_del_notify()
+{
+ RET=0
+
+ bridge -n "$NS1" vni add vni 2000 dev "$VXLAN_DEV"
+
+ vni_notify_check \
+ bridge -n "$NS1" vni delete vni 2000 dev "$VXLAN_DEV"
+ check_err $? "Failed to delete VNI"
+
+ [ "$NOTIFY_COUNT" -eq 1 ]
+ check_err $? "Expected 1 notification for VNI del, got $NOTIFY_COUNT"
+
+ log_test "VNI delete sends notification"
+}
+
+# Re-adding an existing VNI with the same attributes should not produce
+# a notification.
+test_vni_readd_no_notify()
+{
+ RET=0
+
+ bridge -n "$NS1" vni add vni 3000 dev "$VXLAN_DEV"
+
+ vni_notify_check \
+ bridge -n "$NS1" vni add vni 3000 dev "$VXLAN_DEV"
+ check_err $? "Failed to re-add VNI"
+
+ [ "$NOTIFY_COUNT" -eq 0 ]
+ check_err $? "Expected 0 notifications for VNI re-add, got $NOTIFY_COUNT"
+
+ bridge -n "$NS1" vni delete vni 3000 dev "$VXLAN_DEV"
+
+ log_test "VNI re-add does not send spurious notification"
+}
+
+# Updating an existing VNI's remote should produce a notification.
+test_vni_update_remote_notify()
+{
+ RET=0
+
+ bridge -n "$NS1" vni add vni 5000 remote 10.0.0.2 dev "$VXLAN_DEV"
+
+ vni_notify_check \
+ bridge -n "$NS1" vni add vni 5000 remote 10.0.0.3 dev "$VXLAN_DEV"
+ check_err $? "Failed to update VNI remote"
+
+ [ "$NOTIFY_COUNT" -eq 1 ]
+ check_err $? "Expected 1 notification for VNI remote update, got $NOTIFY_COUNT"
+
+ bridge -n "$NS1" vni delete vni 5000 dev "$VXLAN_DEV"
+
+ log_test "VNI remote update sends notification"
+}
+
+# Deleting a non-existent VNI should not produce a notification.
+test_vni_del_nonexistent_no_notify()
+{
+ RET=0
+
+ vni_notify_check \
+ bridge -n "$NS1" vni delete vni 9999 dev "$VXLAN_DEV" 2>/dev/null
+
+ [ "$NOTIFY_COUNT" -eq 0 ]
+ check_err $? "Expected 0 notifications for non-existent VNI del, got $NOTIFY_COUNT"
+
+ log_test "Non-existent VNI delete does not send notification"
+}
+
+trap defer_scopes_cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 89489996fbc1..881f92d7a469 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3520,7 +3520,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm))
return;
- WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm));
+ WARN_ON_ONCE(!vcpu && refcount_read(&kvm->users_count) &&
+ !kvm_arch_allow_write_without_running_vcpu(kvm));
#endif
if (memslot && kvm_slot_dirty_track_enabled(memslot)) {