summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2026-05-23 10:03:58 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2026-05-23 10:03:58 +0200
commit37f32d5ab83968d63cfba6092ecaae3e582db964 (patch)
tree89302eb3515325d10aa669e592c3d1fe0ca29c95
parente23844b2ddbdd004285f14bdc672b4d854ad4c4e (diff)
parent9029496abfae3c208336855ae6f3e1f5f881ef76 (diff)
Merge tag 'kvm-s390-master-7.1-2' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
KVM: s390: some vSIE and UCONTROL fixes Fix some memory issues and some hangs in vSIE.
-rw-r--r--.mailmap2
-rw-r--r--Documentation/.renames.txt1
-rw-r--r--Documentation/crypto/krb5.rst17
-rw-r--r--Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml13
-rw-r--r--Documentation/networking/device_drivers/ethernet/3com/3c509.rst249
-rw-r--r--Documentation/networking/device_drivers/ethernet/index.rst1
-rw-r--r--MAINTAINERS15
-rw-r--r--arch/alpha/include/asm/Kbuild1
-rw-r--r--arch/arc/include/asm/Kbuild1
-rw-r--r--arch/arm/boot/dts/renesas/r7s72100-genmai.dts3
-rw-r--r--arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts2
-rw-r--r--arch/arm/boot/dts/renesas/r7s72100.dtsi2
-rw-r--r--arch/arm/boot/dts/renesas/r8a7778.dtsi2
-rw-r--r--arch/arm/boot/dts/renesas/r8a7779.dtsi2
-rw-r--r--arch/arm/boot/dts/renesas/r8a7792.dtsi2
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/mach-versatile/integrator_cp.c13
-rw-r--r--arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso5
-rw-r--r--arch/arm64/boot/dts/renesas/r8a78000.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r9a09g056.dtsi1
-rw-r--r--arch/arm64/boot/dts/renesas/r9a09g057.dtsi2
-rw-r--r--arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi5
-rw-r--r--arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi5
-rw-r--r--arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso5
-rw-r--r--arch/arm64/include/asm/page.h2
-rw-r--r--arch/arm64/include/asm/ring_buffer.h10
-rw-r--r--arch/arm64/mm/fault.c11
-rw-r--r--arch/csky/include/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/loongarch/include/asm/Kbuild1
-rw-r--r--arch/m68k/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/mips/include/asm/Kbuild1
-rw-r--r--arch/nios2/include/asm/Kbuild1
-rw-r--r--arch/openrisc/include/asm/Kbuild1
-rw-r--r--arch/parisc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig1
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi10
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts10
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts19
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110-common.dtsi27
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110.dtsi28
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/kvm/dat.c1
-rw-r--r--arch/s390/kvm/dat.h3
-rw-r--r--arch/s390/kvm/gaccess.c1
-rw-r--r--arch/s390/kvm/gmap.c18
-rw-r--r--arch/s390/kvm/gmap.h61
-rw-r--r--arch/sh/include/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/xtensa/include/asm/Kbuild1
-rw-r--r--crypto/krb5/krb5_api.c54
-rw-r--r--drivers/ata/libata-core.c9
-rw-r--r--drivers/ata/libata-eh.c8
-rw-r--r--drivers/ata/libata-pmp.c18
-rw-r--r--drivers/ata/libata-scsi.c89
-rw-r--r--drivers/ata/sata_sil24.c6
-rw-r--r--drivers/base/memory.c8
-rw-r--r--drivers/block/rbd.c20
-rw-r--r--drivers/bluetooth/btintel_pcie.c20
-rw-r--r--drivers/bluetooth/btintel_pcie.h3
-rw-r--r--drivers/bluetooth/btmtk.c6
-rw-r--r--drivers/bluetooth/hci_ldisc.c48
-rw-r--r--drivers/bluetooth/hci_qca.c33
-rw-r--r--drivers/dpll/zl3073x/dpll.c6
-rw-r--r--drivers/firmware/arm_ffa/bus.c4
-rw-r--r--drivers/firmware/arm_ffa/driver.c144
-rw-r--r--drivers/firmware/efi/efi.c28
-rw-r--r--drivers/firmware/efi/sysfb_efi.c9
-rw-r--r--drivers/firmware/psci/psci.c10
-rw-r--r--drivers/hid/hid-core.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c8
-rw-r--r--drivers/media/rc/ttusbir.c2
-rw-r--r--drivers/net/dsa/mt7530.c160
-rw-r--r--drivers/net/ethernet/3com/3c509.c1543
-rw-r--r--drivers/net/ethernet/3com/Kconfig14
-rw-r--r--drivers/net/ethernet/3com/Makefile1
-rw-r--r--drivers/net/ethernet/airoha/airoha_eth.c10
-rw-r--r--drivers/net/ethernet/amd/pds_core/debugfs.c7
-rw-r--r--drivers/net/ethernet/amd/pds_core/dev.c11
-rw-r--r--drivers/net/ethernet/amd/pds_core/devlink.c6
-rw-r--r--drivers/net/ethernet/atheros/ag71xx.c3
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c9
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_hw.h15
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_msg.c106
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c75
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_hw.c33
-rw-r--r--drivers/net/ethernet/intel/ice/virt/queues.c2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_tsn.c9
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/rep.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c5
-rw-r--r--drivers/net/ethernet/microsoft/mana/hw_channel.c29
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_cxt.c2
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c8
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c126
-rw-r--r--drivers/net/ifb.c11
-rw-r--r--drivers/net/ovpn/io.c12
-rw-r--r--drivers/net/ovpn/main.c12
-rw-r--r--drivers/net/ovpn/netlink.c8
-rw-r--r--drivers/net/ovpn/peer.c23
-rw-r--r--drivers/net/ovpn/peer.h1
-rw-r--r--drivers/net/ovpn/stats.h16
-rw-r--r--drivers/net/ovpn/tcp.c19
-rw-r--r--drivers/net/ovpn/udp.c2
-rw-r--r--drivers/net/phy/phy-c45.c8
-rw-r--r--drivers/net/phy/phy_device.c6
-rw-r--r--drivers/net/pse-pd/pse_core.c2
-rw-r--r--drivers/net/tap.c2
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi.c15
-rw-r--r--drivers/net/wireless/ath/ath11k/dp_rx.c9
-rw-r--r--drivers/net/wireless/ath/ath11k/hal.c14
-rw-r--r--drivers/net/wireless/ath/ath11k/hal_rx.c5
-rw-r--r--drivers/net/wireless/ath/ath11k/testmode.c1
-rw-r--r--drivers/net/wireless/ath/ath11k/wmi.c131
-rw-r--r--drivers/net/wireless/ath/ath12k/mac.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/constants.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/d3.c6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/link.c13
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mld/tx.c15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c27
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/utils.c14
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c6
-rw-r--r--drivers/net/wireless/microchip/wilc1000/wlan.c2
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_imem.c2
-rw-r--r--drivers/pci/controller/pcie-brcmstb.c4
-rw-r--r--fs/9p/v9fs_vfs.h13
-rw-r--r--fs/9p/vfs_inode.c6
-rw-r--r--fs/9p/vfs_inode_dotl.c12
-rw-r--r--fs/afs/Makefile1
-rw-r--r--fs/afs/dir.c79
-rw-r--r--fs/afs/file.c24
-rw-r--r--fs/afs/fsclient.c4
-rw-r--r--fs/afs/inode.c127
-rw-r--r--fs/afs/internal.h45
-rw-r--r--fs/afs/symlink.c278
-rw-r--r--fs/afs/validation.c14
-rw-r--r--fs/afs/write.c2
-rw-r--r--fs/afs/yfsclient.c4
-rw-r--r--fs/cachefiles/namei.c2
-rw-r--r--fs/erofs/xattr.c4
-rw-r--r--fs/erofs/zdata.c15
-rw-r--r--fs/fuse/file.c5
-rw-r--r--fs/inode.c8
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/mnt_idmapping.c2
-rw-r--r--fs/netfs/buffered_read.c73
-rw-r--r--fs/netfs/buffered_write.c174
-rw-r--r--fs/netfs/direct_read.c42
-rw-r--r--fs/netfs/direct_write.c6
-rw-r--r--fs/netfs/internal.h3
-rw-r--r--fs/netfs/iterator.c41
-rw-r--r--fs/netfs/misc.c41
-rw-r--r--fs/netfs/read_collect.c19
-rw-r--r--fs/netfs/read_retry.c17
-rw-r--r--fs/netfs/read_single.c23
-rw-r--r--fs/netfs/write_collect.c15
-rw-r--r--fs/netfs/write_issue.c49
-rw-r--r--fs/netfs/write_retry.c6
-rw-r--r--fs/nsfs.c2
-rw-r--r--fs/ntfs/attrib.c46
-rw-r--r--fs/ntfs/attrlist.c7
-rw-r--r--fs/ntfs/bdev-io.c7
-rw-r--r--fs/ntfs/index.c17
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/ntfs/logfile.c5
-rw-r--r--fs/ntfs/mft.c93
-rw-r--r--fs/ntfs/namei.c4
-rw-r--r--fs/ntfs/runlist.c9
-rw-r--r--fs/ntfs/super.c35
-rw-r--r--fs/orangefs/namei.c2
-rw-r--r--fs/select.c11
-rw-r--r--fs/smb/client/cifsfs.c40
-rw-r--r--fs/smb/client/cifssmb.c3
-rw-r--r--fs/smb/client/file.c13
-rw-r--r--fs/smb/client/inode.c14
-rw-r--r--fs/smb/client/readdir.c3
-rw-r--r--fs/smb/client/smb2ops.c42
-rw-r--r--fs/smb/client/smb2pdu.c3
-rw-r--r--fs/smb/server/oplock.c6
-rw-r--r--fs/smb/server/smbacl.c12
-rw-r--r--fs/smb/server/vfs_cache.c2
-rw-r--r--include/asm-generic/ring_buffer.h13
-rw-r--r--include/crypto/krb5.h9
-rw-r--r--include/linux/efi.h2
-rw-r--r--include/linux/gfp_types.h10
-rw-r--r--include/linux/highmem.h7
-rw-r--r--include/linux/libata.h7
-rw-r--r--include/linux/list.h37
-rw-r--r--include/linux/netfs.h295
-rw-r--r--include/linux/soc/airoha/airoha_offload.h6
-rw-r--r--include/net/bluetooth/bluetooth.h1
-rw-r--r--include/net/ip_vs.h3
-rw-r--r--include/net/neighbour.h8
-rw-r--r--include/net/net_shaper.h1
-rw-r--r--include/net/netfilter/nf_queue.h1
-rw-r--r--include/net/tcp.h7
-rw-r--r--include/trace/events/damon.h2
-rw-r--r--include/trace/events/netfs.h8
-rw-r--r--include/trace/events/rxrpc.h1
-rw-r--r--kernel/rcu/srcutree.c12
-rw-r--r--kernel/trace/Makefile3
-rw-r--r--kernel/trace/ring_buffer.c30
-rw-r--r--kernel/trace/simple_ring_buffer.c4
-rw-r--r--lib/tests/kunit_iov_iter.c10
-rw-r--r--mm/damon/sysfs-schemes.c1
-rw-r--r--mm/memory.c24
-rw-r--r--mm/memory_hotplug.c2
-rw-r--r--mm/migrate_device.c2
-rw-r--r--mm/page_alloc.c8
-rw-r--r--net/appletalk/aarp.c5
-rw-r--r--net/batman-adv/bat_iv_ogm.c82
-rw-r--r--net/batman-adv/bat_v_ogm.c59
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c109
-rw-r--r--net/batman-adv/distributed-arp-table.c3
-rw-r--r--net/batman-adv/fragmentation.c58
-rw-r--r--net/batman-adv/gateway_client.c4
-rw-r--r--net/batman-adv/mesh-interface.c1
-rw-r--r--net/batman-adv/originator.c4
-rw-r--r--net/batman-adv/tp_meter.c117
-rw-r--r--net/batman-adv/translation-table.c55
-rw-r--r--net/batman-adv/tvlv.c28
-rw-r--r--net/batman-adv/tvlv.h2
-rw-r--r--net/batman-adv/types.h59
-rw-r--r--net/bluetooth/af_bluetooth.c97
-rw-r--r--net/bluetooth/bnep/core.c2
-rw-r--r--net/bluetooth/hci_sync.c6
-rw-r--r--net/bluetooth/iso.c14
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bluetooth/l2cap_sock.c51
-rw-r--r--net/bluetooth/mgmt.c6
-rw-r--r--net/bluetooth/rfcomm/sock.c9
-rw-r--r--net/bluetooth/sco.c9
-rw-r--r--net/bridge/br_multicast.c22
-rw-r--r--net/bridge/br_netfilter_hooks.c6
-rw-r--r--net/bridge/br_netlink.c10
-rw-r--r--net/core/devmem.c11
-rw-r--r--net/core/gro.c7
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/skbuff.c11
-rw-r--r--net/core/skmsg.c9
-rw-r--r--net/hsr/hsr_framereg.c4
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_input.c15
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv4/udp_offload.c22
-rw-r--r--net/ipv6/exthdrs.c21
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c4
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/l2tp/l2tp_core.c2
-rw-r--r--net/mac80211/cfg.c5
-rw-r--r--net/mac80211/mlme.c5
-rw-r--r--net/mac80211/parse.c105
-rw-r--r--net/mac80211/rx.c6
-rw-r--r--net/mptcp/pm.c56
-rw-r--r--net/mptcp/protocol.c25
-rw-r--r--net/mptcp/sockopt.c10
-rw-r--r--net/netfilter/ipset/ip_set_core.c5
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h57
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c6
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c5
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c5
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c187
-rw-r--r--net/netfilter/nf_conntrack_helper.c13
-rw-r--r--net/netfilter/nf_queue.c4
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_inner.c3
-rw-r--r--net/openvswitch/vport-netdev.c8
-rw-r--r--net/phonet/pep.c19
-rw-r--r--net/rds/tcp.c9
-rw-r--r--net/rxrpc/ar-internal.h14
-rw-r--r--net/rxrpc/call_event.c22
-rw-r--r--net/rxrpc/call_object.c2
-rw-r--r--net/rxrpc/conn_event.c32
-rw-r--r--net/rxrpc/insecure.c8
-rw-r--r--net/rxrpc/recvmsg.c68
-rw-r--r--net/rxrpc/rxgk.c160
-rw-r--r--net/rxrpc/rxgk_app.c46
-rw-r--r--net/rxrpc/rxgk_common.h66
-rw-r--r--net/rxrpc/rxkad.c115
-rw-r--r--net/shaper/shaper.c98
-rw-r--r--net/tls/tls_sw.c20
-rw-r--r--net/unix/af_unix.c11
-rw-r--r--net/vmw_vsock/virtio_transport_common.c112
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/wireless/scan.c3
-rw-r--r--net/wireless/wext-compat.c2
-rw-r--r--scripts/gdb/linux/mm.py6
-rw-r--r--scripts/gdb/linux/slab.py4
-rw-r--r--scripts/mod/file2alias.c79
-rw-r--r--scripts/package/PKGBUILD2
-rw-r--r--security/lsm_syscalls.c9
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py37
-rw-r--r--tools/perf/Makefile5
-rw-r--r--tools/perf/Makefile.perf9
-rw-r--r--tools/perf/builtin-trace.c9
-rw-r--r--tools/perf/trace/beauty/beauty.h3
-rwxr-xr-xtools/perf/trace/beauty/clone.sh4
-rw-r--r--tools/perf/trace/beauty/fsmount.c18
-rwxr-xr-xtools/perf/trace/beauty/fsmount.sh11
-rw-r--r--tools/perf/trace/beauty/fsmount_attr.sh22
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h2
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h12
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/mount.h1
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/sched.h17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c103
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_ktls.c21
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c50
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh30
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c55
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh6
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh31
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh10
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket.sh2
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-mark.sh6
-rwxr-xr-xtools/testing/selftests/net/ovpn/test.sh4
-rw-r--r--tools/testing/selftests/net/rds/config1
343 files changed, 6520 insertions, 2138 deletions
diff --git a/.mailmap b/.mailmap
index bd6a72f29d9c..99dba08041fa 100644
--- a/.mailmap
+++ b/.mailmap
@@ -584,6 +584,8 @@ Mayuresh Janorkar <mayur@ti.com>
Md Sadre Alam <quic_mdalam@quicinc.com> <mdalam@codeaurora.org>
Miaoqing Pan <quic_miaoqing@quicinc.com> <miaoqing@codeaurora.org>
Michael Buesch <m@bues.ch>
+Michal Grzeschik <mgr@kernel.org> <m.grzeschik@pengutronix.de>
+Michal Grzeschik <mgr@kernel.org> <mgr@pengutronix.de>
Michael Riesch <michael.riesch@collabora.com> <michael.riesch@wolfvision.net>
Michal Simek <michal.simek@amd.com> <michal.simek@xilinx.com>
Michel Dänzer <michel@tungstengraphics.com>
diff --git a/Documentation/.renames.txt b/Documentation/.renames.txt
index 43d44753ab93..aa7e5aa4a81b 100644
--- a/Documentation/.renames.txt
+++ b/Documentation/.renames.txt
@@ -786,6 +786,7 @@ networking/altera_tse networking/device_drivers/ethernet/altera/altera_tse
networking/bpf_flow_dissector bpf/prog_flow_dissector
networking/cxacru networking/device_drivers/atm/cxacru
networking/defza networking/device_drivers/fddi/defza
+networking/device_drivers/3com/3c509 networking/device_drivers/ethernet/3com/3c509
networking/device_drivers/3com/vortex networking/device_drivers/ethernet/3com/vortex
networking/device_drivers/amazon/ena networking/device_drivers/ethernet/amazon/ena
networking/device_drivers/aquantia/atlantic networking/device_drivers/ethernet/aquantia/atlantic
diff --git a/Documentation/crypto/krb5.rst b/Documentation/crypto/krb5.rst
index beffa0133446..f62e07ac6811 100644
--- a/Documentation/crypto/krb5.rst
+++ b/Documentation/crypto/krb5.rst
@@ -158,13 +158,22 @@ returned.
When a message has been received, the location and size of the data with the
message can be determined by calling::
- void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5,
- enum krb5_crypto_mode mode,
- size_t *_offset, size_t *_len);
+ int crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5,
+ enum krb5_crypto_mode mode,
+ size_t *_offset, size_t *_len);
The caller provides the offset and length of the message to the function, which
then alters those values to indicate the region containing the data (plus any
-padding). It is up to the caller to determine how much padding there is.
+padding). It is up to the caller to determine how much padding there is. The
+function returns an error if the length is too small or if the mode is
+unsupported. An additional function::
+
+ int crypto_krb5_check_data_len(const struct krb5_enctype *krb5,
+ enum krb5_crypto_mode mode,
+ size_t len, size_t min_content);
+
+is provided to just do a basic check that the decrypted/verified message would
+have a sufficient minimum payload.
Preparation Functions
---------------------
diff --git a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
index 91e8cd1db67b..b66ae6300faf 100644
--- a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
+++ b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml
@@ -73,6 +73,15 @@ properties:
HSP CSR is to control and get status of different high-speed peripherals
(such as Ethernet, USB, SATA, etc.) via register, which can tune
board-level's parameters of PHY, etc.
+
+ Additional background information about the High-Speed Subsystem
+ and the HSP CSR block is available in Chapter 10 ("High-Speed Interface")
+ of the EIC7700X SoC Technical Reference Manual, Part 4
+ (EIC7700X_SoC_Technical_Reference_Manual_Part4.pdf). The manual is
+ publicly available at
+ https://github.com/eswincomputing/EIC7700X-SoC-Technical-Reference-Manual/releases
+
+ This reference is provided for background information only.
$ref: /schemas/types.yaml#/definitions/phandle-array
items:
- items:
@@ -82,6 +91,8 @@ properties:
- description: Offset of AXI clock controller Low-Power request
register
- description: Offset of register controlling TX/RX clock delay
+ - description: Optional offset of register controlling TXD delay
+ - description: Optional offset of register controlling RXD delay
required:
- compatible
@@ -116,7 +127,7 @@ examples:
reset-names = "stmmaceth";
rx-internal-delay-ps = <200>;
tx-internal-delay-ps = <200>;
- eswin,hsp-sp-csr = <&hsp_sp_csr 0x100 0x108 0x118>;
+ eswin,hsp-sp-csr = <&hsp_sp_csr 0x100 0x108 0x118 0x114 0x11c>;
snps,axi-config = <&stmmac_axi_setup>;
snps,aal;
snps,fixed-burst;
diff --git a/Documentation/networking/device_drivers/ethernet/3com/3c509.rst b/Documentation/networking/device_drivers/ethernet/3com/3c509.rst
new file mode 100644
index 000000000000..a8c5e5e6841d
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/3com/3c509.rst
@@ -0,0 +1,249 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================================================================
+Linux and the 3Com EtherLink III Series Ethercards (driver v1.18c and higher)
+=============================================================================
+
+This file contains the instructions and caveats for v1.18c and higher versions
+of the 3c509 driver. You should not use the driver without reading this file.
+
+release 1.0
+
+28 February 2002
+
+Current maintainer (corrections to):
+ Maciej W. Rozycki <macro@orcam.me.uk>
+
+Introduction
+============
+
+The following are notes and information on using the 3Com EtherLink III series
+ethercards in Linux. These cards are commonly known by the most widely-used
+card's 3Com model number, 3c509. They are all 10mb/s ISA-bus cards and shouldn't
+be (but sometimes are) confused with the similarly-numbered PCI-bus "3c905"
+(aka "Vortex" or "Boomerang") series. Kernel support for the 3c509 family is
+provided by the module 3c509.c, which has code to support all of the following
+models:
+
+ - 3c509 (original ISA card)
+ - 3c509B (later revision of the ISA card; supports full-duplex)
+ - 3c589 (PCMCIA)
+ - 3c589B (later revision of the 3c589; supports full-duplex)
+ - 3c579 (EISA)
+
+Large portions of this documentation were heavily borrowed from the guide
+written the original author of the 3c509 driver, Donald Becker. The master
+copy of that document, which contains notes on older versions of the driver,
+currently resides on Scyld web server: http://www.scyld.com/.
+
+
+Special Driver Features
+=======================
+
+Overriding card settings
+
+The driver allows boot- or load-time overriding of the card's detected IOADDR,
+IRQ, and transceiver settings, although this capability shouldn't generally be
+needed except to enable full-duplex mode (see below). An example of the syntax
+for LILO parameters for doing this::
+
+ ether=10,0x310,3,0x3c509,eth0
+
+This configures the first found 3c509 card for IRQ 10, base I/O 0x310, and
+transceiver type 3 (10base2). The flag "0x3c509" must be set to avoid conflicts
+with other card types when overriding the I/O address. When the driver is
+loaded as a module, only the IRQ may be overridden. For example,
+setting two cards to IRQ10 and IRQ11 is done by using the irq module
+option::
+
+ options 3c509 irq=10,11
+
+
+Full-duplex mode
+================
+
+The v1.18c driver added support for the 3c509B's full-duplex capabilities.
+In order to enable and successfully use full-duplex mode, three conditions
+must be met:
+
+(a) You must have a Etherlink III card model whose hardware supports full-
+duplex operations. Currently, the only members of the 3c509 family that are
+positively known to support full-duplex are the 3c509B (ISA bus) and 3c589B
+(PCMCIA) cards. Cards without the "B" model designation do *not* support
+full-duplex mode; these include the original 3c509 (no "B"), the original
+3c589, the 3c529 (MCA bus), and the 3c579 (EISA bus).
+
+(b) You must be using your card's 10baseT transceiver (i.e., the RJ-45
+connector), not its AUI (thick-net) or 10base2 (thin-net/coax) interfaces.
+AUI and 10base2 network cabling is physically incapable of full-duplex
+operation.
+
+(c) Most importantly, your 3c509B must be connected to a link partner that is
+itself full-duplex capable. This is almost certainly one of two things: a full-
+duplex-capable Ethernet switch (*not* a hub), or a full-duplex-capable NIC on
+another system that's connected directly to the 3c509B via a crossover cable.
+
+Full-duplex mode can be enabled using 'ethtool'.
+
+.. warning::
+
+ Extremely important caution concerning full-duplex mode
+
+ Understand that the 3c509B's hardware's full-duplex support is much more
+ limited than that provide by more modern network interface cards. Although
+ at the physical layer of the network it fully supports full-duplex operation,
+ the card was designed before the current Ethernet auto-negotiation (N-way)
+ spec was written. This means that the 3c509B family ***cannot and will not
+ auto-negotiate a full-duplex connection with its link partner under any
+ circumstances, no matter how it is initialized***. If the full-duplex mode
+ of the 3c509B is enabled, its link partner will very likely need to be
+ independently _forced_ into full-duplex mode as well; otherwise various nasty
+ failures will occur - at the very least, you'll see massive numbers of packet
+ collisions. This is one of very rare circumstances where disabling auto-
+ negotiation and forcing the duplex mode of a network interface card or switch
+ would ever be necessary or desirable.
+
+
+Available Transceiver Types
+===========================
+
+For versions of the driver v1.18c and above, the available transceiver types are:
+
+== =========================================================================
+0 transceiver type from EEPROM config (normally 10baseT); force half-duplex
+1 AUI (thick-net / DB15 connector)
+2 (undefined)
+3 10base2 (thin-net == coax / BNC connector)
+4 10baseT (RJ-45 connector); force half-duplex mode
+8 transceiver type and duplex mode taken from card's EEPROM config settings
+12 10baseT (RJ-45 connector); force full-duplex mode
+== =========================================================================
+
+Prior to driver version 1.18c, only transceiver codes 0-4 were supported. Note
+that the new transceiver codes 8 and 12 are the *only* ones that will enable
+full-duplex mode, no matter what the card's detected EEPROM settings might be.
+This insured that merely upgrading the driver from an earlier version would
+never automatically enable full-duplex mode in an existing installation;
+it must always be explicitly enabled via one of these code in order to be
+activated.
+
+The transceiver type can be changed using 'ethtool'.
+
+
+Interpretation of error messages and common problems
+----------------------------------------------------
+
+Error Messages
+^^^^^^^^^^^^^^
+
+eth0: Infinite loop in interrupt, status 2011.
+These are "mostly harmless" message indicating that the driver had too much
+work during that interrupt cycle. With a status of 0x2011 you are receiving
+packets faster than they can be removed from the card. This should be rare
+or impossible in normal operation. Possible causes of this error report are:
+
+ - a "green" mode enabled that slows the processor down when there is no
+ keyboard activity.
+
+ - some other device or device driver hogging the bus or disabling interrupts.
+ Check /proc/interrupts for excessive interrupt counts. The timer tick
+ interrupt should always be incrementing faster than the others.
+
+No received packets
+^^^^^^^^^^^^^^^^^^^
+
+If a 3c509, 3c562 or 3c589 can successfully transmit packets, but never
+receives packets (as reported by /proc/net/dev or 'ifconfig') you likely
+have an interrupt line problem. Check /proc/interrupts to verify that the
+card is actually generating interrupts. If the interrupt count is not
+increasing you likely have a physical conflict with two devices trying to
+use the same ISA IRQ line. The common conflict is with a sound card on IRQ10
+or IRQ5, and the easiest solution is to move the 3c509 to a different
+interrupt line. If the device is receiving packets but 'ping' doesn't work,
+you have a routing problem.
+
+Tx Carrier Errors Reported in /proc/net/dev
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+If an EtherLink III appears to transmit packets, but the "Tx carrier errors"
+field in /proc/net/dev increments as quickly as the Tx packet count, you
+likely have an unterminated network or the incorrect media transceiver selected.
+
+3c509B card is not detected on machines with an ISA PnP BIOS.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+While the updated driver works with most PnP BIOS programs, it does not work
+with all. This can be fixed by disabling PnP support using the 3Com-supplied
+setup program.
+
+3c509 card is not detected on overclocked machines
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Increase the delay time in id_read_eeprom() from the current value, 500,
+to an absurdly high value, such as 5000.
+
+
+Decoding Status and Error Messages
+----------------------------------
+
+
+The bits in the main status register are:
+
+===== ======================================
+value description
+===== ======================================
+0x01 Interrupt latch
+0x02 Tx overrun, or Rx underrun
+0x04 Tx complete
+0x08 Tx FIFO room available
+0x10 A complete Rx packet has arrived
+0x20 A Rx packet has started to arrive
+0x40 The driver has requested an interrupt
+0x80 Statistics counter nearly full
+===== ======================================
+
+The bits in the transmit (Tx) status word are:
+
+===== ============================================
+value description
+===== ============================================
+0x02 Out-of-window collision.
+0x04 Status stack overflow (normally impossible).
+0x08 16 collisions.
+0x10 Tx underrun (not enough PCI bus bandwidth).
+0x20 Tx jabber.
+0x40 Tx interrupt requested.
+0x80 Status is valid (this should always be set).
+===== ============================================
+
+
+When a transmit error occurs the driver produces a status message such as::
+
+ eth0: Transmit error, Tx status register 82
+
+The two values typically seen here are:
+
+0x82
+^^^^
+
+Out of window collision. This typically occurs when some other Ethernet
+host is incorrectly set to full duplex on a half duplex network.
+
+0x88
+^^^^
+
+16 collisions. This typically occurs when the network is exceptionally busy
+or when another host doesn't correctly back off after a collision. If this
+error is mixed with 0x82 errors it is the result of a host incorrectly set
+to full duplex (see above).
+
+Both of these errors are the result of network problems that should be
+corrected. They do not represent driver malfunction.
+
+
+Revision history (this file)
+============================
+
+28Feb02 v1.0 DR New; major portions based on Becker original 3c509 docs
+
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 64621c21fd78..1d25be493ae9 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -10,6 +10,7 @@ Contents:
.. toctree::
:maxdepth: 2
+ 3com/3c509
3com/vortex
amazon/ena
altera/altera_tse
diff --git a/MAINTAINERS b/MAINTAINERS
index c2c6d79275c6..8d173418d483 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2064,7 +2064,7 @@ F: Documentation/devicetree/bindings/display/snps,arcpgu.txt
F: drivers/gpu/drm/tiny/arcpgu.c
ARCNET NETWORK LAYER
-M: Michael Grzeschik <m.grzeschik@pengutronix.de>
+M: Michael Grzeschik <mgr@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/arcnet/
@@ -3367,7 +3367,9 @@ F: drivers/irqchip/irq-rda-intc.c
F: drivers/tty/serial/rda-uart.c
ARM/REALTEK ARCHITECTURE
-M: Andreas Färber <afaerber@suse.de>
+M: James Tai <james.tai@realtek.com>
+M: Yu-Chun Lin <eleanor.lin@realtek.com>
+R: Andreas Färber <afaerber@suse.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-realtek-soc@lists.infradead.org (moderated for non-subscribers)
S: Maintained
@@ -3375,6 +3377,7 @@ F: Documentation/devicetree/bindings/arm/realtek.yaml
F: arch/arm/boot/dts/realtek/
F: arch/arm/mach-realtek/
F: arch/arm64/boot/dts/realtek/
+F: drivers/pinctrl/realtek/
ARM/RISC-V/RENESAS ARCHITECTURE
M: Geert Uytterhoeven <geert+renesas@glider.be>
@@ -13869,6 +13872,7 @@ M: Pratyush Yadav <pratyush@kernel.org>
R: Dave Young <ruirui.yang@linux.dev>
L: kexec@lists.infradead.org
S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/liveupdate/linux.git
F: Documentation/admin-guide/kdump/
F: fs/proc/vmcore.c
F: include/linux/crash_core.h
@@ -14186,6 +14190,7 @@ M: Pasha Tatashin <pasha.tatashin@soleen.com>
M: Pratyush Yadav <pratyush@kernel.org>
L: kexec@lists.infradead.org
W: http://kernel.org/pub/linux/utils/kernel/kexec/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/liveupdate/linux.git
F: include/linux/kexec.h
F: include/uapi/linux/kexec.h
F: kernel/kexec*
@@ -14902,6 +14907,7 @@ LIVE UPDATE
M: Pasha Tatashin <pasha.tatashin@soleen.com>
M: Mike Rapoport <rppt@kernel.org>
M: Pratyush Yadav <pratyush@kernel.org>
+L: kexec@lists.infradead.org
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/liveupdate/linux.git
@@ -18632,6 +18638,7 @@ F: tools/testing/selftests/net/
X: Documentation/networking/mac80211-injection.rst
X: Documentation/networking/mac80211_hwsim/
X: Documentation/networking/regulatory.rst
+X: include/net/bluetooth/
X: include/net/cfg80211.h
X: include/net/ieee80211_radiotap.h
X: include/net/iw_handler.h
@@ -20737,15 +20744,13 @@ F: Documentation/devicetree/bindings/pci/intel,keembay-pcie*
F: drivers/pci/controller/dwc/pcie-keembay.c
PCIE DRIVER FOR INTEL LGM GW SOC
-M: Chuanhua Lei <lchuanhua@maxlinear.com>
L: linux-pci@vger.kernel.org
-S: Maintained
+S: Orphan
F: Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml
F: drivers/pci/controller/dwc/pcie-intel-gw.c
PCIE DRIVER FOR MEDIATEK
M: Ryder Lee <ryder.lee@mediatek.com>
-M: Jianjun Wang <jianjun.wang@mediatek.com>
L: linux-pci@vger.kernel.org
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
S: Supported
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 483965c5a4de..b154b4e3dfa8 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += agp.h
generic-y += asm-offsets.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += text-patching.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 4c69522e0328..483caacc6988 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -5,5 +5,6 @@ generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += parport.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += text-patching.h
diff --git a/arch/arm/boot/dts/renesas/r7s72100-genmai.dts b/arch/arm/boot/dts/renesas/r7s72100-genmai.dts
index 3c3756509714..da552a66615e 100644
--- a/arch/arm/boot/dts/renesas/r7s72100-genmai.dts
+++ b/arch/arm/boot/dts/renesas/r7s72100-genmai.dts
@@ -34,9 +34,6 @@
clocks = <&mstp9_clks R7S72100_CLK_SPIBSC0>;
power-domains = <&cpg_clocks>;
- #address-cells = <1>;
- #size-cells = <1>;
-
partitions {
compatible = "fixed-partitions";
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts b/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts
index 91178fb9e721..3306bc9b7bc3 100644
--- a/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts
+++ b/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts
@@ -36,8 +36,6 @@
power-domains = <&cpg_clocks>;
bank-width = <4>;
device-width = <1>;
- #address-cells = <1>;
- #size-cells = <1>;
partitions {
compatible = "fixed-partitions";
diff --git a/arch/arm/boot/dts/renesas/r7s72100.dtsi b/arch/arm/boot/dts/renesas/r7s72100.dtsi
index 245c26bb8e03..6ec57ffa72e8 100644
--- a/arch/arm/boot/dts/renesas/r7s72100.dtsi
+++ b/arch/arm/boot/dts/renesas/r7s72100.dtsi
@@ -37,7 +37,7 @@
clock-div = <3>;
};
- bsc: bus {
+ bsc: bus@0 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/renesas/r8a7778.dtsi b/arch/arm/boot/dts/renesas/r8a7778.dtsi
index 859dd29dfce3..7db456b19795 100644
--- a/arch/arm/boot/dts/renesas/r8a7778.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a7778.dtsi
@@ -40,7 +40,7 @@
spi2 = &hspi2;
};
- lbsc: bus {
+ lbsc: bus@0 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/renesas/r8a7779.dtsi b/arch/arm/boot/dts/renesas/r8a7779.dtsi
index e437c22f452d..9e8a7e190c89 100644
--- a/arch/arm/boot/dts/renesas/r8a7779.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a7779.dtsi
@@ -704,7 +704,7 @@
};
};
- lbsc: bus {
+ lbsc: bus@0 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/renesas/r8a7792.dtsi b/arch/arm/boot/dts/renesas/r8a7792.dtsi
index 9e0de69ac3a3..fbdbcff1cbed 100644
--- a/arch/arm/boot/dts/renesas/r8a7792.dtsi
+++ b/arch/arm/boot/dts/renesas/r8a7792.dtsi
@@ -86,7 +86,7 @@
bootph-all;
};
- lbsc: bus {
+ lbsc: bus@0 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 03657ff8fbe3..decad5f2c826 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += early_ioremap.h
generic-y += extable.h
generic-y += flat.h
generic-y += parport.h
+generic-y += ring_buffer.h
generated-y += mach-types.h
generated-y += unistd-nr.h
diff --git a/arch/arm/mach-versatile/integrator_cp.c b/arch/arm/mach-versatile/integrator_cp.c
index 2ed4ded56b3f..03dfb5f720b7 100644
--- a/arch/arm/mach-versatile/integrator_cp.c
+++ b/arch/arm/mach-versatile/integrator_cp.c
@@ -86,14 +86,6 @@ static u64 notrace intcp_read_sched_clock(void)
return val;
}
-static void __init intcp_init_early(void)
-{
- cm_map = syscon_regmap_lookup_by_compatible("arm,core-module-integrator");
- if (IS_ERR(cm_map))
- return;
- sched_clock_register(intcp_read_sched_clock, 32, 24000000);
-}
-
static void __init intcp_init_irq_of(void)
{
cm_init();
@@ -119,6 +111,10 @@ static void __init intcp_init_of(void)
{
struct device_node *cpcon;
+ cm_map = syscon_regmap_lookup_by_compatible("arm,core-module-integrator");
+ if (!IS_ERR(cm_map))
+ sched_clock_register(intcp_read_sched_clock, 32, 24000000);
+
cpcon = of_find_matching_node(NULL, intcp_syscon_match);
if (!cpcon)
return;
@@ -138,7 +134,6 @@ static const char * intcp_dt_board_compat[] = {
DT_MACHINE_START(INTEGRATOR_CP_DT, "ARM Integrator/CP (Device Tree)")
.reserve = integrator_reserve,
.map_io = intcp_map_io,
- .init_early = intcp_init_early,
.init_irq = intcp_init_irq_of,
.init_machine = intcp_init_of,
.dt_compat = intcp_dt_board_compat,
diff --git a/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso b/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso
index 258f8668ca36..90767d74e21b 100644
--- a/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso
+++ b/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso
@@ -27,7 +27,12 @@
status = "okay";
ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
port@1 {
+ reg = <1>;
+
lvds1_out: endpoint {
remote-endpoint = <&panel_in>;
};
diff --git a/arch/arm64/boot/dts/renesas/r8a78000.dtsi b/arch/arm64/boot/dts/renesas/r8a78000.dtsi
index 3e1c98903cea..3ec1b53d2782 100644
--- a/arch/arm64/boot/dts/renesas/r8a78000.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a78000.dtsi
@@ -699,7 +699,7 @@
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc0700000 0 0x40>;
interrupts = <GIC_ESPI 10 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
+ clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
};
@@ -709,7 +709,7 @@
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc0704000 0 0x40>;
interrupts = <GIC_ESPI 11 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
+ clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
};
@@ -719,7 +719,7 @@
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc0708000 0 0x40>;
interrupts = <GIC_ESPI 12 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
+ clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
};
@@ -729,7 +729,7 @@
"renesas,rcar-gen5-scif", "renesas,scif";
reg = <0 0xc070c000 0 0x40>;
interrupts = <GIC_ESPI 13 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>;
+ clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/renesas/r9a09g056.dtsi b/arch/arm64/boot/dts/renesas/r9a09g056.dtsi
index 40525470194e..7ccddd6a4a9a 100644
--- a/arch/arm64/boot/dts/renesas/r9a09g056.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a09g056.dtsi
@@ -1327,6 +1327,7 @@
resets = <&cpg 0xaf>;
power-domains = <&cpg>;
#reset-cells = <0>;
+ #mux-state-cells = <1>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
index 9581af58024e..6f6fe5f36bef 100644
--- a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
@@ -1345,6 +1345,7 @@
resets = <&cpg 0xaf>;
power-domains = <&cpg>;
#reset-cells = <0>;
+ #mux-state-cells = <1>;
status = "disabled";
};
@@ -1355,6 +1356,7 @@
resets = <&cpg 0xaf>;
power-domains = <&cpg>;
#reset-cells = <0>;
+ #mux-state-cells = <1>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi b/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi
index 4d2b0655859a..3feffa4f16a9 100644
--- a/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi
+++ b/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi
@@ -46,7 +46,12 @@
status = "okay";
ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
port@0 {
+ reg = <0>;
+
csi2_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi b/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi
index 36707576030d..f5412578ee65 100644
--- a/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi
+++ b/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi
@@ -26,7 +26,12 @@
status = "okay";
ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
port@0 {
+ reg = <0>;
+
du_out_rgb: endpoint {
remote-endpoint = <&adv7513_in>;
};
diff --git a/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso b/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso
index c83a30adc6ad..7807c3f80409 100644
--- a/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso
+++ b/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso
@@ -27,7 +27,12 @@
status = "okay";
ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
port@1 {
+ reg = <1>;
+
lvds0_out: endpoint {
remote-endpoint = <&panel_in>;
};
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index e25d0d18f6d7..58200de8a221 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -33,7 +33,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
unsigned long vaddr);
#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
-bool tag_clear_highpages(struct page *to, int numpages);
+bool tag_clear_highpages(struct page *to, int numpages, bool clear_pages);
#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
diff --git a/arch/arm64/include/asm/ring_buffer.h b/arch/arm64/include/asm/ring_buffer.h
new file mode 100644
index 000000000000..62316c406888
--- /dev/null
+++ b/arch/arm64/include/asm/ring_buffer.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_ARM64_RING_BUFFER_H
+#define _ASM_ARM64_RING_BUFFER_H
+
+#include <asm/cacheflush.h>
+
+/* Flush D-cache on persistent ring buffer */
+#define arch_ring_buffer_flush_range(start, end) dcache_clean_pop(start, end)
+
+#endif /* _ASM_ARM64_RING_BUFFER_H */
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 0f3c5c7ca054..739800835920 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -1018,7 +1018,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
return vma_alloc_folio(flags, 0, vma, vaddr);
}
-bool tag_clear_highpages(struct page *page, int numpages)
+bool tag_clear_highpages(struct page *page, int numpages, bool clear_pages)
{
/*
* Check if MTE is supported and fall back to clear_highpage().
@@ -1026,13 +1026,16 @@ bool tag_clear_highpages(struct page *page, int numpages)
* post_alloc_hook() will invoke tag_clear_highpages().
*/
if (!system_supports_mte())
- return false;
+ return clear_pages;
/* Newly allocated pages, shouldn't have been tagged yet */
for (int i = 0; i < numpages; i++, page++) {
WARN_ON_ONCE(!try_page_mte_tagging(page));
- mte_zero_clear_page_tags(page_address(page));
+ if (clear_pages)
+ mte_zero_clear_page_tags(page_address(page));
+ else
+ mte_clear_page_tags(page_address(page));
set_page_mte_tagged(page);
}
- return true;
+ return false;
}
diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild
index 3a5c7f6e5aac..7dca0c6cdc84 100644
--- a/arch/csky/include/asm/Kbuild
+++ b/arch/csky/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += qrwlock.h
generic-y += qrwlock_types.h
generic-y += qspinlock.h
generic-y += parport.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += vmlinux.lds.h
generic-y += text-patching.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 1efa1e993d4b..0f887d4238ed 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += extable.h
generic-y += iomap.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += text-patching.h
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 9034b583a88a..7e92957baf6a 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -10,5 +10,6 @@ generic-y += qrwlock.h
generic-y += user.h
generic-y += ioctl.h
generic-y += mmzone.h
+generic-y += ring_buffer.h
generic-y += statfs.h
generic-y += text-patching.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index b282e0dd8dc1..62543bf305ff 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -3,5 +3,6 @@ generated-y += syscall_table.h
generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += spinlock.h
generic-y += text-patching.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 7178f990e8b3..0030309b47ad 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += parport.h
+generic-y += ring_buffer.h
generic-y += syscalls.h
generic-y += tlb.h
generic-y += user.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 684569b2ecd6..9771c3d85074 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -12,5 +12,6 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += qrwlock.h
generic-y += qspinlock.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += text-patching.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 28004301c236..0a2530964413 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += cmpxchg.h
generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += spinlock.h
generic-y += user.h
generic-y += text-patching.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index cef49d60d74c..8aa34621702d 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -8,4 +8,5 @@ generic-y += spinlock_types.h
generic-y += spinlock.h
generic-y += qrwlock_types.h
generic-y += qrwlock.h
+generic-y += ring_buffer.h
generic-y += user.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 4fb596d94c89..d48d158f7241 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -4,4 +4,5 @@ generated-y += syscall_table_64.h
generic-y += agp.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += user.h
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index ccabc6e17168..eda1fec7ffd9 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -393,6 +393,7 @@ CONFIG_NETCONSOLE=m
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
+CONFIG_EL3=m
CONFIG_VORTEX=m
CONFIG_TYPHOON=m
CONFIG_ADAPTEC_STARFIRE=m
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 2e23533b67e3..805b5aeebb6f 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -5,4 +5,5 @@ generated-y += syscall_table_spu.h
generic-y += agp.h
generic-y += mcs_spinlock.h
generic-y += qrwlock.h
+generic-y += ring_buffer.h
generic-y += early_ioremap.h
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi
index 2d14e92f068d..9078e5b1e49c 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi
@@ -101,16 +101,6 @@
status = "okay";
};
-&i2c0 {
- pinctrl-names = "default";
- pinctrl-0 = <&i2c0_fabric>;
-};
-
-&i2c1 {
- pinctrl-names = "default";
- pinctrl-0 = <&i2c1_mssio>;
-};
-
&mmuart1 {
pinctrl-names = "default";
pinctrl-0 = <&uart1_fabric>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts
index 8afedece89d1..636493f6584d 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts
@@ -14,6 +14,16 @@
"microchip,mpfs";
};
+&i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_fabric>;
+};
+
+&i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_mssio>;
+};
+
&syscontroller {
microchip,bitstream-flash = <&sys_ctrl_flash>;
};
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
index 556aa9638282..6fadce815c9a 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
@@ -11,3 +11,22 @@
"microchip,mpfs-icicle-kit",
"microchip,mpfs";
};
+
+&i2c0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_fabric>;
+};
+
+/*
+ * Due to silicon errata, routing via MSS IOs doesn't work on ES devices.
+ * Instead, i2c1, appearing on B1/C1, which are normally MSS IOs, is routed
+ * via the fabric and back to B1/C1 via "fabric-test" functionality.
+ * This is done silently by Libero, so the iomux0 setting for i2c1 has to
+ * be fabric IO, despite tooling etc saying that MSS IOs are used.
+ *
+ * See Section 3.3 of https://ww1.microchip.com/downloads/aemDocuments/documents/FPGA/ProductDocuments/Errata/polarfiresoc/microsemi_polarfire_soc_fpga_egineering_samples_errata_er0219_v1.pdf
+ */
+&i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_fabric>;
+};
diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
index 8cfe8033305d..a7a1c09a2c90 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
@@ -135,29 +135,6 @@
clock-frequency = <49152000>;
};
-&camss {
- assigned-clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>,
- <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>;
- assigned-clock-rates = <49500000>, <198000000>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
-
- port@0 {
- reg = <0>;
- };
-
- port@1 {
- reg = <1>;
-
- camss_from_csi2rx: endpoint {
- remote-endpoint = <&csi2rx_to_camss>;
- };
- };
- };
-};
-
&csi2rx {
assigned-clocks = <&ispcrg JH7110_ISPCLK_VIN_SYS>;
assigned-clock-rates = <297000000>;
@@ -175,9 +152,7 @@
port@1 {
reg = <1>;
- csi2rx_to_camss: endpoint {
- remote-endpoint = <&camss_from_csi2rx>;
- };
+ /* remote CAMSS endpoint */
};
};
};
diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi
index 6e56e9d20bb0..9c3e4598747e 100644
--- a/arch/riscv/boot/dts/starfive/jh7110.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi
@@ -1199,34 +1199,6 @@
#phy-cells = <0>;
};
- camss: isp@19840000 {
- compatible = "starfive,jh7110-camss";
- reg = <0x0 0x19840000 0x0 0x10000>,
- <0x0 0x19870000 0x0 0x30000>;
- reg-names = "syscon", "isp";
- clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>,
- <&ispcrg JH7110_ISPCLK_ISPV2_TOP_WRAPPER_C>,
- <&ispcrg JH7110_ISPCLK_DVP_INV>,
- <&ispcrg JH7110_ISPCLK_VIN_P_AXI_WR>,
- <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>,
- <&syscrg JH7110_SYSCLK_ISP_TOP_CORE>,
- <&syscrg JH7110_SYSCLK_ISP_TOP_AXI>;
- clock-names = "apb_func", "wrapper_clk_c", "dvp_inv",
- "axiwr", "mipi_rx0_pxl", "ispcore_2x",
- "isp_axi";
- resets = <&ispcrg JH7110_ISPRST_ISPV2_TOP_WRAPPER_P>,
- <&ispcrg JH7110_ISPRST_ISPV2_TOP_WRAPPER_C>,
- <&ispcrg JH7110_ISPRST_VIN_P_AXI_RD>,
- <&ispcrg JH7110_ISPRST_VIN_P_AXI_WR>,
- <&syscrg JH7110_SYSRST_ISP_TOP>,
- <&syscrg JH7110_SYSRST_ISP_TOP_AXI>;
- reset-names = "wrapper_p", "wrapper_c", "axird",
- "axiwr", "isp_top_n", "isp_top_axi";
- power-domains = <&pwrc JH7110_PD_ISP>;
- interrupts = <92>, <87>, <90>, <88>;
- status = "disabled";
- };
-
voutcrg: clock-controller@295c0000 {
compatible = "starfive,jh7110-voutcrg";
reg = <0x0 0x295c0000 0x0 0x10000>;
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index bd5fc9403295..7721b63642f4 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -14,5 +14,6 @@ generic-y += ticket_spinlock.h
generic-y += qrwlock.h
generic-y += qrwlock_types.h
generic-y += qspinlock.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += vmlinux.lds.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 80bad7de7a04..0c1fc47c3ba0 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,3 +7,4 @@ generated-y += unistd_nr.h
generic-y += asm-offsets.h
generic-y += mcs_spinlock.h
generic-y += mmzone.h
+generic-y += ring_buffer.h
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 7b8d70fe406d..4a41c0247ffa 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -267,6 +267,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
/* No need to take locks as the page table is not installed yet. */
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
pgste_init.vsie_notif = old.s.fc1.vsie_notif;
+ pgste_init.vsie_gmem = old.s.fc1.vsie_notif;
pgste_init.pcl = uses_skeys && init.h.i;
dat_init_pgstes(pt, pgste_init.val);
} else {
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 8f8278c44879..873e13ac5a27 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -145,7 +145,8 @@ union pgste {
unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 5;
+ unsigned long vsie_gmem : 1; /* Contains nested guest memory */
+ unsigned long : 4;
unsigned long : 8;
};
struct {
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index b07accd19618..4f8d5592c9a9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1445,6 +1445,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
} else {
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
pgste.vsie_notif = 1;
+ pgste.vsie_gmem = 1;
}
pgste_set_unlock(ptep_h, pgste);
if (rc)
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 3c26e35af0ef..957126ab991c 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -125,7 +125,7 @@ struct gmap *gmap_new_child(struct gmap *parent, gfn_t limit)
int gmap_set_limit(struct gmap *gmap, gfn_t limit)
{
- struct kvm_s390_mmu_cache *mc;
+ struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
int rc, type;
type = gmap_limit_to_type(limit);
@@ -142,7 +142,6 @@ int gmap_set_limit(struct gmap *gmap, gfn_t limit)
rc = dat_set_asce_limit(mc, &gmap->asce, type);
} while (rc == -ENOMEM);
- kvm_s390_free_mmu_cache(mc);
return 0;
}
@@ -822,8 +821,8 @@ int gmap_ucas_translate(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gpa_t
int gmap_ucas_map(struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, unsigned long count)
{
- struct kvm_s390_mmu_cache *mc;
- int rc;
+ struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
+ int rc = 0;
mc = kvm_s390_new_mmu_cache();
if (!mc)
@@ -1026,13 +1025,15 @@ int gmap_insert_rmap(struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, int level)
int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn,
kvm_pfn_t pfn, int level, bool wr)
{
+ unsigned long bitmask;
union crste *crstep;
union pgste pgste;
union pte *ptep;
union pte pte;
int flags, rc;
- KVM_BUG_ON(!is_shadow(sg), sg->kvm);
+ if (KVM_BUG_ON(!is_shadow(sg) || level <= TABLE_TYPE_PAGE_TABLE, sg->kvm))
+ return -EINVAL;
lockdep_assert_held(&sg->parent->children_lock);
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
@@ -1041,8 +1042,9 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf
if (rc)
return rc;
if (level <= TABLE_TYPE_REGION1) {
+ bitmask = -1UL << (8 + 11 * level);
scoped_guard(spinlock, &sg->host_to_rmap_lock)
- rc = gmap_insert_rmap(sg, p_gfn, r_gfn, level);
+ rc = gmap_insert_rmap(sg, p_gfn, r_gfn & bitmask, level);
}
if (rc)
return rc;
@@ -1143,8 +1145,10 @@ void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn)
}
scoped_guard(spinlock, &sg->host_to_rmap_lock)
head = radix_tree_delete(&sg->host_to_rmap, gfn);
- gmap_for_each_rmap_safe(rmap, rnext, head)
+ gmap_for_each_rmap_safe(rmap, rnext, head) {
gmap_unshadow_level(sg, rmap->r_gfn, rmap->level);
+ kfree(rmap);
+ }
}
}
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index 96ee1395a592..742e42a31744 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -167,6 +167,36 @@ static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
return _gmap_unmap_prefix(gmap, gfn, end, false);
}
+/**
+ * pte_needs_unshadow() -- Check if the pte operations triggers unshadowing.
+ * @oldpte: the previous value for the guest pte.
+ * @newpte: the new pte being set.
+ * @pgste: the pgste for the pte entry.
+ *
+ * If the pgste.vsie_notif bit is not set, return false: the page is not
+ * involved in vsie and thus should not trigger an unshadow operation.
+ *
+ * If the pgste.vsie_gmem bit is set, this pte represents shadowed guest
+ * memory. The access rights on g3's memory should be synchronized with g1's
+ * and g2's. Therefore unshadowing is triggered if the new and old pte
+ * differ in protection, or if the new pte is invalid.
+ *
+ * If the pgste.vsie_gmem bit is not set, this pte maps the g2 dat tables
+ * for g3. If the entry becomes writable or absent, it becomes impossible to
+ * guarantee that the shadow mapping will match g2's mapping. In that case,
+ * trigger an unshadow event.
+ *
+ * Return: true if an unshadow event should be triggered, otherwise false.
+ */
+static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste)
+{
+ if (!pgste.vsie_notif)
+ return false;
+ if (pgste.vsie_gmem)
+ return (oldpte.h.p != newpte.h.p) || newpte.h.i;
+ return !newpte.h.p || !newpte.s.pr;
+}
+
static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
union pgste pgste, gfn_t gfn, bool needs_lock)
{
@@ -180,8 +210,9 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
pgste.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + 1);
}
- if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) {
+ if (pte_needs_unshadow(*ptep, newpte, pgste)) {
pgste.vsie_notif = 0;
+ pgste.vsie_gmem = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
@@ -189,6 +220,7 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
}
if (!ptep->s.d && newpte.s.d && !newpte.s.s)
SetPageDirty(pfn_to_page(newpte.h.pfra));
+ pgste.zero = 0;
return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap));
}
@@ -198,6 +230,30 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
}
+/**
+ * crste_needs_unshadow() -- Check if the crste operations triggers unshadowing.
+ * @oldcrste: the previous value for the crste.
+ * @newcrste: the new value for the crste.
+ *
+ * If the old crste did not have the vsie_notif bit set, return false: the
+ * page is not involved in vsie and thus should not trigger an unshadow
+ * operation. Conversely, if the bit is set, it can only be g3 memory, since
+ * dat tables are never mapped using large pages.
+ *
+ * Similar to the pgste.vsie_gmem case of pte_needs_unshadow(), if the
+ * protection bit is changing or the new page is invalid, trigger an
+ * unshadow event. Also trigger an unshadow event if the new crste does not
+ * have the vsie_notif bit set.
+ *
+ * Return: true if an unshadow event should be triggered, otherwise false.
+ */
+static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste)
+{
+ if (!oldcrste.s.fc1.vsie_notif)
+ return false;
+ return (newcrste.h.p != oldcrste.h.p) || newcrste.h.i || !newcrste.s.fc1.vsie_notif;
+}
+
static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
union crste oldcrste, union crste newcrste,
gfn_t gfn, bool needs_lock)
@@ -216,8 +272,7 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio
newcrste.s.fc1.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
- if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
- (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
+ if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) {
newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 4d3f10ed8275..f0403d3ee8ab 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -3,4 +3,5 @@ generated-y += syscall_table.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += parport.h
+generic-y += ring_buffer.h
generic-y += text-patching.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 17ee8a273aa6..49c6bb326b75 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -4,4 +4,5 @@ generated-y += syscall_table_64.h
generic-y += agp.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
generic-y += text-patching.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 1b9b82bbe322..2a1629ba8140 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -17,6 +17,7 @@ generic-y += module.lds.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
+generic-y += ring_buffer.h
generic-y += runtime-const.h
generic-y += softirq_stack.h
generic-y += switch_to.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4566000e15c4..078fd2c0d69d 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -14,3 +14,4 @@ generic-y += early_ioremap.h
generic-y += fprobe.h
generic-y += mcs_spinlock.h
generic-y += mmzone.h
+generic-y += ring_buffer.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 13fe45dea296..e57af619263a 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -6,5 +6,6 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += qrwlock.h
generic-y += qspinlock.h
+generic-y += ring_buffer.h
generic-y += user.h
generic-y += text-patching.h
diff --git a/crypto/krb5/krb5_api.c b/crypto/krb5/krb5_api.c
index 23026d4206c8..c7ea40f900a7 100644
--- a/crypto/krb5/krb5_api.c
+++ b/crypto/krb5/krb5_api.c
@@ -134,27 +134,69 @@ EXPORT_SYMBOL(crypto_krb5_how_much_data);
* Find the offset and size of the data in a secure message so that this
* information can be used in the metadata buffer which will get added to the
* digest by crypto_krb5_verify_mic().
+ *
+ * Return: 0 if successful, -EBADMSG if the message is too short or -EINVAL if
+ * the mode is unsupported.
*/
-void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5,
- enum krb5_crypto_mode mode,
- size_t *_offset, size_t *_len)
+int crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5,
+ enum krb5_crypto_mode mode,
+ size_t *_offset, size_t *_len)
{
switch (mode) {
case KRB5_CHECKSUM_MODE:
+ if (*_len < krb5->cksum_len)
+ return -EBADMSG;
*_offset += krb5->cksum_len;
*_len -= krb5->cksum_len;
- return;
+ return 0;
case KRB5_ENCRYPT_MODE:
+ if (*_len < krb5->conf_len + krb5->cksum_len)
+ return -EBADMSG;
*_offset += krb5->conf_len;
*_len -= krb5->conf_len + krb5->cksum_len;
- return;
+ return 0;
default:
WARN_ON_ONCE(1);
- return;
+ return -EINVAL;
}
}
EXPORT_SYMBOL(crypto_krb5_where_is_the_data);
+/**
+ * crypto_krb5_check_data_len - Check a message is big enough
+ * @krb5: The encoding to use.
+ * @mode: Mode of operation.
+ * @len: The length of the secure blob.
+ * @min_content: Minimum length of the content inside the blob.
+ *
+ * Check that a message is large enough to hold whatever bits the encryption
+ * type wants to glue on (nonce, checksum) plus a minimum amount of content.
+ *
+ * Return: 0 if successful, -EBADMSG if the message is too short or -EINVAL if
+ * the mode is unsupported.
+ */
+int crypto_krb5_check_data_len(const struct krb5_enctype *krb5,
+ enum krb5_crypto_mode mode,
+ size_t len, size_t min_content)
+{
+ switch (mode) {
+ case KRB5_CHECKSUM_MODE:
+ if (len < krb5->cksum_len ||
+ len - krb5->cksum_len < min_content)
+ return -EBADMSG;
+ return 0;
+ case KRB5_ENCRYPT_MODE:
+ if (len < krb5->conf_len + krb5->cksum_len ||
+ len - (krb5->conf_len + krb5->cksum_len) < min_content)
+ return -EBADMSG;
+ return 0;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(crypto_krb5_check_data_len);
+
/*
* Prepare the encryption with derived key data.
*/
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index e76d15411e2a..3d0027ec33c2 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5584,6 +5584,7 @@ void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp)
link->pmp = pmp;
link->active_tag = ATA_TAG_POISON;
link->hw_sata_spd_limit = UINT_MAX;
+ INIT_WORK(&link->deferred_qc_work, ata_scsi_deferred_qc_work);
/* can't use iterator, ap isn't initialized yet */
for (i = 0; i < ATA_MAX_DEVICES; i++) {
@@ -5666,7 +5667,6 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
mutex_init(&ap->scsi_scan_mutex);
INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
- INIT_WORK(&ap->deferred_qc_work, ata_scsi_deferred_qc_work);
INIT_LIST_HEAD(&ap->eh_done_q);
init_waitqueue_head(&ap->eh_wait_q);
init_completion(&ap->park_req_pending);
@@ -6291,12 +6291,15 @@ static void ata_port_detach(struct ata_port *ap)
/* It better be dead now and not have any remaining deferred qc. */
WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
- WARN_ON(ap->deferred_qc);
- cancel_work_sync(&ap->deferred_qc_work);
cancel_delayed_work_sync(&ap->hotplug_task);
cancel_delayed_work_sync(&ap->scsi_rescan_task);
+ ata_for_each_link(link, ap, PMP_FIRST) {
+ WARN_ON(link->deferred_qc);
+ cancel_work_sync(&link->deferred_qc_work);
+ }
+
/* Delete port multiplier link transport devices */
if (ap->pmp_link) {
int i;
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 9a4b67b90b17..d623eb32ed8b 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -651,11 +651,11 @@ int ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
if (qc->scsicmd != scmd)
continue;
if ((qc->flags & ATA_QCFLAG_ACTIVE) ||
- qc == ap->deferred_qc)
+ qc == qc->dev->link->deferred_qc)
break;
}
- if (i < ATA_MAX_QUEUE && qc == ap->deferred_qc) {
+ if (i < ATA_MAX_QUEUE && qc == qc->dev->link->deferred_qc) {
/*
* This is a deferred command that timed out while
* waiting for the command queue to drain. Since the qc
@@ -666,8 +666,8 @@ int ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
* deferred qc work from issuing this qc.
*/
WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE);
- ap->deferred_qc = NULL;
- cancel_work(&ap->deferred_qc_work);
+ qc->dev->link->deferred_qc = NULL;
+ cancel_work(&qc->dev->link->deferred_qc_work);
set_host_byte(scmd, DID_TIME_OUT);
scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
} else if (i < ATA_MAX_QUEUE) {
diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
index e3adc008fed1..e8540931b4a1 100644
--- a/drivers/ata/libata-pmp.c
+++ b/drivers/ata/libata-pmp.c
@@ -110,13 +110,24 @@ int sata_pmp_qc_defer_cmd_switch(struct ata_queued_cmd *qc)
{
struct ata_link *link = qc->dev->link;
struct ata_port *ap = link->ap;
+ int ret;
if (ap->excl_link == NULL || ap->excl_link == link) {
if (ap->nr_active_links == 0 || ata_link_active(link)) {
qc->flags |= ATA_QCFLAG_CLEAR_EXCL;
- return ata_std_qc_defer(qc);
+ ret = ata_std_qc_defer(qc);
+ if (ret == ATA_DEFER_LINK)
+ return ATA_DEFER_LINK_EXCL;
+ return ret;
}
+ /*
+ * Note: ap->excl_link contains the link that is next in line,
+ * i.e. implicit round robin. If there is only one link
+ * dispatching, ap->excl_link will be left unclaimed, allowing
+ * other links to set ap->excl_link, ensuring that the currently
+ * active link cannot queue any more.
+ */
ap->excl_link = link;
}
@@ -571,8 +582,11 @@ static void sata_pmp_detach(struct ata_device *dev)
if (ap->ops->pmp_detach)
ap->ops->pmp_detach(ap);
- ata_for_each_link(tlink, ap, EDGE)
+ ata_for_each_link(tlink, ap, EDGE) {
+ WARN_ON(tlink->deferred_qc);
+ cancel_work_sync(&tlink->deferred_qc_work);
ata_eh_detach_dev(tlink->device);
+ }
spin_lock_irqsave(ap->lock, flags);
ap->nr_pmp_links = 0;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index f44612e269a4..d43207c6e467 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1664,8 +1664,9 @@ static void ata_scsi_qc_done(struct ata_queued_cmd *qc, bool set_result,
void ata_scsi_deferred_qc_work(struct work_struct *work)
{
- struct ata_port *ap =
- container_of(work, struct ata_port, deferred_qc_work);
+ struct ata_link *link =
+ container_of(work, struct ata_link, deferred_qc_work);
+ struct ata_port *ap = link->ap;
struct ata_queued_cmd *qc;
unsigned long flags;
@@ -1676,10 +1677,10 @@ void ata_scsi_deferred_qc_work(struct work_struct *work)
* such case, we should not need any more deferring the qc, so warn if
* qc_defer() says otherwise.
*/
- qc = ap->deferred_qc;
+ qc = link->deferred_qc;
if (qc && !ata_port_eh_scheduled(ap)) {
WARN_ON_ONCE(ap->ops->qc_defer(qc));
- ap->deferred_qc = NULL;
+ link->deferred_qc = NULL;
ata_qc_issue(qc);
}
@@ -1688,7 +1689,7 @@ void ata_scsi_deferred_qc_work(struct work_struct *work)
void ata_scsi_requeue_deferred_qc(struct ata_port *ap)
{
- struct ata_queued_cmd *qc = ap->deferred_qc;
+ struct ata_link *link;
lockdep_assert_held(ap->lock);
@@ -1697,16 +1698,21 @@ void ata_scsi_requeue_deferred_qc(struct ata_port *ap)
* do not try to be smart about what to do with this deferred command
* and simply requeue it by completing it with DID_REQUEUE.
*/
- if (qc) {
- ap->deferred_qc = NULL;
- cancel_work(&ap->deferred_qc_work);
- ata_scsi_qc_done(qc, true, DID_REQUEUE << 16);
+ ata_for_each_link(link, ap, PMP_FIRST) {
+ struct ata_queued_cmd *qc = link->deferred_qc;
+
+ if (qc) {
+ link->deferred_qc = NULL;
+ cancel_work(&link->deferred_qc_work);
+ ata_scsi_qc_done(qc, true, DID_REQUEUE << 16);
+ }
}
}
-static void ata_scsi_schedule_deferred_qc(struct ata_port *ap)
+static void ata_scsi_schedule_deferred_qc(struct ata_link *link)
{
- struct ata_queued_cmd *qc = ap->deferred_qc;
+ struct ata_queued_cmd *qc = link->deferred_qc;
+ struct ata_port *ap = link->ap;
lockdep_assert_held(ap->lock);
@@ -1723,12 +1729,12 @@ static void ata_scsi_schedule_deferred_qc(struct ata_port *ap)
return;
}
if (!ap->ops->qc_defer(qc))
- queue_work(system_highpri_wq, &ap->deferred_qc_work);
+ queue_work(system_highpri_wq, &link->deferred_qc_work);
}
static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
{
- struct ata_port *ap = qc->ap;
+ struct ata_link *link = qc->dev->link;
struct scsi_cmnd *cmd = qc->scsicmd;
u8 *cdb = cmd->cmnd;
bool have_sense = qc->flags & ATA_QCFLAG_SENSE_VALID;
@@ -1759,22 +1765,23 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
ata_scsi_qc_done(qc, false, 0);
- ata_scsi_schedule_deferred_qc(ap);
+ ata_scsi_schedule_deferred_qc(link);
}
static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc)
{
+ struct ata_link *link = qc->dev->link;
int ret;
if (!ap->ops->qc_defer)
- goto issue;
+ goto issue_qc;
/*
* If we already have a deferred qc, then rely on the SCSI layer to
* requeue and defer all incoming commands until the deferred qc is
* processed, once all on-going commands complete.
*/
- if (ap->deferred_qc) {
+ if (link->deferred_qc) {
ata_qc_free(qc);
return SCSI_MLQUEUE_DEVICE_BUSY;
}
@@ -1786,38 +1793,46 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc)
break;
case ATA_DEFER_LINK:
ret = SCSI_MLQUEUE_DEVICE_BUSY;
- break;
+ goto defer_qc;
+ case ATA_DEFER_LINK_EXCL:
+ /*
+ * Drivers making use of ap->excl_link cannot store the QC in
+ * link->deferred_qc, because the ap->excl_link handling is
+ * incompatible with the link->deferred_qc workqueue handling.
+ */
+ ret = SCSI_MLQUEUE_DEVICE_BUSY;
+ goto free_qc;
case ATA_DEFER_PORT:
ret = SCSI_MLQUEUE_HOST_BUSY;
- break;
+ goto free_qc;
default:
WARN_ON_ONCE(1);
ret = SCSI_MLQUEUE_HOST_BUSY;
- break;
+ goto free_qc;
}
- if (ret) {
- /*
- * We must defer this qc: if this is not an NCQ command, keep
- * this qc as a deferred one and report to the SCSI layer that
- * we issued it so that it is not requeued. The deferred qc will
- * be issued with the port deferred_qc_work once all on-going
- * commands complete.
- */
- if (!ata_is_ncq(qc->tf.protocol)) {
- ap->deferred_qc = qc;
- return 0;
- }
+issue_qc:
+ ata_qc_issue(qc);
+ return 0;
- /* Force a requeue of the command to defer its execution. */
- ata_qc_free(qc);
- return ret;
+defer_qc:
+ /*
+ * We must defer this qc: if this is not an NCQ command, keep
+ * this qc as a deferred one and report to the SCSI layer that
+ * we issued it so that it is not requeued. The deferred qc will
+ * be issued with the port deferred_qc_work once all on-going
+ * commands complete.
+ */
+ if (!ata_is_ncq(qc->tf.protocol)) {
+ link->deferred_qc = qc;
+ return 0;
}
-issue:
- ata_qc_issue(qc);
+free_qc:
+ /* Force a requeue of the command to defer its execution. */
+ ata_qc_free(qc);
- return 0;
+ return ret;
}
/**
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index d642ece9f07a..57f1081b86db 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -789,6 +789,7 @@ static int sil24_qc_defer(struct ata_queued_cmd *qc)
struct ata_link *link = qc->dev->link;
struct ata_port *ap = link->ap;
u8 prot = qc->tf.protocol;
+ int ret;
/*
* There is a bug in the chip:
@@ -826,7 +827,10 @@ static int sil24_qc_defer(struct ata_queued_cmd *qc)
qc->flags |= ATA_QCFLAG_CLEAR_EXCL;
}
- return ata_std_qc_defer(qc);
+ ret = ata_std_qc_defer(qc);
+ if (ret == ATA_DEFER_LINK)
+ return ATA_DEFER_LINK_EXCL;
+ return ret;
}
static enum ata_completion_errors sil24_qc_prep(struct ata_queued_cmd *qc)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index f806a683b767..6981b55d582a 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -1230,8 +1230,10 @@ void memblk_nr_poison_inc(unsigned long pfn)
const unsigned long block_id = pfn_to_block_id(pfn);
struct memory_block *mem = find_memory_block_by_id(block_id);
- if (mem)
+ if (mem) {
atomic_long_inc(&mem->nr_hwpoison);
+ put_device(&mem->dev);
+ }
}
void memblk_nr_poison_sub(unsigned long pfn, long i)
@@ -1239,8 +1241,10 @@ void memblk_nr_poison_sub(unsigned long pfn, long i)
const unsigned long block_id = pfn_to_block_id(pfn);
struct memory_block *mem = find_memory_block_by_id(block_id);
- if (mem)
+ if (mem) {
atomic_long_sub(i, &mem->nr_hwpoison);
+ put_device(&mem->dev);
+ }
}
static unsigned long memblk_nr_poison(struct memory_block *mem)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4065336ebd1f..6c1e7347e6a7 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4565,24 +4565,12 @@ out:
return ret;
}
-static void cancel_tasks_sync(struct rbd_device *rbd_dev)
-{
- dout("%s rbd_dev %p\n", __func__, rbd_dev);
-
- cancel_work_sync(&rbd_dev->acquired_lock_work);
- cancel_work_sync(&rbd_dev->released_lock_work);
- cancel_delayed_work_sync(&rbd_dev->lock_dwork);
- cancel_work_sync(&rbd_dev->unlock_work);
-}
-
/*
* header_rwsem must not be held to avoid a deadlock with
* rbd_dev_refresh() when flushing notifies.
*/
static void rbd_unregister_watch(struct rbd_device *rbd_dev)
{
- cancel_tasks_sync(rbd_dev);
-
mutex_lock(&rbd_dev->watch_mutex);
if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED)
__rbd_unregister_watch(rbd_dev);
@@ -6548,10 +6536,18 @@ out_err:
static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
{
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ disable_delayed_work_sync(&rbd_dev->lock_dwork);
+ disable_work_sync(&rbd_dev->unlock_work);
+
down_write(&rbd_dev->lock_rwsem);
if (__rbd_is_lock_owner(rbd_dev))
__rbd_release_lock(rbd_dev);
up_write(&rbd_dev->lock_rwsem);
+
+ flush_work(&rbd_dev->acquired_lock_work);
+ flush_work(&rbd_dev->released_lock_work);
}
/*
diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c
index a3643e67b33f..37e050763633 100644
--- a/drivers/bluetooth/btintel_pcie.c
+++ b/drivers/bluetooth/btintel_pcie.c
@@ -582,12 +582,10 @@ static int btintel_pcie_get_mac_access(struct btintel_pcie_data *data)
reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG);
- reg |= BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS;
- reg |= BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ;
- if ((reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_STS) == 0)
+ if (!(reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ)) {
reg |= BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ;
-
- btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg);
+ btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg);
+ }
do {
reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG);
@@ -607,16 +605,10 @@ static void btintel_pcie_release_mac_access(struct btintel_pcie_data *data)
reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG);
- if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ)
+ if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ) {
reg &= ~BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ;
-
- if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS)
- reg &= ~BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS;
-
- if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ)
- reg &= ~BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ;
-
- btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg);
+ btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg);
+ }
}
static void *btintel_pcie_copy_tlv(void *dest, enum btintel_pcie_tlv_type type,
diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h
index f922abd1e7d8..13efef499e4e 100644
--- a/drivers/bluetooth/btintel_pcie.h
+++ b/drivers/bluetooth/btintel_pcie.h
@@ -34,9 +34,6 @@
#define BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_STS (BIT(20))
#define BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ (BIT(21))
-/* Stop MAC Access disconnection request */
-#define BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS (BIT(22))
-#define BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ (BIT(23))
#define BTINTEL_PCIE_CSR_FUNC_CTRL_BUS_MASTER_STS (BIT(28))
#define BTINTEL_PCIE_CSR_FUNC_CTRL_BUS_MASTER_DISCON (BIT(29))
diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c
index f70c1b0f8990..8ff66b276af0 100644
--- a/drivers/bluetooth/btmtk.c
+++ b/drivers/bluetooth/btmtk.c
@@ -537,6 +537,7 @@ static void btmtk_usb_wmt_recv(struct urb *urb)
return;
} else if (urb->status == -ENOENT) {
/* Avoid suspend failed when usb_kill_urb */
+ kfree(urb->setup_packet);
return;
}
@@ -610,6 +611,7 @@ static int btmtk_usb_submit_wmt_recv_urb(struct hci_dev *hdev)
if (err != -EPERM && err != -ENODEV)
bt_dev_err(hdev, "urb %p submission failed (%d)",
urb, -err);
+ kfree(dr);
usb_unanchor_urb(urb);
}
@@ -719,8 +721,8 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev,
case BTMTK_WMT_FUNC_CTRL:
if (!skb_pull_data(data->evt_skb,
sizeof(wmt_evt_funcc->status))) {
- err = -EINVAL;
- goto err_free_skb;
+ status = BTMTK_WMT_ON_UNDONE;
+ break;
}
wmt_evt_funcc = (struct btmtk_hci_wmt_evt_funcc *)wmt_evt;
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 275ea865bc29..47f4902b40b4 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -194,7 +194,15 @@ void hci_uart_init_work(struct work_struct *work)
err = hci_register_dev(hu->hdev);
if (err < 0) {
BT_ERR("Can't register HCI device");
+
+ percpu_down_write(&hu->proto_lock);
clear_bit(HCI_UART_PROTO_READY, &hu->flags);
+ percpu_up_write(&hu->proto_lock);
+
+ /* Safely cancel work after clearing flags */
+ cancel_work_sync(&hu->write_work);
+
+ /* Close protocol before freeing hdev */
hu->proto->close(hu);
hdev = hu->hdev;
hu->hdev = NULL;
@@ -263,8 +271,12 @@ static int hci_uart_open(struct hci_dev *hdev)
/* Close device */
static int hci_uart_close(struct hci_dev *hdev)
{
+ struct hci_uart *hu = hci_get_drvdata(hdev);
+
BT_DBG("hdev %p", hdev);
+ cancel_work_sync(&hu->write_work);
+
hci_uart_flush(hdev);
hdev->flush = NULL;
return 0;
@@ -531,6 +543,7 @@ static void hci_uart_tty_close(struct tty_struct *tty)
{
struct hci_uart *hu = tty->disc_data;
struct hci_dev *hdev;
+ bool proto_ready;
BT_DBG("tty %p", tty);
@@ -540,24 +553,38 @@ static void hci_uart_tty_close(struct tty_struct *tty)
if (!hu)
return;
- hdev = hu->hdev;
- if (hdev)
- hci_uart_close(hdev);
+ /* Wait for init_ready to finish to prevent registration races */
+ cancel_work_sync(&hu->init_ready);
- if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) {
+ proto_ready = test_bit(HCI_UART_PROTO_READY, &hu->flags);
+ if (proto_ready) {
percpu_down_write(&hu->proto_lock);
clear_bit(HCI_UART_PROTO_READY, &hu->flags);
percpu_up_write(&hu->proto_lock);
+ }
- cancel_work_sync(&hu->init_ready);
- cancel_work_sync(&hu->write_work);
+ /*
+ * Unconditionally cancel write_work AFTER clearing PROTO_READY.
+ * This ensures that concurrent protocol timers cannot requeue
+ * write_work via hci_uart_tx_wakeup(), permanently preventing
+ * double-free races and UAFs.
+ */
+ cancel_work_sync(&hu->write_work);
+
+ hdev = hu->hdev;
+ if (hdev)
+ hci_uart_close(hdev); /* proto->flush is safely skipped */
+ if (proto_ready) {
if (hdev) {
if (test_bit(HCI_UART_REGISTERED, &hu->flags))
hci_unregister_dev(hdev);
- hci_free_dev(hdev);
}
+ /* Close protocol before freeing hdev (intrinsically purges queues) */
hu->proto->close(hu);
+
+ if (hdev)
+ hci_free_dev(hdev);
}
clear_bit(HCI_UART_PROTO_SET, &hu->flags);
@@ -625,11 +652,12 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data,
* tty caller
*/
hu->proto->recv(hu, data, count);
- percpu_up_read(&hu->proto_lock);
if (hu->hdev)
hu->hdev->stat.byte_rx += count;
+ percpu_up_read(&hu->proto_lock);
+
tty_unthrottle(tty);
}
@@ -695,6 +723,10 @@ static int hci_uart_register_dev(struct hci_uart *hu)
percpu_down_write(&hu->proto_lock);
clear_bit(HCI_UART_PROTO_INIT, &hu->flags);
percpu_up_write(&hu->proto_lock);
+ /* Cancel work after clearing flags */
+ cancel_work_sync(&hu->write_work);
+
+ /* Close protocol before freeing hdev */
hu->proto->close(hu);
hu->hdev = NULL;
hci_free_dev(hdev);
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index cd1834246b47..ed280399bf47 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -48,13 +48,12 @@
#define HCI_MAX_IBS_SIZE 10
#define IBS_WAKE_RETRANS_TIMEOUT_MS 100
-#define IBS_BTSOC_TX_IDLE_TIMEOUT_MS 200
+#define IBS_BTSOC_TX_IDLE_TIMEOUT msecs_to_jiffies(200)
#define IBS_HOST_TX_IDLE_TIMEOUT_MS 2000
-#define CMD_TRANS_TIMEOUT_MS 100
-#define MEMDUMP_TIMEOUT_MS 8000
-#define IBS_DISABLE_SSR_TIMEOUT_MS \
- (MEMDUMP_TIMEOUT_MS + FW_DOWNLOAD_TIMEOUT_MS)
-#define FW_DOWNLOAD_TIMEOUT_MS 3000
+#define CMD_TRANS_TIMEOUT msecs_to_jiffies(100)
+#define MEMDUMP_TIMEOUT msecs_to_jiffies(8000)
+#define FW_DOWNLOAD_TIMEOUT msecs_to_jiffies(3000)
+#define IBS_DISABLE_SSR_TIMEOUT (MEMDUMP_TIMEOUT + FW_DOWNLOAD_TIMEOUT)
/* susclk rate */
#define SUSCLK_RATE_32KHZ 32768
@@ -1096,7 +1095,7 @@ static void qca_controller_memdump(struct work_struct *work)
queue_delayed_work(qca->workqueue,
&qca->ctrl_memdump_timeout,
- msecs_to_jiffies(MEMDUMP_TIMEOUT_MS));
+ MEMDUMP_TIMEOUT);
skb_pull(skb, sizeof(qca_memdump->ram_dump_size));
qca_memdump->current_seq_no = 0;
qca_memdump->received_dump = 0;
@@ -1369,7 +1368,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate)
if (hu->serdev)
serdev_device_wait_until_sent(hu->serdev,
- msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS));
+ CMD_TRANS_TIMEOUT);
/* Give the controller time to process the request */
switch (qca_soc_type(hu)) {
@@ -1401,8 +1400,8 @@ static inline void host_set_baudrate(struct hci_uart *hu, unsigned int speed)
static int qca_send_power_pulse(struct hci_uart *hu, bool on)
{
+ int timeout = CMD_TRANS_TIMEOUT;
int ret;
- int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS);
u8 cmd = on ? QCA_WCN3990_POWERON_PULSE : QCA_WCN3990_POWEROFF_PULSE;
/* These power pulses are single byte command which are sent
@@ -1607,7 +1606,7 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev)
struct qca_data *qca = hu->priv;
wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION,
- TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS);
+ TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT);
clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
}
@@ -2591,7 +2590,7 @@ static void qca_serdev_remove(struct serdev_device *serdev)
static void qca_serdev_shutdown(struct serdev_device *serdev)
{
int ret;
- int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS);
+ int timeout = CMD_TRANS_TIMEOUT;
struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
struct hci_uart *hu = &qcadev->serdev_hu;
struct hci_dev *hdev = hu->hdev;
@@ -2648,7 +2647,7 @@ static int __maybe_unused qca_suspend(struct device *dev)
bool tx_pending = false;
int ret = 0;
u8 cmd;
- u32 wait_timeout = 0;
+ unsigned long wait_timeout = 0;
set_bit(QCA_SUSPENDING, &qca->flags);
@@ -2669,15 +2668,15 @@ static int __maybe_unused qca_suspend(struct device *dev)
if (test_bit(QCA_IBS_DISABLED, &qca->flags) ||
test_bit(QCA_SSR_TRIGGERED, &qca->flags)) {
wait_timeout = test_bit(QCA_SSR_TRIGGERED, &qca->flags) ?
- IBS_DISABLE_SSR_TIMEOUT_MS :
- FW_DOWNLOAD_TIMEOUT_MS;
+ IBS_DISABLE_SSR_TIMEOUT :
+ FW_DOWNLOAD_TIMEOUT;
/* QCA_IBS_DISABLED flag is set to true, During FW download
* and during memory dump collection. It is reset to false,
* After FW download complete.
*/
wait_on_bit_timeout(&qca->flags, QCA_IBS_DISABLED,
- TASK_UNINTERRUPTIBLE, msecs_to_jiffies(wait_timeout));
+ TASK_UNINTERRUPTIBLE, wait_timeout);
if (test_bit(QCA_IBS_DISABLED, &qca->flags)) {
bt_dev_err(hu->hdev, "SSR or FW download time out");
@@ -2729,7 +2728,7 @@ static int __maybe_unused qca_suspend(struct device *dev)
if (tx_pending) {
serdev_device_wait_until_sent(hu->serdev,
- msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS));
+ CMD_TRANS_TIMEOUT);
serial_clock_vote(HCI_IBS_TX_VOTE_CLOCK_OFF, hu);
}
@@ -2738,7 +2737,7 @@ static int __maybe_unused qca_suspend(struct device *dev)
*/
ret = wait_event_interruptible_timeout(qca->suspend_wait_q,
qca->rx_ibs_state == HCI_IBS_RX_ASLEEP,
- msecs_to_jiffies(IBS_BTSOC_TX_IDLE_TIMEOUT_MS));
+ IBS_BTSOC_TX_IDLE_TIMEOUT);
if (ret == 0) {
ret = -ETIMEDOUT;
goto error;
diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c
index c95e93ef3ab0..64b4e9e3e8fe 100644
--- a/drivers/dpll/zl3073x/dpll.c
+++ b/drivers/dpll/zl3073x/dpll.c
@@ -1394,8 +1394,8 @@ zl3073x_dpll_pin_register(struct zl3073x_dpll_pin *pin, u32 index)
err_register:
dpll_pin_put(pin->dpll_pin, &pin->tracker);
- pin->dpll_pin = NULL;
err_pin_get:
+ pin->dpll_pin = NULL;
fwnode_handle_put(pin->fwnode);
pin->fwnode = NULL;
zl3073x_pin_props_put(props);
@@ -1563,8 +1563,10 @@ zl3073x_dpll_pins_register(struct zl3073x_dpll *zldpll)
}
rc = zl3073x_dpll_pin_register(pin, index);
- if (rc)
+ if (rc) {
+ zl3073x_dpll_pin_free(pin);
goto error;
+ }
list_add(&pin->list, &zldpll->pins);
}
diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c
index 9576862d89c4..601c3418e0d9 100644
--- a/drivers/firmware/arm_ffa/bus.c
+++ b/drivers/firmware/arm_ffa/bus.c
@@ -26,6 +26,8 @@ static int ffa_device_match(struct device *dev, const struct device_driver *drv)
id_table = to_ffa_driver(drv)->id_table;
ffa_dev = to_ffa_dev(dev);
+ if (!id_table)
+ return 0;
while (!uuid_is_null(&id_table->uuid)) {
/*
@@ -123,7 +125,7 @@ int ffa_driver_register(struct ffa_driver *driver, struct module *owner,
{
int ret;
- if (!driver->probe)
+ if (!driver->probe || !driver->id_table)
return -EINVAL;
driver->driver.bus = &ffa_bus_type;
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index eb2782848283..b9f17fda7243 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -87,6 +87,7 @@ static inline int ffa_to_linux_errno(int errno)
struct ffa_pcpu_irq {
struct ffa_drv_info *info;
+ struct work_struct notif_pcpu_work;
};
struct ffa_drv_info {
@@ -100,13 +101,13 @@ struct ffa_drv_info {
bool mem_ops_native;
bool msg_direct_req2_supp;
bool bitmap_created;
+ bool bus_notifier_registered;
bool notif_enabled;
unsigned int sched_recv_irq;
unsigned int notif_pend_irq;
unsigned int cpuhp_state;
struct ffa_pcpu_irq __percpu *irq_pcpu;
struct workqueue_struct *notif_pcpu_wq;
- struct work_struct notif_pcpu_work;
struct work_struct sched_recv_irq_work;
struct xarray partition_info;
DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS));
@@ -322,6 +323,12 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3,
#define PART_INFO_ID_MASK GENMASK(15, 0)
#define PART_INFO_EXEC_CXT_MASK GENMASK(31, 16)
#define PART_INFO_PROPS_MASK GENMASK(63, 32)
+#define FFA_PART_INFO_GET_REGS_FIRST_REG 3
+#define FFA_PART_INFO_GET_REGS_REGS_PER_DESC 3
+#define FFA_PART_INFO_GET_REGS_MAX_DESC \
+ (((sizeof(ffa_value_t) / sizeof_field(ffa_value_t, a0)) - \
+ FFA_PART_INFO_GET_REGS_FIRST_REG) / \
+ FFA_PART_INFO_GET_REGS_REGS_PER_DESC)
#define PART_INFO_ID(x) ((u16)(FIELD_GET(PART_INFO_ID_MASK, (x))))
#define PART_INFO_EXEC_CXT(x) ((u16)(FIELD_GET(PART_INFO_EXEC_CXT_MASK, (x))))
#define PART_INFO_PROPERTIES(x) ((u32)(FIELD_GET(PART_INFO_PROPS_MASK, (x))))
@@ -329,15 +336,13 @@ static int
__ffa_partition_info_get_regs(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3,
struct ffa_partition_info *buffer, int num_parts)
{
- u16 buf_sz, start_idx, cur_idx, count = 0, prev_idx = 0, tag = 0;
+ u16 buf_sz, start_idx = 0, cur_idx, count = 0, tag = 0;
struct ffa_partition_info *buf = buffer;
ffa_value_t partition_info;
do {
__le64 *regs;
- int idx;
-
- start_idx = prev_idx ? prev_idx + 1 : 0;
+ int idx, nr_desc, buf_idx;
invoke_ffa_fn((ffa_value_t){
.a0 = FFA_PARTITION_INFO_GET_REGS,
@@ -353,15 +358,28 @@ __ffa_partition_info_get_regs(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3,
count = PARTITION_COUNT(partition_info.a2);
if (!buffer || !num_parts) /* count only */
return count;
+ if (count > num_parts)
+ return -EINVAL;
cur_idx = CURRENT_INDEX(partition_info.a2);
+ if (cur_idx < start_idx || cur_idx >= count)
+ return -EINVAL;
+
+ nr_desc = cur_idx - start_idx + 1;
+ if (nr_desc > FFA_PART_INFO_GET_REGS_MAX_DESC)
+ return -EINVAL;
+
+ buf_idx = buf - buffer;
+ if (buf_idx + nr_desc > num_parts)
+ return -EINVAL;
+
tag = UUID_INFO_TAG(partition_info.a2);
buf_sz = PARTITION_INFO_SZ(partition_info.a2);
if (buf_sz > sizeof(*buffer))
buf_sz = sizeof(*buffer);
regs = (void *)&partition_info.a3;
- for (idx = 0; idx < cur_idx - start_idx + 1; idx++, buf++) {
+ for (idx = 0; idx < nr_desc; idx++, buf++) {
union {
uuid_t uuid;
u64 regs[2];
@@ -379,7 +397,7 @@ __ffa_partition_info_get_regs(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3,
uuid_copy(&buf->uuid, &uuid_regs.uuid);
regs += 3;
}
- prev_idx = cur_idx;
+ start_idx = cur_idx + 1;
} while (cur_idx < (count - 1));
@@ -1189,7 +1207,7 @@ static int
ffa_sched_recv_cb_update(struct ffa_device *dev, ffa_sched_recv_cb callback,
void *cb_data, bool is_registration)
{
- struct ffa_dev_part_info *partition = NULL, *tmp;
+ struct ffa_dev_part_info *partition = NULL;
struct list_head *phead;
bool cb_valid;
@@ -1202,11 +1220,11 @@ ffa_sched_recv_cb_update(struct ffa_device *dev, ffa_sched_recv_cb callback,
return -EINVAL;
}
- list_for_each_entry_safe(partition, tmp, phead, node)
+ list_for_each_entry(partition, phead, node)
if (partition->dev == dev)
break;
- if (!partition) {
+ if (&partition->node == phead) {
pr_err("%s: No such partition ID 0x%x\n", __func__, dev->vm_id);
return -EINVAL;
}
@@ -1445,20 +1463,25 @@ static int ffa_notify_send(struct ffa_device *dev, int notify_id,
static void handle_notif_callbacks(u64 bitmap, enum notify_type type)
{
+ ffa_notifier_cb cb;
+ void *cb_data;
int notify_id;
- struct notifier_cb_info *cb_info = NULL;
for (notify_id = 0; notify_id <= FFA_MAX_NOTIFICATIONS && bitmap;
notify_id++, bitmap >>= 1) {
if (!(bitmap & 1))
continue;
- read_lock(&drv_info->notify_lock);
- cb_info = notifier_hnode_get_by_type(notify_id, type);
- read_unlock(&drv_info->notify_lock);
+ scoped_guard(read_lock, &drv_info->notify_lock) {
+ struct notifier_cb_info *cb_info;
- if (cb_info && cb_info->cb)
- cb_info->cb(notify_id, cb_info->cb_data);
+ cb_info = notifier_hnode_get_by_type(notify_id, type);
+ cb = cb_info ? cb_info->cb : NULL;
+ cb_data = cb_info ? cb_info->cb_data : NULL;
+ }
+
+ if (cb)
+ cb(notify_id, cb_data);
}
}
@@ -1466,39 +1489,56 @@ static void handle_fwk_notif_callbacks(u32 bitmap)
{
void *buf;
uuid_t uuid;
+ void *fwk_cb_data;
int notify_id = 0, target;
+ ffa_fwk_notifier_cb fwk_cb;
struct ffa_indirect_msg_hdr *msg;
- struct notifier_cb_info *cb_info = NULL;
+ size_t min_offset = offsetof(struct ffa_indirect_msg_hdr, uuid);
/* Only one framework notification defined and supported for now */
if (!(bitmap & FRAMEWORK_NOTIFY_RX_BUFFER_FULL))
return;
- mutex_lock(&drv_info->rx_lock);
+ scoped_guard(mutex, &drv_info->rx_lock) {
+ u32 offset, size;
- msg = drv_info->rx_buffer;
- buf = kmemdup((void *)msg + msg->offset, msg->size, GFP_KERNEL);
- if (!buf) {
- mutex_unlock(&drv_info->rx_lock);
- return;
- }
+ msg = drv_info->rx_buffer;
+ offset = msg->offset;
+ size = msg->size;
- target = SENDER_ID(msg->send_recv_id);
- if (msg->offset >= sizeof(*msg))
- uuid_copy(&uuid, &msg->uuid);
- else
- uuid_copy(&uuid, &uuid_null);
+ if (!size || (offset != min_offset && offset < sizeof(*msg)) ||
+ offset > drv_info->rxtx_bufsz ||
+ size > drv_info->rxtx_bufsz - offset) {
+ pr_err("invalid framework notification message\n");
+ ffa_rx_release();
+ return;
+ }
- mutex_unlock(&drv_info->rx_lock);
+ buf = kmemdup((void *)msg + offset, size, GFP_KERNEL);
+ if (!buf) {
+ ffa_rx_release();
+ return;
+ }
+
+ target = SENDER_ID(msg->send_recv_id);
+ if (offset >= sizeof(*msg))
+ uuid_copy(&uuid, &msg->uuid);
+ else
+ uuid_copy(&uuid, &uuid_null);
+ ffa_rx_release();
+ }
- ffa_rx_release();
+ scoped_guard(read_lock, &drv_info->notify_lock) {
+ struct notifier_cb_info *cb_info;
- read_lock(&drv_info->notify_lock);
- cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, &uuid);
- read_unlock(&drv_info->notify_lock);
+ cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target,
+ &uuid);
+ fwk_cb = cb_info ? cb_info->fwk_cb : NULL;
+ fwk_cb_data = cb_info ? cb_info->cb_data : NULL;
+ }
- if (cb_info && cb_info->fwk_cb)
- cb_info->fwk_cb(notify_id, cb_info->cb_data, buf);
+ if (fwk_cb)
+ fwk_cb(notify_id, fwk_cb_data, buf);
kfree(buf);
}
@@ -1539,10 +1579,11 @@ ffa_self_notif_handle(u16 vcpu, bool is_per_vcpu, void *cb_data)
static void notif_pcpu_irq_work_fn(struct work_struct *work)
{
- struct ffa_drv_info *info = container_of(work, struct ffa_drv_info,
+ struct ffa_pcpu_irq *pcpu = container_of(work, struct ffa_pcpu_irq,
notif_pcpu_work);
+ struct ffa_drv_info *info = pcpu->info;
- ffa_self_notif_handle(smp_processor_id(), true, info);
+ notif_get_and_handle(info);
}
static const struct ffa_info_ops ffa_drv_info_ops = {
@@ -1629,6 +1670,15 @@ static struct notifier_block ffa_bus_nb = {
.notifier_call = ffa_bus_notifier,
};
+static void ffa_bus_notifier_unregister(void)
+{
+ if (!drv_info->bus_notifier_registered)
+ return;
+
+ bus_unregister_notifier(&ffa_bus_type, &ffa_bus_nb);
+ drv_info->bus_notifier_registered = false;
+}
+
static int ffa_xa_add_partition_info(struct ffa_device *dev)
{
struct ffa_dev_part_info *info;
@@ -1712,6 +1762,8 @@ static void ffa_partitions_cleanup(void)
struct list_head *phead;
unsigned long idx;
+ ffa_bus_notifier_unregister();
+
/* Clean up/free all registered devices */
ffa_devices_unregister();
@@ -1739,11 +1791,14 @@ static int ffa_setup_partitions(void)
ret = bus_register_notifier(&ffa_bus_type, &ffa_bus_nb);
if (ret)
pr_err("Failed to register FF-A bus notifiers\n");
+ else
+ drv_info->bus_notifier_registered = true;
}
count = ffa_partition_probe(&uuid_null, &pbuf);
if (count <= 0) {
pr_info("%s: No partitions found, error %d\n", __func__, count);
+ ffa_bus_notifier_unregister();
return -EINVAL;
}
@@ -1811,7 +1866,7 @@ static irqreturn_t notif_pend_irq_handler(int irq, void *irq_data)
struct ffa_drv_info *info = pcpu->info;
queue_work_on(smp_processor_id(), info->notif_pcpu_wq,
- &info->notif_pcpu_work);
+ &pcpu->notif_pcpu_work);
return IRQ_HANDLED;
}
@@ -1928,8 +1983,11 @@ static int ffa_init_pcpu_irq(void)
if (!irq_pcpu)
return -ENOMEM;
- for_each_present_cpu(cpu)
+ for_each_present_cpu(cpu) {
per_cpu_ptr(irq_pcpu, cpu)->info = drv_info;
+ INIT_WORK(&per_cpu_ptr(irq_pcpu, cpu)->notif_pcpu_work,
+ notif_pcpu_irq_work_fn);
+ }
drv_info->irq_pcpu = irq_pcpu;
@@ -1958,7 +2016,6 @@ static int ffa_init_pcpu_irq(void)
}
INIT_WORK(&drv_info->sched_recv_irq_work, ffa_sched_recv_irq_work_fn);
- INIT_WORK(&drv_info->notif_pcpu_work, notif_pcpu_irq_work_fn);
drv_info->notif_pcpu_wq = create_workqueue("ffa_pcpu_irq_notification");
if (!drv_info->notif_pcpu_wq)
return -EINVAL;
@@ -2063,11 +2120,12 @@ static int __init ffa_init(void)
rxtx_bufsz = SZ_4K;
}
+ rxtx_bufsz = PAGE_ALIGN(rxtx_bufsz);
drv_info->rxtx_bufsz = rxtx_bufsz;
drv_info->rx_buffer = alloc_pages_exact(rxtx_bufsz, GFP_KERNEL);
if (!drv_info->rx_buffer) {
ret = -ENOMEM;
- goto free_pages;
+ goto free_drv_info;
}
drv_info->tx_buffer = alloc_pages_exact(rxtx_bufsz, GFP_KERNEL);
@@ -2078,7 +2136,7 @@ static int __init ffa_init(void)
ret = ffa_rxtx_map(virt_to_phys(drv_info->tx_buffer),
virt_to_phys(drv_info->rx_buffer),
- PAGE_ALIGN(rxtx_bufsz) / FFA_PAGE_SIZE);
+ rxtx_bufsz / FFA_PAGE_SIZE);
if (ret) {
pr_err("failed to register FFA RxTx buffers\n");
goto free_pages;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index d04be38f1750..318d1cc9a066 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -402,21 +402,11 @@ static void __init efi_debugfs_init(void)
static inline void efi_debugfs_init(void) {}
#endif
-/*
- * We register the efi subsystem with the firmware subsystem and the
- * efivars subsystem with the efi subsystem, if the system was booted with
- * EFI.
- */
-static int __init efisubsys_init(void)
+static int __init efipostcore_init(void)
{
- int error;
-
if (!efi_enabled(EFI_RUNTIME_SERVICES))
efi.runtime_supported_mask = 0;
- if (!efi_enabled(EFI_BOOT))
- return 0;
-
if (efi.runtime_supported_mask) {
/*
* Since we process only one efi_runtime_service() at a time, an
@@ -428,9 +418,23 @@ static int __init efisubsys_init(void)
pr_err("Creating efi_rts_wq failed, EFI runtime services disabled.\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
efi.runtime_supported_mask = 0;
- return 0;
}
}
+ return 0;
+}
+postcore_initcall(efipostcore_init);
+
+/*
+ * We register the efi subsystem with the firmware subsystem and the
+ * efivars subsystem with the efi subsystem, if the system was booted with
+ * EFI.
+ */
+static int __init efisubsys_init(void)
+{
+ int error;
+
+ if (!efi_enabled(EFI_BOOT))
+ return 0;
if (efi_rt_services_supported(EFI_RT_SUPPORTED_TIME_SERVICES))
platform_device_register_simple("rtc-efi", 0, NULL, 0);
diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c
index 4c3986ddcd54..685283bb7327 100644
--- a/drivers/firmware/efi/sysfb_efi.c
+++ b/drivers/firmware/efi/sysfb_efi.c
@@ -311,11 +311,14 @@ static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
.callback = efifb_swap_width_height,
},
{
- /* Lenovo IdeaPad Duet 3 10IGL5 with 1200x1920 portrait screen */
+ /*
+ * Lenovo IdeaPad Duet 3 10IGL5 and 10IGL5-LTE with
+ * 1200x1920 portrait screen
+ */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
- DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
- "IdeaPad Duet 3 10IGL5"),
+ /* Non exact match to also match the LTE version */
+ DMI_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"),
},
.callback = efifb_swap_width_height,
},
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index 38ca190d4a22..e73bae6cb23a 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -539,12 +539,22 @@ static int psci_system_suspend(unsigned long unused)
static int psci_system_suspend_enter(suspend_state_t state)
{
+ pm_set_resume_via_firmware();
+
return cpu_suspend(0, psci_system_suspend);
}
+static int psci_system_suspend_begin(suspend_state_t state)
+{
+ pm_set_suspend_via_firmware();
+
+ return 0;
+}
+
static const struct platform_suspend_ops psci_suspend_ops = {
.valid = suspend_valid_only_mem,
.enter = psci_system_suspend_enter,
+ .begin = psci_system_suspend_begin,
};
static void __init psci_init_system_reset2(void)
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index b3596851c719..41a79e43c82b 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2050,7 +2050,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *
return 0;
if (unlikely(bsize < csize)) {
- hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %ld)\n",
+ hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %zu)\n",
report->id, csize, bsize);
return -EINVAL;
}
@@ -2072,7 +2072,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *
rsize = max_buffer_size;
if (bsize < rsize) {
- hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %ld)\n",
+ hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %zu)\n",
report->id, rsize, bsize);
return -EINVAL;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 402671567736..3e1e1e861739 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1297,7 +1297,9 @@ static int ipoib_hard_header(struct sk_buff *skb,
return IPOIB_HARD_LEN;
}
-static void ipoib_set_mcast_list(struct net_device *dev)
+static void ipoib_set_rx_mode_async(struct net_device *dev,
+ struct netdev_hw_addr_list *uc,
+ struct netdev_hw_addr_list *mc)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -2160,7 +2162,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
.ndo_fix_features = ipoib_fix_features,
.ndo_start_xmit = ipoib_start_xmit,
.ndo_tx_timeout = ipoib_timeout,
- .ndo_set_rx_mode = ipoib_set_mcast_list,
+ .ndo_set_rx_mode_async = ipoib_set_rx_mode_async,
.ndo_get_iflink = ipoib_get_iflink,
.ndo_set_vf_link_state = ipoib_set_vf_link_state,
.ndo_get_vf_config = ipoib_get_vf_config,
@@ -2183,7 +2185,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
.ndo_fix_features = ipoib_fix_features,
.ndo_start_xmit = ipoib_start_xmit,
.ndo_tx_timeout = ipoib_timeout,
- .ndo_set_rx_mode = ipoib_set_mcast_list,
+ .ndo_set_rx_mode_async = ipoib_set_rx_mode_async,
.ndo_get_iflink = ipoib_get_iflink,
.ndo_get_stats64 = ipoib_get_stats,
.ndo_eth_ioctl = ipoib_ioctl,
diff --git a/drivers/media/rc/ttusbir.c b/drivers/media/rc/ttusbir.c
index 3848ad3a6b85..db2f6698a6c0 100644
--- a/drivers/media/rc/ttusbir.c
+++ b/drivers/media/rc/ttusbir.c
@@ -191,7 +191,7 @@ static int ttusbir_probe(struct usb_interface *intf,
tt = kzalloc_obj(*tt);
buffer = kzalloc(5, GFP_KERNEL);
rc = rc_allocate_device(RC_DRIVER_IR_RAW);
- if (!tt || !rc || buffer) {
+ if (!tt || !rc || !buffer) {
ret = -ENOMEM;
goto out;
}
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 44d670904ad8..3c2a3029b10c 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1023,12 +1023,16 @@ mt7530_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
unsigned int age_count;
unsigned int age_unit;
- /* Applied timer is (AGE_CNT + 1) * (AGE_UNIT + 1) seconds */
- if (secs < 1 || secs > (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1))
- return -ERANGE;
-
- /* iterate through all possible age_count to find the closest pair */
- for (tmp_age_count = 0; tmp_age_count <= AGE_CNT_MAX; ++tmp_age_count) {
+ /* Applied timer is (AGE_CNT + 1) * (AGE_UNIT + 1) seconds.
+ * The DSA core has already validated the range using
+ * ds->ageing_time_min and ds->ageing_time_max.
+ *
+ * Iterate through all possible age_count values to find the closest
+ * pair. Start from 1 because the per-entry aging counter is
+ * initialized to AGE_CNT and a value of 0 means the entry will
+ * never be aged out.
+ */
+ for (tmp_age_count = 1; tmp_age_count <= AGE_CNT_MAX; ++tmp_age_count) {
unsigned int tmp_age_unit = secs / (tmp_age_count + 1) - 1;
if (tmp_age_unit <= AGE_UNIT_MAX) {
@@ -1296,37 +1300,40 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
static void
mt753x_trap_frames(struct mt7530_priv *priv)
{
- /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them
- * VLAN-untagged.
+ /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress
+ * them with the EG_TAG attribute set to disabled (system default)
+ * so that any VLAN tags in the frame are not modified by the
+ * switch egress VLAN tag processing. This preserves VLAN tags
+ * for reception on VLAN sub-interfaces.
*/
mt7530_rmw(priv, MT753X_BPC,
PAE_BPDU_FR | PAE_EG_TAG_MASK | PAE_PORT_FW_MASK |
BPDU_EG_TAG_MASK | BPDU_PORT_FW_MASK,
- PAE_BPDU_FR | PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ PAE_BPDU_FR | PAE_EG_TAG(MT7530_VLAN_EG_DISABLED) |
PAE_PORT_FW(TO_CPU_FW_CPU_ONLY) |
- BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ BPDU_EG_TAG(MT7530_VLAN_EG_DISABLED) |
TO_CPU_FW_CPU_ONLY);
- /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress
- * them VLAN-untagged.
+ /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and
+ * egress them with EG_TAG disabled.
*/
mt7530_rmw(priv, MT753X_RGAC1,
R02_BPDU_FR | R02_EG_TAG_MASK | R02_PORT_FW_MASK |
R01_BPDU_FR | R01_EG_TAG_MASK | R01_PORT_FW_MASK,
- R02_BPDU_FR | R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ R02_BPDU_FR | R02_EG_TAG(MT7530_VLAN_EG_DISABLED) |
R02_PORT_FW(TO_CPU_FW_CPU_ONLY) | R01_BPDU_FR |
- R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ R01_EG_TAG(MT7530_VLAN_EG_DISABLED) |
TO_CPU_FW_CPU_ONLY);
- /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress
- * them VLAN-untagged.
+ /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and
+ * egress them with EG_TAG disabled.
*/
mt7530_rmw(priv, MT753X_RGAC2,
R0E_BPDU_FR | R0E_EG_TAG_MASK | R0E_PORT_FW_MASK |
R03_BPDU_FR | R03_EG_TAG_MASK | R03_PORT_FW_MASK,
- R0E_BPDU_FR | R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ R0E_BPDU_FR | R0E_EG_TAG(MT7530_VLAN_EG_DISABLED) |
R0E_PORT_FW(TO_CPU_FW_CPU_ONLY) | R03_BPDU_FR |
- R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ R03_EG_TAG(MT7530_VLAN_EG_DISABLED) |
TO_CPU_FW_CPU_ONLY);
}
@@ -1616,6 +1623,49 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port,
return 0;
}
+static int
+mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid)
+{
+ struct mt7530_dummy_poll p;
+ u32 val;
+ int ret;
+
+ val = VTCR_BUSY | VTCR_FUNC(cmd) | vid;
+ mt7530_write(priv, MT7530_VTCR, val);
+
+ INIT_MT7530_DUMMY_POLL(&p, priv, MT7530_VTCR);
+ ret = readx_poll_timeout(_mt7530_read, &p, val,
+ !(val & VTCR_BUSY), 20, 20000);
+ if (ret < 0) {
+ dev_err(priv->dev, "poll timeout\n");
+ return ret;
+ }
+
+ val = mt7530_read(priv, MT7530_VTCR);
+ if (val & VTCR_INVALID) {
+ dev_err(priv->dev, "read VTCR invalid\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+mt7530_setup_vlan0(struct mt7530_priv *priv)
+{
+ u32 val;
+
+ /* Validate the entry with independent learning, keep the original
+ * ingress tag attribute.
+ */
+ val = IVL_MAC | EG_CON | PORT_MEM(MT7530_ALL_MEMBERS) | FID(FID_BRIDGED) |
+ VLAN_VALID;
+ mt7530_write(priv, MT7530_VAWD1, val);
+ mt7530_write(priv, MT7530_VAWD2, 0);
+
+ return mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, 0);
+}
+
static void
mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
{
@@ -1641,6 +1691,8 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
G0_PORT_VID_DEF);
for (i = 0; i < priv->ds->num_ports; i++) {
+ if (i == port)
+ continue;
if (dsa_is_user_port(ds, i) &&
dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) {
all_user_ports_removed = false;
@@ -1652,13 +1704,9 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
* the CPU port get out of VLAN filtering mode.
*/
if (all_user_ports_removed) {
- struct dsa_port *dp = dsa_to_port(ds, port);
- struct dsa_port *cpu_dp = dp->cpu_dp;
-
- mt7530_write(priv, MT7530_PCR_P(cpu_dp->index),
- PCR_MATRIX(dsa_user_ports(priv->ds)));
- mt7530_write(priv, MT7530_PVC_P(cpu_dp->index), PORT_SPEC_TAG
- | PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
+ mutex_lock(&priv->reg_mutex);
+ mt7530_setup_vlan0(priv);
+ mutex_unlock(&priv->reg_mutex);
}
}
@@ -1847,33 +1895,6 @@ mt7530_port_mdb_del(struct dsa_switch *ds, int port,
}
static int
-mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid)
-{
- struct mt7530_dummy_poll p;
- u32 val;
- int ret;
-
- val = VTCR_BUSY | VTCR_FUNC(cmd) | vid;
- mt7530_write(priv, MT7530_VTCR, val);
-
- INIT_MT7530_DUMMY_POLL(&p, priv, MT7530_VTCR);
- ret = readx_poll_timeout(_mt7530_read, &p, val,
- !(val & VTCR_BUSY), 20, 20000);
- if (ret < 0) {
- dev_err(priv->dev, "poll timeout\n");
- return ret;
- }
-
- val = mt7530_read(priv, MT7530_VTCR);
- if (val & VTCR_INVALID) {
- dev_err(priv->dev, "read VTCR invalid\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int
mt7530_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering,
struct netlink_ext_ack *extack)
{
@@ -1978,21 +1999,6 @@ mt7530_hw_vlan_update(struct mt7530_priv *priv, u16 vid,
}
static int
-mt7530_setup_vlan0(struct mt7530_priv *priv)
-{
- u32 val;
-
- /* Validate the entry with independent learning, keep the original
- * ingress tag attribute.
- */
- val = IVL_MAC | EG_CON | PORT_MEM(MT7530_ALL_MEMBERS) | FID(FID_BRIDGED) |
- VLAN_VALID;
- mt7530_write(priv, MT7530_VAWD1, val);
-
- return mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, 0);
-}
-
-static int
mt7530_port_vlan_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack)
@@ -2004,9 +2010,18 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port,
mutex_lock(&priv->reg_mutex);
+ /* VID 0 is managed exclusively by mt7530_setup_vlan0() for
+ * VLAN-unaware bridge operation. Don't let the bridge overwrite
+ * its EG_CON flag with VTAG_EN and corrupt PORT_MEM.
+ */
+ if (vlan->vid == 0)
+ goto skip_vlan_table;
+
mt7530_hw_vlan_entry_init(&new_entry, port, untagged);
mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add);
+skip_vlan_table:
+
if (pvid) {
priv->ports[port].pvid = vlan->vid;
@@ -2046,10 +2061,15 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
mutex_lock(&priv->reg_mutex);
+ /* VID 0 is managed exclusively by mt7530_setup_vlan0(). */
+ if (vlan->vid == 0)
+ goto skip_vlan_table;
+
mt7530_hw_vlan_entry_init(&target_entry, port, 0);
mt7530_hw_vlan_update(priv, vlan->vid, &target_entry,
mt7530_hw_vlan_del);
+skip_vlan_table:
/* PVID is being restored to the default whenever the PVID port
* is being removed from the VLAN.
*/
@@ -2427,7 +2447,10 @@ mt7530_setup(struct dsa_switch *ds)
}
ds->assisted_learning_on_cpu_port = true;
+ ds->untag_vlan_aware_bridge_pvid = true;
ds->mtu_enforcement_ingress = true;
+ ds->ageing_time_min = 2 * 1000;
+ ds->ageing_time_max = (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1) * 1000;
if (priv->id == ID_MT7530) {
regulator_set_voltage(priv->core_pwr, 1000000, 1000000);
@@ -2616,7 +2639,10 @@ mt7531_setup_common(struct dsa_switch *ds)
int ret, i;
ds->assisted_learning_on_cpu_port = true;
+ ds->untag_vlan_aware_bridge_pvid = true;
ds->mtu_enforcement_ingress = true;
+ ds->ageing_time_min = 2 * 1000;
+ ds->ageing_time_max = (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1) * 1000;
mt753x_trap_frames(priv);
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
new file mode 100644
index 000000000000..f23be7425daf
--- /dev/null
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -0,0 +1,1543 @@
+// SPDX-License-Identifier: GPL-2.0
+/* 3c509.c: A 3c509 EtherLink3 ethernet driver for linux. */
+/*
+ * Written 1993-2000 by Donald Becker.
+ *
+ * Copyright 1994-2000 by Donald Becker.
+ * Copyright 1993 United States Government as represented by the
+ * Director, National Security Agency. This software may be used and
+ * distributed according to the terms of the GNU General Public License,
+ * incorporated herein by reference.
+ *
+ * This driver is for the 3Com EtherLinkIII series.
+ *
+ * The author may be reached as becker@scyld.com, or C/O
+ * Scyld Computing Corporation
+ * 410 Severn Ave., Suite 210
+ * Annapolis MD 21403
+ *
+ * Known limitations:
+ * Because of the way 3c509 ISA detection works it's difficult to predict
+ * a priori which of several ISA-mode cards will be detected first.
+ *
+ * This driver does not use predictive interrupt mode, resulting in higher
+ * packet latency but lower overhead. If interrupts are disabled for an
+ * unusually long time it could also result in missed packets, but in
+ * practice this rarely happens.
+ *
+ *
+ * FIXES:
+ * Alan Cox: Removed the 'Unexpected interrupt' bug.
+ * Michael Meskes: Upgraded to Donald Becker's version 1.07.
+ * Alan Cox: Increased the eeprom delay. Regardless of
+ * what the docs say some people definitely
+ * get problems with lower (but in card spec)
+ * delays.
+ * v1.10 4/21/97 Fixed module code so that multiple cards may be
+ * detected, other cleanups. -djb
+ * Andrea Arcangeli: Upgraded to Donald Becker's version 1.12.
+ * Rick Payne: Fixed SMP race condition.
+ * v1.13 9/8/97 Made 'max_interrupt_work' an insmod-settable
+ * variable. -djb
+ * v1.14 10/15/97 Avoided waiting..discard message for fast
+ * machines. -djb
+ * v1.15 1/31/98 Faster recovery for Tx errors. -djb
+ * v1.16 2/3/98 Different ID port handling to avoid sound
+ * cards. -djb
+ * v1.18 12Mar2001 Andrew Morton
+ * - Avoid bogus detect of 3c590's (Andrzej Krzysztofowicz)
+ * - Reviewed against 1.18 from scyld.com
+ * v1.18a 17Nov2001 Jeff Garzik <jgarzik@pobox.com>
+ * - ethtool support.
+ * v1.18b 1Mar2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ * - Power Management support.
+ * v1.18c 1Mar2002 David Ruggiero <jdr@farfalle.com>
+ * - Full duplex support.
+ * v1.19 16Oct2002 Zwane Mwaikambo <zwane@linuxpower.ca>
+ * - Additional ethtool features.
+ * v1.19a 28Oct2002 David Ruggiero <jdr@farfalle.com>
+ * - Increase *read_eeprom udelay to workaround oops with
+ * 2 cards.
+ * v1.19b 08Nov2002 Marc Zyngier <maz@wild-wind.fr.eu.org>
+ * - Introduce driver model for EISA cards.
+ * v1.20 04Feb2008 Ondrej Zary <linux@rainbow-software.org>
+ * - convert to isa_driver and pnp_driver and some
+ * cleanups.
+ */
+
+#define DRV_NAME "3c509"
+
+/* A few values that may be tweaked. */
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT (400 * HZ / 1000)
+
+#include <linux/bitops.h>
+#include <linux/delay.h> /* for udelay() */
+#include <linux/device.h>
+#include <linux/eisa.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/isa.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pm.h>
+#include <linux/pnp.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+
+#include <asm/irq.h>
+
+#ifdef EL3_DEBUG
+static int el3_debug = EL3_DEBUG;
+#else
+static int el3_debug = 2;
+#endif
+
+/* Used to do a global count of all the cards in the system. Must be
+ * a global variable so that the eisa probe routines can increment it.
+ */
+static int el3_cards;
+#define EL3_MAX_CARDS 8
+
+/* To minimize the size of the driver source I only define operating
+ * constants if they are used several times. You'll need the manual
+ * anyway if you want to understand driver details.
+ */
+/* Offsets from base I/O address. */
+#define EL3_DATA 0x00
+#define EL3_CMD 0x0e
+#define EL3_STATUS 0x0e
+#define EEPROM_READ 0x80
+
+#define EL3_IO_EXTENT 16
+
+#define EL3WINDOW(win_num) outw(SELECT_WINDOW + (win_num), ioaddr + EL3_CMD)
+
+/* The top five bits written to EL3_CMD are a command, the lower
+ * 11 bits are the parameter, if applicable.
+ */
+enum c509cmd {
+ TOTAL_RESET = 0 << 11,
+ SELECT_WINDOW = 1 << 11,
+ START_COAX = 2 << 11,
+ RX_DISABLE = 3 << 11,
+ RX_ENABLE = 4 << 11,
+ RX_RESET = 5 << 11,
+ RX_DISCARD = 8 << 11,
+ TX_ENABLE = 9 << 11,
+ TX_DISABLE = 10 << 11,
+ TX_RESET = 11 << 11,
+ FAKE_INTR = 12 << 11,
+ ACK_INTR = 13 << 11,
+ SET_INTR_ENB = 14 << 11,
+ SET_STATUS_ENB = 15 << 11,
+ SET_RX_FILTER = 16 << 11,
+ SET_RX_THRESHOLD = 17 << 11,
+ SET_TX_THRESHOLD = 18 << 11,
+ SET_TX_START = 19 << 11,
+ STATS_ENABLE = 21 << 11,
+ STATS_DISABLE = 22 << 11,
+ STOP_COAX = 23 << 11,
+ POWER_UP = 27 << 11,
+ POWER_DOWN = 28 << 11,
+ POWER_AUTO = 29 << 11,
+};
+
+enum c509status {
+ INT_LATCH = 0x0001,
+ ADAPTER_FAILURE = 0x0002,
+ TX_COMPLETE = 0x0004,
+ TX_AVAILABLE = 0x0008,
+ RX_COMPLETE = 0x0010,
+ RX_EARLY = 0x0020,
+ INT_REQ = 0x0040,
+ STATS_FULL = 0x0080,
+ CMD_BUSY = 0x1000,
+};
+
+/* The SET_RX_FILTER command accepts the following classes: */
+enum rx_filter {
+ RX_STATION = 1,
+ RX_MULTICAST = 2,
+ RX_BROADCAST = 4,
+ RX_PROM = 8,
+};
+
+/* Register window 1 offsets, the window used in normal operation. */
+#define TX_FIFO 0x00
+#define RX_FIFO 0x00
+#define RX_STATUS 0x08
+#define TX_STATUS 0x0B
+#define TX_FREE 0x0C /* Remaining free bytes in Tx buffer. */
+
+#define WN0_CONF_CTRL 0x04 /* Window 0: Configuration control register. */
+#define WN0_ADDR_CONF 0x06 /* Window 0: Address configuration register. */
+#define WN0_IRQ 0x08 /* Window 0: Set IRQ line in bits 12-15. */
+#define WN4_MEDIA 0x0A /* Window 4: Various transcvr/media bits. */
+#define MEDIA_TP 0x00C0 /* Enable link beat and jabber for 10baseT. */
+#define WN4_NETDIAG 0x06 /* Window 4: Net diagnostic. */
+#define FD_ENABLE 0x8000 /* Enable full-duplex ("external loopback"). */
+
+/*
+ * Must be a power of two (we use a binary and in the
+ * circular queue).
+ */
+#define SKB_QUEUE_SIZE 64
+
+enum el3_cardtype { EL3_ISA, EL3_PNP, EL3_EISA };
+
+struct el3_private {
+ /* for device access */
+ spinlock_t lock;
+ /* skb send-queue */
+ int head, size;
+ struct sk_buff *queue[SKB_QUEUE_SIZE];
+ enum el3_cardtype type;
+};
+
+static int id_port;
+static int current_tag;
+static struct net_device *el3_devs[EL3_MAX_CARDS];
+
+/* Parameters that may be passed into the module. */
+static int debug = -1;
+static int irq[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+static int max_interrupt_work = 10;
+#ifdef CONFIG_PNP
+static int nopnp;
+#endif
+
+static int el3_common_init(struct net_device *dev);
+static void el3_common_remove(struct net_device *dev);
+static ushort id_read_eeprom(int index);
+static ushort read_eeprom(int ioaddr, int index);
+static int el3_open(struct net_device *dev);
+static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static irqreturn_t el3_interrupt(int irq, void *dev_id);
+static void update_stats(struct net_device *dev);
+static struct net_device_stats *el3_get_stats(struct net_device *dev);
+static int el3_rx(struct net_device *dev);
+static int el3_close(struct net_device *dev);
+static void set_multicast_list(struct net_device *dev);
+static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue);
+static void el3_down(struct net_device *dev);
+static void el3_up(struct net_device *dev);
+static const struct ethtool_ops ethtool_ops;
+#ifdef CONFIG_PM
+static int el3_suspend(struct device *, pm_message_t);
+static int el3_resume(struct device *);
+#else
+#define el3_suspend NULL
+#define el3_resume NULL
+#endif
+
+/* Generic device remove for all device types. */
+static int el3_device_remove(struct device *device);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void el3_poll_controller(struct net_device *dev);
+#endif
+
+/* Return 0 on success, 1 on error, 2 when found already detected PnP card. */
+static int el3_isa_id_sequence(__be16 *phys_addr)
+{
+ short lrs_state = 0xff;
+ int i;
+
+ /* ISA boards are detected by sending the ID sequence to the
+ * ID_PORT. We find cards past the first by setting the 'current_tag'
+ * on cards as they are found. Cards with their tag set will not
+ * respond to subsequent ID sequences.
+ */
+ outb(0x00, id_port);
+ outb(0x00, id_port);
+ for (i = 0; i < 255; i++) {
+ outb(lrs_state, id_port);
+ lrs_state <<= 1;
+ lrs_state = lrs_state & 0x100 ? lrs_state ^ 0xcf : lrs_state;
+ }
+ /* For the first probe, clear all board's tag registers. */
+ if (current_tag == 0)
+ outb(0xd0, id_port);
+ else /* Otherwise kill off already-found boards. */
+ outb(0xd8, id_port);
+ if (id_read_eeprom(7) != 0x6d50)
+ return 1;
+ /* Read in EEPROM data, which does contention-select.
+ * Only the lowest address board will stay "on-line".
+ * 3Com got the byte order backwards.
+ */
+ for (i = 0; i < 3; i++)
+ phys_addr[i] = htons(id_read_eeprom(i));
+#ifdef CONFIG_PNP
+ if (!nopnp) {
+ /* The ISA PnP 3c509 cards respond to the ID sequence too.
+ * This check is needed in order not to register them twice.
+ */
+ for (i = 0; i < el3_cards; i++) {
+ struct el3_private *lp = netdev_priv(el3_devs[i]);
+
+ if (lp->type == EL3_PNP &&
+ ether_addr_equal((u8 *)phys_addr,
+ el3_devs[i]->dev_addr)) {
+ if (el3_debug > 3)
+ pr_debug("3c509 with address %02x %02x %02x %02x %02x %02x was found by ISAPnP\n",
+ phys_addr[0] & 0xff,
+ phys_addr[0] >> 8,
+ phys_addr[1] & 0xff,
+ phys_addr[1] >> 8,
+ phys_addr[2] & 0xff,
+ phys_addr[2] >> 8);
+ /* Set the adaptor tag so that the next card
+ * can be found.
+ */
+ outb(0xd0 + ++current_tag, id_port);
+ return 2;
+ }
+ }
+ }
+#endif /* CONFIG_PNP */
+ return 0;
+}
+
+static void el3_dev_fill(struct net_device *dev, __be16 *phys_addr, int ioaddr,
+ int irq, int if_port, enum el3_cardtype type)
+{
+ struct el3_private *lp = netdev_priv(dev);
+
+ eth_hw_addr_set(dev, (u8 *)phys_addr);
+ dev->base_addr = ioaddr;
+ dev->irq = irq;
+ dev->if_port = if_port;
+ lp->type = type;
+}
+
+static int el3_isa_match(struct device *pdev, unsigned int ndev)
+{
+ int ioaddr, isa_irq, if_port, err;
+ struct net_device *dev;
+ unsigned int iobase;
+ __be16 phys_addr[3];
+
+ while ((err = el3_isa_id_sequence(phys_addr)) == 2)
+ ; /* Skip to next card when PnP card found */
+ if (err == 1)
+ return 0;
+
+ iobase = id_read_eeprom(8);
+ if_port = iobase >> 14;
+ ioaddr = 0x200 + ((iobase & 0x1f) << 4);
+ if (irq[el3_cards] > 1 && irq[el3_cards] < 16)
+ isa_irq = irq[el3_cards];
+ else
+ isa_irq = id_read_eeprom(9) >> 12;
+
+ dev = alloc_etherdev(sizeof(struct el3_private));
+ if (!dev)
+ return -ENOMEM;
+
+ SET_NETDEV_DEV(dev, pdev);
+
+ if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) {
+ free_netdev(dev);
+ return 0;
+ }
+
+ /* Set the adaptor tag so that the next card can be found. */
+ outb(0xd0 + ++current_tag, id_port);
+
+ /* Activate the adaptor at the EEPROM location. */
+ outb((ioaddr >> 4) | 0xe0, id_port);
+
+ EL3WINDOW(0);
+ if (inw(ioaddr) != 0x6d50) {
+ free_netdev(dev);
+ return 0;
+ }
+
+ /* Free the interrupt so that some other card can use it. */
+ outw(0x0f00, ioaddr + WN0_IRQ);
+
+ el3_dev_fill(dev, phys_addr, ioaddr, isa_irq, if_port, EL3_ISA);
+ dev_set_drvdata(pdev, dev);
+ if (el3_common_init(dev)) {
+ free_netdev(dev);
+ return 0;
+ }
+
+ el3_devs[el3_cards++] = dev;
+ return 1;
+}
+
+static void el3_isa_remove(struct device *pdev, unsigned int ndev)
+{
+ el3_device_remove(pdev);
+ dev_set_drvdata(pdev, NULL);
+}
+
+#ifdef CONFIG_PM
+static int el3_isa_suspend(struct device *dev, unsigned int n,
+ pm_message_t state)
+{
+ current_tag = 0;
+ return el3_suspend(dev, state);
+}
+
+static int el3_isa_resume(struct device *dev, unsigned int n)
+{
+ struct net_device *ndev = dev_get_drvdata(dev);
+ int ioaddr = ndev->base_addr, err;
+ __be16 phys_addr[3];
+
+ while ((err = el3_isa_id_sequence(phys_addr)) == 2)
+ ; /* Skip to next card when PnP card found */
+ if (err == 1)
+ return 0;
+ /* Set the adaptor tag so that the next card can be found. */
+ outb(0xd0 + ++current_tag, id_port);
+ /* Enable the card */
+ outb((ioaddr >> 4) | 0xe0, id_port);
+ EL3WINDOW(0);
+ if (inw(ioaddr) != 0x6d50)
+ return 1;
+ /* Free the interrupt so that some other card can use it. */
+ outw(0x0f00, ioaddr + WN0_IRQ);
+ return el3_resume(dev);
+}
+#endif
+
+static struct isa_driver el3_isa_driver = {
+ .match = el3_isa_match,
+ .remove = el3_isa_remove,
+#ifdef CONFIG_PM
+ .suspend = el3_isa_suspend,
+ .resume = el3_isa_resume,
+#endif
+ .driver = {
+ .name = "3c509"
+ },
+};
+
+static int isa_registered;
+
+#ifdef CONFIG_PNP
+static const struct pnp_device_id el3_pnp_ids[] = {
+ { .id = "TCM5090" }, /* 3Com Etherlink III (TP) */
+ { .id = "TCM5091" }, /* 3Com Etherlink III */
+ { .id = "TCM5094" }, /* 3Com Etherlink III (combo) */
+ { .id = "TCM5095" }, /* 3Com Etherlink III (TPO) */
+ { .id = "TCM5098" }, /* 3Com Etherlink III (TPC) */
+ { .id = "PNP80f7" }, /* 3Com Etherlink III compatible */
+ { .id = "PNP80f8" }, /* 3Com Etherlink III compatible */
+ { .id = "" }
+};
+MODULE_DEVICE_TABLE(pnp, el3_pnp_ids);
+
+static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id)
+{
+ struct net_device *dev = NULL;
+ int ioaddr, irq, if_port;
+ __be16 phys_addr[3];
+ short i;
+ int err;
+
+ ioaddr = pnp_port_start(pdev, 0);
+ if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-pnp"))
+ return -EBUSY;
+ irq = pnp_irq(pdev, 0);
+ EL3WINDOW(0);
+ for (i = 0; i < 3; i++)
+ phys_addr[i] = htons(read_eeprom(ioaddr, i));
+ if_port = read_eeprom(ioaddr, 8) >> 14;
+ dev = alloc_etherdev(sizeof(struct el3_private));
+ if (!dev) {
+ release_region(ioaddr, EL3_IO_EXTENT);
+ return -ENOMEM;
+ }
+ SET_NETDEV_DEV(dev, &pdev->dev);
+
+ el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP);
+ pnp_set_drvdata(pdev, dev);
+ err = el3_common_init(dev);
+
+ if (err) {
+ pnp_set_drvdata(pdev, NULL);
+ free_netdev(dev);
+ return err;
+ }
+
+ el3_devs[el3_cards++] = dev;
+ return 0;
+}
+
+static void el3_pnp_remove(struct pnp_dev *pdev)
+{
+ el3_common_remove(pnp_get_drvdata(pdev));
+ pnp_set_drvdata(pdev, NULL);
+}
+
+#ifdef CONFIG_PM
+static int el3_pnp_suspend(struct pnp_dev *pdev, pm_message_t state)
+{
+ return el3_suspend(&pdev->dev, state);
+}
+
+static int el3_pnp_resume(struct pnp_dev *pdev)
+{
+ return el3_resume(&pdev->dev);
+}
+#endif
+
+static struct pnp_driver el3_pnp_driver = {
+ .name = "3c509",
+ .id_table = el3_pnp_ids,
+ .probe = el3_pnp_probe,
+ .remove = el3_pnp_remove,
+#ifdef CONFIG_PM
+ .suspend = el3_pnp_suspend,
+ .resume = el3_pnp_resume,
+#endif
+};
+
+static int pnp_registered;
+#endif /* CONFIG_PNP */
+
+#ifdef CONFIG_EISA
+static const struct eisa_device_id el3_eisa_ids[] = {
+ { "TCM5090" },
+ { "TCM5091" },
+ { "TCM5092" },
+ { "TCM5093" },
+ { "TCM5094" },
+ { "TCM5095" },
+ { "TCM5098" },
+ { "" }
+};
+MODULE_DEVICE_TABLE(eisa, el3_eisa_ids);
+
+static int el3_eisa_probe(struct device *device);
+
+static struct eisa_driver el3_eisa_driver = {
+ .id_table = el3_eisa_ids,
+ .driver = {
+ .name = "3c579",
+ .probe = el3_eisa_probe,
+ .remove = el3_device_remove,
+ .suspend = el3_suspend,
+ .resume = el3_resume,
+ }
+};
+
+static int eisa_registered;
+#endif
+
+static const struct net_device_ops netdev_ops = {
+ .ndo_open = el3_open,
+ .ndo_stop = el3_close,
+ .ndo_start_xmit = el3_start_xmit,
+ .ndo_get_stats = el3_get_stats,
+ .ndo_set_rx_mode = set_multicast_list,
+ .ndo_tx_timeout = el3_tx_timeout,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = el3_poll_controller,
+#endif
+};
+
+static int el3_common_init(struct net_device *dev)
+{
+ static const char *const if_names[] = {
+ "10baseT", "AUI", "undefined", "BNC"
+ };
+ struct el3_private *lp = netdev_priv(dev);
+ int err;
+
+ spin_lock_init(&lp->lock);
+
+ if (dev->mem_start & 0x05) { /* xcvr codes 1/3/4/12 */
+ dev->if_port = (dev->mem_start & 0x0f);
+ } else { /* xcvr codes 0/8 */
+ /* use eeprom value, but save user's full-duplex selection */
+ dev->if_port |= (dev->mem_start & 0x08);
+ }
+
+ /* The EL3-specific entries in the device structure. */
+ dev->netdev_ops = &netdev_ops;
+ dev->watchdog_timeo = TX_TIMEOUT;
+ dev->ethtool_ops = &ethtool_ops;
+
+ err = register_netdev(dev);
+ if (err) {
+ pr_err("Failed to register 3c5x9 at %#3.3lx, IRQ %d.\n",
+ dev->base_addr, dev->irq);
+ release_region(dev->base_addr, EL3_IO_EXTENT);
+ return err;
+ }
+
+ pr_info("%s: 3c5x9 found at %#3.3lx, %s port, address %pM, IRQ %d.\n",
+ dev->name, dev->base_addr, if_names[(dev->if_port & 0x03)],
+ dev->dev_addr, dev->irq);
+
+ return 0;
+}
+
+static void el3_common_remove(struct net_device *dev)
+{
+ unregister_netdev(dev);
+ release_region(dev->base_addr, EL3_IO_EXTENT);
+ free_netdev(dev);
+}
+
+#ifdef CONFIG_EISA
+static int el3_eisa_probe(struct device *device)
+{
+ struct net_device *dev = NULL;
+ struct eisa_device *edev;
+ int ioaddr, irq, if_port;
+ __be16 phys_addr[3];
+ short i;
+ int err;
+
+ /* Yeepee, The driver framework is calling us ! */
+ edev = to_eisa_device(device);
+ ioaddr = edev->base_addr;
+
+ if (!request_region(ioaddr, EL3_IO_EXTENT, "3c579-eisa"))
+ return -EBUSY;
+
+ /* Change the register set to the configuration window 0. */
+ outw(SELECT_WINDOW | 0, ioaddr + 0xC80 + EL3_CMD);
+
+ irq = inw(ioaddr + WN0_IRQ) >> 12;
+ if_port = inw(ioaddr + 6) >> 14;
+ for (i = 0; i < 3; i++)
+ phys_addr[i] = htons(read_eeprom(ioaddr, i));
+
+ /* Restore the "Product ID" to the EEPROM read register. */
+ read_eeprom(ioaddr, 3);
+
+ dev = alloc_etherdev(sizeof(struct el3_private));
+ if (!dev) {
+ release_region(ioaddr, EL3_IO_EXTENT);
+ return -ENOMEM;
+ }
+
+ SET_NETDEV_DEV(dev, device);
+
+ el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA);
+ eisa_set_drvdata(edev, dev);
+ err = el3_common_init(dev);
+
+ if (err) {
+ eisa_set_drvdata(edev, NULL);
+ free_netdev(dev);
+ return err;
+ }
+
+ el3_devs[el3_cards++] = dev;
+ return 0;
+}
+#endif
+
+/* This remove works for all device types.
+ *
+ * The net dev must be stored in the driver data field.
+ */
+static int el3_device_remove(struct device *device)
+{
+ struct net_device *dev;
+
+ dev = dev_get_drvdata(device);
+
+ el3_common_remove(dev);
+ return 0;
+}
+
+/* Read a word from the EEPROM using the regular EEPROM access register.
+ * Assume that we are in register window zero.
+ */
+static ushort read_eeprom(int ioaddr, int index)
+{
+ outw(EEPROM_READ + index, ioaddr + 10);
+ /* Pause for at least 162 us for the read to take place.
+ * Some chips seem to require much longer.
+ */
+ mdelay(2);
+ return inw(ioaddr + 12);
+}
+
+/* Read a word from the EEPROM when in the ISA ID probe state. */
+static ushort id_read_eeprom(int index)
+{
+ int bit, word = 0;
+
+ /* Issue read command, and pause for at least 162 us for it to
+ * complete. Assume extra-fast 16MHz bus.
+ */
+ outb(EEPROM_READ + index, id_port);
+
+ /* Pause for at least 162 us for the read to take place.
+ * Some chips seem to require much longer.
+ */
+ mdelay(4);
+
+ for (bit = 15; bit >= 0; bit--)
+ word = (word << 1) + (inb(id_port) & 0x01);
+
+ if (el3_debug > 3)
+ pr_debug(" 3c509 EEPROM word %d %#4.4x.\n", index, word);
+
+ return word;
+}
+
+static int el3_open(struct net_device *dev)
+{
+ int ioaddr = dev->base_addr;
+ int i;
+
+ outw(TX_RESET, ioaddr + EL3_CMD);
+ outw(RX_RESET, ioaddr + EL3_CMD);
+ outw(SET_STATUS_ENB | 0x00, ioaddr + EL3_CMD);
+
+ i = request_irq(dev->irq, el3_interrupt, 0, dev->name, dev);
+ if (i)
+ return i;
+
+ EL3WINDOW(0);
+ if (el3_debug > 3)
+ pr_debug("%s: Opening, IRQ %d status@%x %4.4x.\n",
+ dev->name, dev->irq,
+ ioaddr + EL3_STATUS, inw(ioaddr + EL3_STATUS));
+
+ el3_up(dev);
+
+ if (el3_debug > 3)
+ pr_debug("%s: Opened 3c509 IRQ %d status %4.4x.\n",
+ dev->name, dev->irq, inw(ioaddr + EL3_STATUS));
+
+ return 0;
+}
+
+static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ int ioaddr = dev->base_addr;
+
+ /* Transmitter timeout, serious problems. */
+ pr_warn("%s: transmit timed out, Tx_status %2.2x status %4.4x Tx FIFO room %d\n",
+ dev->name, inb(ioaddr + TX_STATUS), inw(ioaddr + EL3_STATUS),
+ inw(ioaddr + TX_FREE));
+ dev->stats.tx_errors++;
+ netif_trans_update(dev); /* prevent tx timeout */
+ /* Issue TX_RESET and TX_START commands. */
+ outw(TX_RESET, ioaddr + EL3_CMD);
+ outw(TX_ENABLE, ioaddr + EL3_CMD);
+ netif_wake_queue(dev);
+}
+
+static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct el3_private *lp = netdev_priv(dev);
+ int ioaddr = dev->base_addr;
+ unsigned long flags;
+
+ netif_stop_queue(dev);
+
+ dev->stats.tx_bytes += skb->len;
+
+ if (el3_debug > 4) {
+ pr_debug("%s: el3_start_xmit(length = %u) called, status %4.4x.\n",
+ dev->name, skb->len, inw(ioaddr + EL3_STATUS));
+ }
+ /*
+ * We lock the driver against other processors. Note
+ * we don't need to lock versus the IRQ as we suspended
+ * that. This means that we lose the ability to take
+ * an RX during a TX upload. That sucks a bit with SMP
+ * on an original 3c509 (2K buffer).
+ *
+ * Using disable_irq stops us crapping on other
+ * time sensitive devices.
+ */
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ /* Put out the doubleword header... */
+ outw(skb->len, ioaddr + TX_FIFO);
+ outw(0x00, ioaddr + TX_FIFO);
+ /* ... and the packet rounded to a doubleword. */
+ outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2);
+
+ if (inw(ioaddr + TX_FREE) > 1536) {
+ netif_start_queue(dev);
+ } else {
+ /* Interrupt us when the FIFO has room for max-sized packet. */
+ outw(SET_TX_THRESHOLD + 1536, ioaddr + EL3_CMD);
+ }
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ dev_consume_skb_any(skb);
+
+ /* Clear the Tx status stack. */
+ {
+ short tx_status;
+ int i = 4;
+
+ while (--i > 0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) {
+ if (tx_status & 0x38)
+ dev->stats.tx_aborted_errors++;
+ if (tx_status & 0x30)
+ outw(TX_RESET, ioaddr + EL3_CMD);
+ if (tx_status & 0x3C)
+ outw(TX_ENABLE, ioaddr + EL3_CMD);
+ /* Pop the status stack. */
+ outb(0x00, ioaddr + TX_STATUS);
+ }
+ }
+ return NETDEV_TX_OK;
+}
+
+/* The EL3 interrupt handler. */
+static irqreturn_t el3_interrupt(int irq, void *dev_id)
+{
+ struct net_device *dev = dev_id;
+ int i = max_interrupt_work;
+ struct el3_private *lp;
+ int ioaddr, status;
+
+ lp = netdev_priv(dev);
+ spin_lock(&lp->lock);
+
+ ioaddr = dev->base_addr;
+
+ if (el3_debug > 4) {
+ status = inw(ioaddr + EL3_STATUS);
+ pr_debug("%s: interrupt, status %4.4x.\n", dev->name, status);
+ }
+
+ while ((status = inw(ioaddr + EL3_STATUS)) &
+ (INT_LATCH | RX_COMPLETE | STATS_FULL)) {
+
+ if (status & RX_COMPLETE)
+ el3_rx(dev);
+
+ if (status & TX_AVAILABLE) {
+ if (el3_debug > 5)
+ pr_debug(" TX room bit was handled.\n");
+ /* There's room in the FIFO for a full-sized packet. */
+ outw(ACK_INTR | TX_AVAILABLE, ioaddr + EL3_CMD);
+ netif_wake_queue(dev);
+ }
+ if (status &
+ (ADAPTER_FAILURE | RX_EARLY | STATS_FULL | TX_COMPLETE)) {
+ /* Handle all uncommon interrupts. */
+ if (status & STATS_FULL) {
+ /* Empty statistics. */
+ update_stats(dev);
+ }
+ if (status & RX_EARLY) {
+ /* Rx early is unused. */
+ el3_rx(dev);
+ outw(ACK_INTR | RX_EARLY, ioaddr + EL3_CMD);
+ }
+ if (status & TX_COMPLETE) {
+ /* Really Tx error. */
+ short tx_status;
+ int i = 4;
+
+ while (--i > 0 &&
+ ((tx_status = inb(ioaddr + TX_STATUS))
+ > 0)) {
+ if (tx_status & 0x38)
+ dev->stats.tx_aborted_errors++;
+ if (tx_status & 0x30)
+ outw(TX_RESET,
+ ioaddr + EL3_CMD);
+ if (tx_status & 0x3C)
+ outw(TX_ENABLE,
+ ioaddr + EL3_CMD);
+ /* Pop the status stack. */
+ outb(0x00, ioaddr + TX_STATUS);
+ }
+ }
+ if (status & ADAPTER_FAILURE) {
+ /* Adapter failure requires Rx reset
+ * and reinit.
+ */
+ outw(RX_RESET, ioaddr + EL3_CMD);
+ /* Set the Rx filter to the current state. */
+ outw((SET_RX_FILTER | RX_STATION |
+ RX_BROADCAST |
+ (dev->flags & IFF_ALLMULTI ?
+ RX_MULTICAST : 0) |
+ (dev->flags & IFF_PROMISC ?
+ RX_PROM : 0)),
+ ioaddr + EL3_CMD);
+ /* Re-enable the receiver. */
+ outw(RX_ENABLE, ioaddr + EL3_CMD);
+ outw(ACK_INTR | ADAPTER_FAILURE,
+ ioaddr + EL3_CMD);
+ }
+ }
+
+ if (--i < 0) {
+ pr_err("%s: Infinite loop in interrupt, status %4.4x.\n",
+ dev->name, status);
+ /* Clear all interrupts. */
+ outw(ACK_INTR | 0xFF, ioaddr + EL3_CMD);
+ break;
+ }
+ /* Acknowledge the IRQ. */
+ outw(ACK_INTR | INT_REQ | INT_LATCH, ioaddr + EL3_CMD);
+ }
+
+ if (el3_debug > 4) {
+ pr_debug("%s: exiting interrupt, status %4.4x.\n", dev->name,
+ inw(ioaddr + EL3_STATUS));
+ }
+ spin_unlock(&lp->lock);
+ return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/*
+ * Polling receive - used by netconsole and other diagnostic tools
+ * to allow network i/o with interrupts disabled.
+ */
+static void el3_poll_controller(struct net_device *dev)
+{
+ disable_irq(dev->irq);
+ el3_interrupt(dev->irq, dev);
+ enable_irq(dev->irq);
+}
+#endif
+
+static struct net_device_stats *el3_get_stats(struct net_device *dev)
+{
+ struct el3_private *lp = netdev_priv(dev);
+ unsigned long flags;
+
+ /* This is fast enough not to bother with disable IRQ stuff. */
+ spin_lock_irqsave(&lp->lock, flags);
+ update_stats(dev);
+ spin_unlock_irqrestore(&lp->lock, flags);
+ return &dev->stats;
+}
+
+/* Update statistics. We change to register window 6, so this should be run
+ * single-threaded if the device is active. This is expected to be a rare
+ * operation, and it's simpler for the rest of the driver to assume that
+ * window 1 is always valid rather than use a special window-state variable.
+ */
+static void update_stats(struct net_device *dev)
+{
+ int ioaddr = dev->base_addr;
+
+ if (el3_debug > 5)
+ pr_debug(" Updating the statistics.\n");
+ /* Turn off statistics updates while reading. */
+ outw(STATS_DISABLE, ioaddr + EL3_CMD);
+ /* Switch to the stats window, and read everything. */
+ EL3WINDOW(6);
+ dev->stats.tx_carrier_errors += inb(ioaddr + 0);
+ dev->stats.tx_heartbeat_errors += inb(ioaddr + 1);
+ /* Multiple collisions. */ inb(ioaddr + 2);
+ dev->stats.collisions += inb(ioaddr + 3);
+ dev->stats.tx_window_errors += inb(ioaddr + 4);
+ dev->stats.rx_fifo_errors += inb(ioaddr + 5);
+ dev->stats.tx_packets += inb(ioaddr + 6);
+ /* Rx packets */ inb(ioaddr + 7);
+ /* Tx deferrals */ inb(ioaddr + 8);
+ inw(ioaddr + 10); /* Total Rx and Tx octets. */
+ inw(ioaddr + 12);
+
+ /* Back to window 1, and turn statistics back on. */
+ EL3WINDOW(1);
+ outw(STATS_ENABLE, ioaddr + EL3_CMD);
+}
+
+static int el3_rx(struct net_device *dev)
+{
+ int ioaddr = dev->base_addr;
+ short rx_status;
+
+ if (el3_debug > 5)
+ pr_debug(" In rx_packet(), status %4.4x, rx_status %4.4x.\n",
+ inw(ioaddr + EL3_STATUS), inw(ioaddr + RX_STATUS));
+ while ((rx_status = inw(ioaddr + RX_STATUS)) > 0) {
+ if (rx_status & 0x4000) {
+ /* Error, update stats. */
+ short error = rx_status & 0x3800;
+
+ outw(RX_DISCARD, ioaddr + EL3_CMD);
+ dev->stats.rx_errors++;
+ switch (error) {
+ case 0x0000:
+ dev->stats.rx_over_errors++;
+ break;
+ case 0x0800:
+ dev->stats.rx_length_errors++;
+ break;
+ case 0x1000:
+ dev->stats.rx_frame_errors++;
+ break;
+ case 0x1800:
+ dev->stats.rx_length_errors++;
+ break;
+ case 0x2000:
+ dev->stats.rx_frame_errors++;
+ break;
+ case 0x2800:
+ dev->stats.rx_crc_errors++; break;
+ }
+ } else {
+ short pkt_len = rx_status & 0x7ff;
+ struct sk_buff *skb;
+
+ skb = netdev_alloc_skb(dev, pkt_len + 5);
+ if (el3_debug > 4)
+ pr_debug("Receiving packet size %d status %4.4x.\n",
+ pkt_len, rx_status);
+ if (skb) {
+ /* Align IP on 16 byte. */
+ skb_reserve(skb, 2);
+
+ /* 'skb->data' points to the start of sk_buff
+ * data area.
+ */
+ insl(ioaddr + RX_FIFO, skb_put(skb, pkt_len),
+ (pkt_len + 3) >> 2);
+
+ /* Pop top Rx packet. */
+ outw(RX_DISCARD, ioaddr + EL3_CMD);
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
+ dev->stats.rx_bytes += pkt_len;
+ dev->stats.rx_packets++;
+ continue;
+ }
+ outw(RX_DISCARD, ioaddr + EL3_CMD);
+ dev->stats.rx_dropped++;
+ if (el3_debug)
+ pr_debug("%s: Couldn't allocate a sk_buff of size %d.\n",
+ dev->name, pkt_len);
+ }
+ inw(ioaddr + EL3_STATUS); /* Delay. */
+ while (inw(ioaddr + EL3_STATUS) & 0x1000)
+ pr_debug(" Waiting for 3c509 to discard packet, status %x.\n",
+ inw(ioaddr + EL3_STATUS));
+ }
+
+ return 0;
+}
+
+/* Set or clear the multicast filter for this adaptor. */
+static void set_multicast_list(struct net_device *dev)
+{
+ struct el3_private *lp = netdev_priv(dev);
+ int ioaddr = dev->base_addr;
+ int mc_count = netdev_mc_count(dev);
+ unsigned long flags;
+
+ if (el3_debug > 1) {
+ static int old;
+
+ if (old != mc_count) {
+ old = mc_count;
+ pr_debug("%s: Setting Rx mode to %d addresses.\n",
+ dev->name, mc_count);
+ }
+ }
+ spin_lock_irqsave(&lp->lock, flags);
+ if (dev->flags & IFF_PROMISC) {
+ outw((SET_RX_FILTER | RX_STATION | RX_MULTICAST |
+ RX_BROADCAST | RX_PROM),
+ ioaddr + EL3_CMD);
+ } else if (mc_count || (dev->flags & IFF_ALLMULTI)) {
+ outw(SET_RX_FILTER | RX_STATION | RX_MULTICAST | RX_BROADCAST,
+ ioaddr + EL3_CMD);
+ } else {
+ outw(SET_RX_FILTER | RX_STATION | RX_BROADCAST,
+ ioaddr + EL3_CMD);
+ }
+ spin_unlock_irqrestore(&lp->lock, flags);
+}
+
+static int el3_close(struct net_device *dev)
+{
+ struct el3_private *lp = netdev_priv(dev);
+ int ioaddr = dev->base_addr;
+
+ if (el3_debug > 2)
+ pr_debug("%s: Shutting down ethercard.\n", dev->name);
+
+ el3_down(dev);
+
+ free_irq(dev->irq, dev);
+ /* Switching back to window 0 disables the IRQ. */
+ EL3WINDOW(0);
+ if (lp->type != EL3_EISA) {
+ /* But we explicitly zero the IRQ line select anyway. Don't do
+ * it on EISA cards, it prevents the module from getting an
+ * IRQ after unload+reload...
+ */
+ outw(0x0f00, ioaddr + WN0_IRQ);
+ }
+
+ return 0;
+}
+
+static int el3_link_ok(struct net_device *dev)
+{
+ int ioaddr = dev->base_addr;
+ u16 tmp;
+
+ EL3WINDOW(4);
+ tmp = inw(ioaddr + WN4_MEDIA);
+ EL3WINDOW(1);
+ return tmp & (1 << 11);
+}
+
+static void el3_netdev_get_ecmd(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ int ioaddr = dev->base_addr;
+ u32 supported;
+ u16 tmp;
+
+ EL3WINDOW(0);
+ /* Obtain current transceiver via WN4_MEDIA? */
+ tmp = inw(ioaddr + WN0_ADDR_CONF);
+ switch (tmp >> 14) {
+ case 0:
+ cmd->base.port = PORT_TP;
+ break;
+ case 1:
+ cmd->base.port = PORT_AUI;
+ break;
+ case 3:
+ cmd->base.port = PORT_BNC;
+ break;
+ default:
+ break;
+ }
+
+ cmd->base.duplex = DUPLEX_HALF;
+ supported = 0;
+ tmp = inw(ioaddr + WN0_CONF_CTRL);
+ if (tmp & (1 << 13))
+ supported |= SUPPORTED_AUI;
+ if (tmp & (1 << 12))
+ supported |= SUPPORTED_BNC;
+ if (tmp & (1 << 9)) {
+ supported |= SUPPORTED_TP | SUPPORTED_10baseT_Half |
+ SUPPORTED_10baseT_Full; /* hmm... */
+ EL3WINDOW(4);
+ tmp = inw(ioaddr + WN4_NETDIAG);
+ if (tmp & FD_ENABLE)
+ cmd->base.duplex = DUPLEX_FULL;
+ }
+
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+ supported);
+ cmd->base.speed = SPEED_10;
+ EL3WINDOW(1);
+}
+
+static int el3_netdev_set_ecmd(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ int ioaddr = dev->base_addr;
+ u16 tmp;
+
+ if (cmd->base.speed != SPEED_10)
+ return -EINVAL;
+ if (cmd->base.duplex != DUPLEX_HALF && cmd->base.duplex != DUPLEX_FULL)
+ return -EINVAL;
+
+ /* change XCVR type */
+ EL3WINDOW(0);
+ tmp = inw(ioaddr + WN0_ADDR_CONF);
+ switch (cmd->base.port) {
+ case PORT_TP:
+ tmp &= ~(3 << 14);
+ dev->if_port = 0;
+ break;
+ case PORT_AUI:
+ tmp &= ~(3 << 14);
+ tmp |= 1 << 14;
+ dev->if_port = 1;
+ break;
+ case PORT_BNC:
+ tmp |= 3 << 14;
+ dev->if_port = 3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ outw(tmp, ioaddr + WN0_ADDR_CONF);
+ if (dev->if_port == 3) {
+ /* Fire up the DC-DC converter if BNC gets enabled. */
+ tmp = inw(ioaddr + WN0_ADDR_CONF);
+ if (tmp & (3 << 14)) {
+ outw(START_COAX, ioaddr + EL3_CMD);
+ udelay(800);
+ } else {
+ return -EIO;
+ }
+ }
+
+ EL3WINDOW(4);
+ tmp = inw(ioaddr + WN4_NETDIAG);
+ if (cmd->base.duplex == DUPLEX_FULL)
+ tmp |= FD_ENABLE;
+ else
+ tmp &= ~FD_ENABLE;
+ outw(tmp, ioaddr + WN4_NETDIAG);
+ EL3WINDOW(1);
+
+ return 0;
+}
+
+static void el3_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+}
+
+static int el3_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct el3_private *lp = netdev_priv(dev);
+
+ spin_lock_irq(&lp->lock);
+ el3_netdev_get_ecmd(dev, cmd);
+ spin_unlock_irq(&lp->lock);
+ return 0;
+}
+
+static int el3_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct el3_private *lp = netdev_priv(dev);
+ int ret;
+
+ spin_lock_irq(&lp->lock);
+ ret = el3_netdev_set_ecmd(dev, cmd);
+ spin_unlock_irq(&lp->lock);
+ return ret;
+}
+
+static u32 el3_get_link(struct net_device *dev)
+{
+ struct el3_private *lp = netdev_priv(dev);
+ u32 ret;
+
+ spin_lock_irq(&lp->lock);
+ ret = el3_link_ok(dev);
+ spin_unlock_irq(&lp->lock);
+ return ret;
+}
+
+static u32 el3_get_msglevel(struct net_device *dev)
+{
+ return el3_debug;
+}
+
+static void el3_set_msglevel(struct net_device *dev, u32 v)
+{
+ el3_debug = v;
+}
+
+static const struct ethtool_ops ethtool_ops = {
+ .get_drvinfo = el3_get_drvinfo,
+ .get_link = el3_get_link,
+ .get_msglevel = el3_get_msglevel,
+ .set_msglevel = el3_set_msglevel,
+ .get_link_ksettings = el3_get_link_ksettings,
+ .set_link_ksettings = el3_set_link_ksettings,
+};
+
+static void el3_down(struct net_device *dev)
+{
+ int ioaddr = dev->base_addr;
+
+ netif_stop_queue(dev);
+
+ /* Turn off statistics ASAP. We update lp->stats below. */
+ outw(STATS_DISABLE, ioaddr + EL3_CMD);
+
+ /* Disable the receiver and transmitter. */
+ outw(RX_DISABLE, ioaddr + EL3_CMD);
+ outw(TX_DISABLE, ioaddr + EL3_CMD);
+
+ if (dev->if_port == 3) {
+ /* Turn off thinnet power. Green! */
+ outw(STOP_COAX, ioaddr + EL3_CMD);
+ } else if (dev->if_port == 0) {
+ /* Disable link beat and jabber, if_port may change here next
+ * open().
+ */
+ EL3WINDOW(4);
+ outw(inw(ioaddr + WN4_MEDIA) & ~MEDIA_TP, ioaddr + WN4_MEDIA);
+ }
+
+ outw(SET_INTR_ENB | 0x0000, ioaddr + EL3_CMD);
+
+ update_stats(dev);
+}
+
+static void el3_up(struct net_device *dev)
+{
+ int ioaddr = dev->base_addr;
+ int i, sw_info, net_diag;
+
+ /* Activating the board required and does no harm otherwise. */
+ outw(0x0001, ioaddr + 4);
+
+ /* Set the IRQ line. */
+ outw((dev->irq << 12) | 0x0f00, ioaddr + WN0_IRQ);
+
+ /* Set the station address in window 2 each time opened. */
+ EL3WINDOW(2);
+
+ for (i = 0; i < 6; i++)
+ outb(dev->dev_addr[i], ioaddr + i);
+
+ if ((dev->if_port & 0x03) == 3) {
+ /* BNC interface */
+
+ /* Start the thinnet transceiver. We should really wait
+ * 50ms...
+ */
+ outw(START_COAX, ioaddr + EL3_CMD);
+ } else if ((dev->if_port & 0x03) == 0) {
+ /* 10baseT interface */
+
+ /* Combine secondary sw_info word (the adapter level) and
+ * primary sw_info word (duplex setting plus other useless
+ * bits).
+ */
+ EL3WINDOW(0);
+ sw_info = (read_eeprom(ioaddr, 0x14) & 0x400f) |
+ (read_eeprom(ioaddr, 0x0d) & 0xBff0);
+
+ EL3WINDOW(4);
+ net_diag = inw(ioaddr + WN4_NETDIAG);
+ /* Temporarily assume full-duplex will be set. */
+ net_diag = (net_diag | FD_ENABLE);
+ pr_info("%s: ", dev->name);
+ switch (dev->if_port & 0x0c) {
+ case 12:
+ /* Force full-duplex mode if 3c5x9b. */
+ if (sw_info & 0x000f) {
+ pr_cont("Forcing 3c5x9b full-duplex mode");
+ break;
+ }
+ fallthrough;
+ case 8:
+ /* Set full-duplex mode based on eeprom config
+ * setting.
+ */
+ if ((sw_info & 0x000f) && (sw_info & 0x8000)) {
+ pr_cont("Setting 3c5x9b full-duplex mode (from EEPROM configuration bit)");
+ break;
+ }
+ fallthrough;
+ default:
+ /* xcvr = (0 || 4) OR user has an old 3c5x9 non "B"
+ * model.
+ */
+ pr_cont("Setting 3c5x9/3c5x9B half-duplex mode");
+ /* Disable full duplex. */
+ net_diag = (net_diag & ~FD_ENABLE);
+ }
+
+ outw(net_diag, ioaddr + WN4_NETDIAG);
+ pr_cont(" if_port: %d, sw_info: %4.4x\n",
+ dev->if_port, sw_info);
+ if (el3_debug > 3)
+ pr_debug("%s: 3c5x9 net diag word is now: %4.4x.\n",
+ dev->name, net_diag);
+ /* Enable link beat and jabber check. */
+ outw(inw(ioaddr + WN4_MEDIA) | MEDIA_TP, ioaddr + WN4_MEDIA);
+ }
+
+ /* Switch to the stats window, and clear all stats by reading. */
+ outw(STATS_DISABLE, ioaddr + EL3_CMD);
+ EL3WINDOW(6);
+ for (i = 0; i < 9; i++)
+ inb(ioaddr + i);
+ inw(ioaddr + 10);
+ inw(ioaddr + 12);
+
+ /* Switch to register set 1 for normal use. */
+ EL3WINDOW(1);
+
+ /* Accept b-case and phys addr only. */
+ outw(SET_RX_FILTER | RX_STATION | RX_BROADCAST, ioaddr + EL3_CMD);
+ /* Turn on statistics. */
+ outw(STATS_ENABLE, ioaddr + EL3_CMD);
+
+ /* Enable the receiver. */
+ outw(RX_ENABLE, ioaddr + EL3_CMD);
+ /* Enable transmitter. */
+ outw(TX_ENABLE, ioaddr + EL3_CMD);
+ /* Allow status bits to be seen. */
+ outw(SET_STATUS_ENB | 0xff, ioaddr + EL3_CMD);
+ /* Ack all pending events, and set active indicator mask. */
+ outw(ACK_INTR | INT_LATCH | TX_AVAILABLE | RX_EARLY | INT_REQ,
+ ioaddr + EL3_CMD);
+ outw((SET_INTR_ENB | INT_LATCH | TX_AVAILABLE | TX_COMPLETE |
+ RX_COMPLETE | STATS_FULL),
+ ioaddr + EL3_CMD);
+
+ netif_start_queue(dev);
+}
+
+/* Power Management support functions */
+#ifdef CONFIG_PM
+
+static int el3_suspend(struct device *pdev, pm_message_t state)
+{
+ struct net_device *dev;
+ struct el3_private *lp;
+ unsigned long flags;
+ int ioaddr;
+
+ dev = dev_get_drvdata(pdev);
+ lp = netdev_priv(dev);
+ ioaddr = dev->base_addr;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ if (netif_running(dev))
+ netif_device_detach(dev);
+
+ el3_down(dev);
+ outw(POWER_DOWN, ioaddr + EL3_CMD);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+ return 0;
+}
+
+static int el3_resume(struct device *pdev)
+{
+ struct net_device *dev;
+ struct el3_private *lp;
+ unsigned long flags;
+ int ioaddr;
+
+ dev = dev_get_drvdata(pdev);
+ lp = netdev_priv(dev);
+ ioaddr = dev->base_addr;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ outw(POWER_UP, ioaddr + EL3_CMD);
+ EL3WINDOW(0);
+ el3_up(dev);
+
+ if (netif_running(dev))
+ netif_device_attach(dev);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+ return 0;
+}
+
+#endif /* CONFIG_PM */
+
+module_param(debug, int, 0);
+module_param_hw_array(irq, int, irq, NULL, 0);
+module_param(max_interrupt_work, int, 0);
+MODULE_PARM_DESC(debug, "debug level (0-6)");
+MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
+MODULE_PARM_DESC(max_interrupt_work, "maximum events handled per interrupt");
+#ifdef CONFIG_PNP
+module_param(nopnp, int, 0);
+MODULE_PARM_DESC(nopnp, "disable ISA PnP support (0-1)");
+#endif /* CONFIG_PNP */
+MODULE_DESCRIPTION("3Com Etherlink III (3c509, 3c509B, 3c529, 3c579) ethernet driver");
+MODULE_LICENSE("GPL");
+
+static int __init el3_init_module(void)
+{
+ int ret = 0;
+
+ if (debug >= 0)
+ el3_debug = debug;
+
+#ifdef CONFIG_PNP
+ if (!nopnp) {
+ ret = pnp_register_driver(&el3_pnp_driver);
+ if (!ret)
+ pnp_registered = 1;
+ }
+#endif
+ /* Select an open I/O location at 0x1*0 to do ISA contention select. */
+ /* Start with 0x110 to avoid some sound cards.*/
+ for (id_port = 0x110; id_port < 0x200; id_port += 0x10) {
+ if (!request_region(id_port, 1, "3c509-control"))
+ continue;
+ outb(0x00, id_port);
+ outb(0xff, id_port);
+ if (inb(id_port) & 0x01)
+ break;
+ release_region(id_port, 1);
+ }
+ if (id_port >= 0x200) {
+ id_port = 0;
+ pr_err("No I/O port available for 3c509 activation.\n");
+ } else {
+ ret = isa_register_driver(&el3_isa_driver, EL3_MAX_CARDS);
+ if (!ret)
+ isa_registered = 1;
+ }
+#ifdef CONFIG_EISA
+ ret = eisa_driver_register(&el3_eisa_driver);
+ if (!ret)
+ eisa_registered = 1;
+#endif
+
+#ifdef CONFIG_PNP
+ if (pnp_registered)
+ ret = 0;
+#endif
+ if (isa_registered)
+ ret = 0;
+#ifdef CONFIG_EISA
+ if (eisa_registered)
+ ret = 0;
+#endif
+ return ret;
+}
+
+static void __exit el3_cleanup_module(void)
+{
+#ifdef CONFIG_PNP
+ if (pnp_registered)
+ pnp_unregister_driver(&el3_pnp_driver);
+#endif
+ if (isa_registered)
+ isa_unregister_driver(&el3_isa_driver);
+ if (id_port)
+ release_region(id_port, 1);
+#ifdef CONFIG_EISA
+ if (eisa_registered)
+ eisa_driver_unregister(&el3_eisa_driver);
+#endif
+}
+
+module_init(el3_init_module);
+module_exit(el3_cleanup_module);
diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig
index 399cb6c56198..81db16744f94 100644
--- a/drivers/net/ethernet/3com/Kconfig
+++ b/drivers/net/ethernet/3com/Kconfig
@@ -17,6 +17,20 @@ config NET_VENDOR_3COM
if NET_VENDOR_3COM
+config EL3
+ tristate "3c509/3c579 \"EtherLink III\" support"
+ depends on (ISA || EISA)
+ help
+ If you have a network (Ethernet) card belonging to the 3Com
+ EtherLinkIII series, say Y here.
+
+ If your card is not working you may need to use the DOS
+ setup disk to disable Plug & Play mode, and to select the default
+ media type.
+
+ To compile this driver as a module, choose M here. The module
+ will be called 3c509.
+
config VORTEX
tristate "3c590/3c900 series (592/595/597) \"Vortex/Boomerang\" support"
depends on (PCI || EISA) && HAS_IOPORT_MAP
diff --git a/drivers/net/ethernet/3com/Makefile b/drivers/net/ethernet/3com/Makefile
index 5c4d07f1d456..2c65e472196f 100644
--- a/drivers/net/ethernet/3com/Makefile
+++ b/drivers/net/ethernet/3com/Makefile
@@ -3,5 +3,6 @@
# Makefile for the 3Com Ethernet device drivers
#
+obj-$(CONFIG_EL3) += 3c509.o
obj-$(CONFIG_VORTEX) += 3c59x.o
obj-$(CONFIG_TYPHOON) += typhoon.o
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index d0c0c0ec8a80..cecd66251dba 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -1793,11 +1793,8 @@ static int airoha_set_gdm2_loopback(struct airoha_gdm_port *port)
u32 val, pse_port, chan;
int i, src_port;
- /* Forward the traffic to the proper GDM port */
- pse_port = port->id == AIROHA_GDM3_IDX ? FE_PSE_PORT_GDM3
- : FE_PSE_PORT_GDM4;
airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX),
- pse_port);
+ FE_PSE_PORT_DROP);
airoha_fe_clear(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX),
GDM_STRIP_CRC_MASK);
@@ -1815,6 +1812,11 @@ static int airoha_set_gdm2_loopback(struct airoha_gdm_port *port)
GDM_SHORT_LEN_MASK | GDM_LONG_LEN_MASK,
FIELD_PREP(GDM_SHORT_LEN_MASK, 60) |
FIELD_PREP(GDM_LONG_LEN_MASK, AIROHA_MAX_MTU));
+ /* Forward the traffic to the proper GDM port */
+ pse_port = port->id == AIROHA_GDM3_IDX ? FE_PSE_PORT_GDM3
+ : FE_PSE_PORT_GDM4;
+ airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX),
+ pse_port);
/* Disable VIP and IFC for GDM2 */
airoha_fe_clear(eth, REG_FE_VIP_PORT_EN, BIT(AIROHA_GDM2_IDX));
diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c
index 04c5e3abd8d7..810a0cd9bcac 100644
--- a/drivers/net/ethernet/amd/pds_core/debugfs.c
+++ b/drivers/net/ethernet/amd/pds_core/debugfs.c
@@ -64,9 +64,14 @@ DEFINE_SHOW_ATTRIBUTE(identity);
void pdsc_debugfs_add_ident(struct pdsc *pdsc)
{
+ struct dentry *dentry;
+
/* This file will already exist in the reset flow */
- if (debugfs_lookup("identity", pdsc->dentry))
+ dentry = debugfs_lookup("identity", pdsc->dentry);
+ if (!IS_ERR_OR_NULL(dentry)) {
+ dput(dentry);
return;
+ }
debugfs_create_file("identity", 0400, pdsc->dentry,
pdsc, &identity_fops);
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index 2e1d0d01d03a..bded6b33289c 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -162,12 +162,19 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
dev_dbg(dev, "DEVCMD %d %s after %ld secs\n",
opcode, pdsc_devcmd_str(opcode), duration / HZ);
- if ((!done || timeout) && running) {
+ if (!running) {
+ dev_err(dev, "DEVCMD %d %s fw not running\n",
+ opcode, pdsc_devcmd_str(opcode));
+ pdsc_devcmd_clean(pdsc);
+ return -ENXIO;
+ }
+
+ if (!done || timeout) {
dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n",
opcode, pdsc_devcmd_str(opcode), done, timeout,
max_seconds);
- err = -ETIMEDOUT;
pdsc_devcmd_clean(pdsc);
+ return -ETIMEDOUT;
}
status = pdsc_devcmd_status(pdsc);
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index b576be626a29..3f0e56b951bf 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -122,12 +122,14 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
listlen = min(fw_list.num_fw_slots, ARRAY_SIZE(fw_list.fw_names));
for (i = 0; i < listlen; i++) {
+ char *fw_ver = fw_list.fw_names[i].fw_version;
+
if (i < ARRAY_SIZE(fw_slotnames))
strscpy(buf, fw_slotnames[i], sizeof(buf));
else
snprintf(buf, sizeof(buf), "fw.slot_%d", i);
- err = devlink_info_version_stored_put(req, buf,
- fw_list.fw_names[i].fw_version);
+ fw_ver[sizeof(fw_list.fw_names[i].fw_version) - 1] = '\0';
+ err = devlink_info_version_stored_put(req, buf, fw_ver);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index a5ab99474179..4e4794c4dfdc 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -1856,6 +1856,9 @@ static int ag71xx_probe(struct platform_device *pdev)
ag71xx_int_disable(ag, AG71XX_INT_POLL);
ndev->irq = platform_get_irq(pdev, 0);
+ if (ndev->irq < 0)
+ return ndev->irq;
+
err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt,
0x0, dev_name(&pdev->dev), ndev);
if (err) {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 54f71b1e85fc..7c11cf916762 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1368,13 +1368,12 @@ void bcmgenet_eee_enable_set(struct net_device *dev, bool enable)
reg &= ~(TBUF_EEE_EN | TBUF_PM_EN);
bcmgenet_writel(reg, priv->base + off);
- /* Do the same for thing for RBUF */
+ /* RBUF EEE/PM can break the RX path on GENET. Keep it disabled. */
reg = bcmgenet_rbuf_readl(priv, RBUF_ENERGY_CTRL);
- if (enable)
- reg |= RBUF_EEE_EN | RBUF_PM_EN;
- else
+ if (reg & (RBUF_EEE_EN | RBUF_PM_EN)) {
reg &= ~(RBUF_EEE_EN | RBUF_PM_EN);
- bcmgenet_rbuf_writel(priv, reg, RBUF_ENERGY_CTRL);
+ bcmgenet_rbuf_writel(priv, reg, RBUF_ENERGY_CTRL);
+ }
if (!enable && priv->clk_eee_enabled) {
clk_disable_unprepare(priv->clk_eee);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 662e4fbafb74..e58cc81d199d 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -56,11 +56,21 @@ static inline u32 enetc_vsi_set_msize(u32 size)
}
#define ENETC_PSIMSGRR 0x204
-#define ENETC_PSIMSGRR_MR_MASK GENMASK(2, 1)
-#define ENETC_PSIMSGRR_MR(n) BIT((n) + 1) /* n = VSI index */
#define ENETC_PSIVMSGRCVAR0(n) (0x210 + (n) * 0x8) /* n = VSI index */
#define ENETC_PSIVMSGRCVAR1(n) (0x214 + (n) * 0x8)
+/* Message received mask, n is the active number of VSIs.
+ * It is available for ENETC_PSIMSGRR, ENETC_PSIIER, and
+ * ENETC_PSIIDR registers.
+ */
+#define ENETC_PSIMR_MASK(n) \
+ ({ typeof(n) _n = (n); (_n) ? GENMASK((_n), 1) : 0; })
+
+/* Message received bit, n is VSI index. It is available for
+ * ENETC_PSIMSGRR, ENETC_PSIIER, and ENETC_PSIIDR registers.
+ */
+#define ENETC_PSIMR_BIT(n) BIT((n) + 1)
+
#define ENETC_VSIMSGSR 0x204 /* RO */
#define ENETC_VSIMSGSR_MB BIT(0)
#define ENETC_VSIMSGSR_MS BIT(1)
@@ -94,7 +104,6 @@ static inline u32 enetc_vsi_set_msize(u32 size)
#define ENETC_SICAPR1 0x904
#define ENETC_PSIIER 0xa00
-#define ENETC_PSIIER_MR_MASK GENMASK(2, 1)
#define ENETC_PSIIDR 0xa08
#define ENETC_SITXIDR 0xa18
#define ENETC_SIRXIDR 0xa28
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_msg.c b/drivers/net/ethernet/freescale/enetc/enetc_msg.c
index 40d22ebe9224..c09635e7eb3d 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_msg.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_msg.c
@@ -3,18 +3,25 @@
#include "enetc_pf.h"
-static void enetc_msg_disable_mr_int(struct enetc_hw *hw)
+static void enetc_msg_disable_mr_int(struct enetc_pf *pf)
{
- u32 psiier = enetc_rd(hw, ENETC_PSIIER);
+ struct enetc_hw *hw = &pf->si->hw;
+ u32 psiier;
+
+ psiier = enetc_rd(hw, ENETC_PSIIER) & ~ENETC_PSIMR_MASK(pf->num_vfs);
+
/* disable MR int source(s) */
- enetc_wr(hw, ENETC_PSIIER, psiier & ~ENETC_PSIIER_MR_MASK);
+ enetc_wr(hw, ENETC_PSIIER, psiier);
}
-static void enetc_msg_enable_mr_int(struct enetc_hw *hw)
+static void enetc_msg_enable_mr_int(struct enetc_pf *pf)
{
- u32 psiier = enetc_rd(hw, ENETC_PSIIER);
+ struct enetc_hw *hw = &pf->si->hw;
+ u32 psiier;
+
+ psiier = enetc_rd(hw, ENETC_PSIIER) | ENETC_PSIMR_MASK(pf->num_vfs);
- enetc_wr(hw, ENETC_PSIIER, psiier | ENETC_PSIIER_MR_MASK);
+ enetc_wr(hw, ENETC_PSIIER, psiier);
}
static irqreturn_t enetc_msg_psi_msix(int irq, void *data)
@@ -22,7 +29,7 @@ static irqreturn_t enetc_msg_psi_msix(int irq, void *data)
struct enetc_si *si = (struct enetc_si *)data;
struct enetc_pf *pf = enetc_si_priv(si);
- enetc_msg_disable_mr_int(&si->hw);
+ enetc_msg_disable_mr_int(pf);
schedule_work(&pf->msg_task);
return IRQ_HANDLED;
@@ -31,33 +38,35 @@ static irqreturn_t enetc_msg_psi_msix(int irq, void *data)
static void enetc_msg_task(struct work_struct *work)
{
struct enetc_pf *pf = container_of(work, struct enetc_pf, msg_task);
+ u32 mr_mask = ENETC_PSIMR_MASK(pf->num_vfs);
struct enetc_hw *hw = &pf->si->hw;
- unsigned long mr_mask;
+ u32 mr_status;
int i;
- for (;;) {
- mr_mask = enetc_rd(hw, ENETC_PSIMSGRR) & ENETC_PSIMSGRR_MR_MASK;
- if (!mr_mask) {
- /* re-arm MR interrupts, w1c the IDR reg */
- enetc_wr(hw, ENETC_PSIIDR, ENETC_PSIIER_MR_MASK);
- enetc_msg_enable_mr_int(hw);
- return;
- }
+ mr_status = (enetc_rd(hw, ENETC_PSIMSGRR) & mr_mask) |
+ (enetc_rd(hw, ENETC_PSIIDR) & mr_mask);
+ if (!mr_status)
+ goto out;
- for (i = 0; i < pf->num_vfs; i++) {
- u32 psimsgrr;
- u16 msg_code;
+ for (i = 0; i < pf->num_vfs; i++) {
+ u32 psimsgrr;
+ u16 msg_code;
+
+ if (!(ENETC_PSIMR_BIT(i) & mr_status))
+ continue;
- if (!(ENETC_PSIMSGRR_MR(i) & mr_mask))
- continue;
+ enetc_msg_handle_rxmsg(pf, i, &msg_code);
- enetc_msg_handle_rxmsg(pf, i, &msg_code);
+ /* w1c to clear the corresponding VF MR bit */
+ enetc_wr(hw, ENETC_PSIIDR, ENETC_PSIMR_BIT(i));
- psimsgrr = ENETC_SIMSGSR_SET_MC(msg_code);
- psimsgrr |= ENETC_PSIMSGRR_MR(i); /* w1c */
- enetc_wr(hw, ENETC_PSIMSGRR, psimsgrr);
- }
+ psimsgrr = ENETC_SIMSGSR_SET_MC(msg_code);
+ psimsgrr |= ENETC_PSIMR_BIT(i); /* w1c */
+ enetc_wr(hw, ENETC_PSIMSGRR, psimsgrr);
}
+
+out:
+ enetc_msg_enable_mr_int(pf);
}
/* Init */
@@ -96,12 +105,12 @@ static void enetc_msg_free_mbx(struct enetc_si *si, int idx)
struct enetc_hw *hw = &si->hw;
struct enetc_msg_swbd *msg;
+ enetc_wr(hw, ENETC_PSIVMSGRCVAR0(idx), 0);
+ enetc_wr(hw, ENETC_PSIVMSGRCVAR1(idx), 0);
+
msg = &pf->rxmsg[idx];
dma_free_coherent(&si->pdev->dev, msg->size, msg->vaddr, msg->dma);
memset(msg, 0, sizeof(*msg));
-
- enetc_wr(hw, ENETC_PSIVMSGRCVAR0(idx), 0);
- enetc_wr(hw, ENETC_PSIVMSGRCVAR1(idx), 0);
}
int enetc_msg_psi_init(struct enetc_pf *pf)
@@ -109,6 +118,15 @@ int enetc_msg_psi_init(struct enetc_pf *pf)
struct enetc_si *si = pf->si;
int vector, i, err;
+ for (i = 0; i < pf->num_vfs; i++) {
+ err = enetc_msg_alloc_mbx(si, i);
+ if (err)
+ goto free_mbx;
+ }
+
+ /* initialize PSI mailbox */
+ INIT_WORK(&pf->msg_task, enetc_msg_task);
+
/* register message passing interrupt handler */
snprintf(pf->msg_int_name, sizeof(pf->msg_int_name), "%s-vfmsg",
si->ndev->name);
@@ -117,32 +135,21 @@ int enetc_msg_psi_init(struct enetc_pf *pf)
if (err) {
dev_err(&si->pdev->dev,
"PSI messaging: request_irq() failed!\n");
- return err;
+ goto free_mbx;
}
/* set one IRQ entry for PSI message receive notification (SI int) */
enetc_wr(&si->hw, ENETC_SIMSIVR, ENETC_SI_INT_IDX);
- /* initialize PSI mailbox */
- INIT_WORK(&pf->msg_task, enetc_msg_task);
-
- for (i = 0; i < pf->num_vfs; i++) {
- err = enetc_msg_alloc_mbx(si, i);
- if (err)
- goto err_init_mbx;
- }
-
/* enable MR interrupts */
- enetc_msg_enable_mr_int(&si->hw);
+ enetc_msg_enable_mr_int(pf);
return 0;
-err_init_mbx:
+free_mbx:
for (i--; i >= 0; i--)
enetc_msg_free_mbx(si, i);
- free_irq(vector, si);
-
return err;
}
@@ -151,14 +158,17 @@ void enetc_msg_psi_free(struct enetc_pf *pf)
struct enetc_si *si = pf->si;
int i;
+ /* disable MR interrupts */
+ enetc_msg_disable_mr_int(pf);
+
+ /* de-register message passing interrupt handler */
+ free_irq(pci_irq_vector(si->pdev, ENETC_SI_INT_IDX), si);
+
cancel_work_sync(&pf->msg_task);
- /* disable MR interrupts */
- enetc_msg_disable_mr_int(&si->hw);
+ /* MR interrupts may be re-enabled by workqueue */
+ enetc_msg_disable_mr_int(pf);
for (i = 0; i < pf->num_vfs; i++)
enetc_msg_free_mbx(si, i);
-
- /* de-register message passing interrupt handler */
- free_irq(pci_irq_vector(si->pdev, ENETC_SI_INT_IDX), si);
}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index a12fd54a475f..3206b3daa1a0 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -252,8 +252,12 @@ static int enetc_pf_set_vf_mac(struct net_device *ndev, int vf, u8 *mac)
return -EADDRNOTAVAIL;
vf_state = &pf->vf_state[vf];
+
+ mutex_lock(&vf_state->lock);
vf_state->flags |= ENETC_VF_FLAG_PF_SET_MAC;
enetc_pf_set_primary_mac_addr(&priv->si->hw, vf + 1, mac);
+ mutex_unlock(&vf_state->lock);
+
return 0;
}
@@ -478,49 +482,77 @@ static void enetc_configure_port(struct enetc_pf *pf)
/* Messaging */
static u16 enetc_msg_pf_set_vf_primary_mac_addr(struct enetc_pf *pf,
- int vf_id)
+ int vf_id, void *msg)
{
struct enetc_vf_state *vf_state = &pf->vf_state[vf_id];
- struct enetc_msg_swbd *msg = &pf->rxmsg[vf_id];
- struct enetc_msg_cmd_set_primary_mac *cmd;
+ struct enetc_msg_cmd_set_primary_mac *cmd = msg;
struct device *dev = &pf->si->pdev->dev;
- u16 cmd_id;
+ u16 cmd_id = cmd->header.id;
char *addr;
- cmd = (struct enetc_msg_cmd_set_primary_mac *)msg->vaddr;
- cmd_id = cmd->header.id;
if (cmd_id != ENETC_MSG_CMD_MNG_ADD)
return ENETC_MSG_CMD_STATUS_FAIL;
addr = cmd->mac.sa_data;
- if (vf_state->flags & ENETC_VF_FLAG_PF_SET_MAC)
- dev_warn(dev, "Attempt to override PF set mac addr for VF%d\n",
- vf_id);
- else
- enetc_pf_set_primary_mac_addr(&pf->si->hw, vf_id + 1, addr);
+ if (!is_valid_ether_addr(addr)) {
+ dev_err_ratelimited(dev, "VF%d attempted to set invalid MAC\n",
+ vf_id);
+ return ENETC_MSG_CMD_STATUS_FAIL;
+ }
+
+ mutex_lock(&vf_state->lock);
+ if (vf_state->flags & ENETC_VF_FLAG_PF_SET_MAC) {
+ mutex_unlock(&vf_state->lock);
+ dev_err_ratelimited(dev,
+ "VF%d attempted to override PF set MAC\n",
+ vf_id);
+ return ENETC_MSG_CMD_STATUS_FAIL;
+ }
+
+ enetc_pf_set_primary_mac_addr(&pf->si->hw, vf_id + 1, addr);
+ mutex_unlock(&vf_state->lock);
return ENETC_MSG_CMD_STATUS_OK;
}
void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int vf_id, u16 *status)
{
- struct enetc_msg_swbd *msg = &pf->rxmsg[vf_id];
+ struct enetc_msg_swbd *msg_swbd = &pf->rxmsg[vf_id];
struct device *dev = &pf->si->pdev->dev;
struct enetc_msg_cmd_header *cmd_hdr;
u16 cmd_type;
+ u8 *msg;
- *status = ENETC_MSG_CMD_STATUS_OK;
- cmd_hdr = (struct enetc_msg_cmd_header *)msg->vaddr;
+ msg = kzalloc_objs(*msg, msg_swbd->size);
+ if (!msg) {
+ dev_err_ratelimited(dev,
+ "Failed to allocate message buffer\n");
+ *status = ENETC_MSG_CMD_STATUS_FAIL;
+ return;
+ }
+
+ /* Currently, only ENETC_MSG_CMD_MNG_MAC command is supported, so
+ * only sizeof(struct enetc_msg_cmd_set_primary_mac) bytes need to
+ * be copied. This data already includes the cmd_type field, so it
+ * can correctly return an error code.
+ */
+ memcpy(msg, msg_swbd->vaddr,
+ sizeof(struct enetc_msg_cmd_set_primary_mac));
+ cmd_hdr = (struct enetc_msg_cmd_header *)msg;
cmd_type = cmd_hdr->type;
switch (cmd_type) {
case ENETC_MSG_CMD_MNG_MAC:
- *status = enetc_msg_pf_set_vf_primary_mac_addr(pf, vf_id);
+ *status = enetc_msg_pf_set_vf_primary_mac_addr(pf, vf_id, msg);
break;
default:
- dev_err(dev, "command not supported (cmd_type: 0x%x)\n",
- cmd_type);
+ *status = ENETC_MSG_CMD_STATUS_FAIL;
+ dev_err_ratelimited(dev,
+ "command not supported (cmd_type: 0x%x)\n",
+ cmd_type);
}
+
+ kfree(msg);
}
#ifdef CONFIG_PCI_IOV
@@ -531,9 +563,9 @@ static int enetc_sriov_configure(struct pci_dev *pdev, int num_vfs)
int err;
if (!num_vfs) {
+ pci_disable_sriov(pdev);
enetc_msg_psi_free(pf);
pf->num_vfs = 0;
- pci_disable_sriov(pdev);
} else {
pf->num_vfs = num_vfs;
@@ -960,8 +992,13 @@ static int enetc_pf_probe(struct pci_dev *pdev,
if (pf->total_vfs) {
pf->vf_state = kzalloc_objs(struct enetc_vf_state,
pf->total_vfs);
- if (!pf->vf_state)
+ if (!pf->vf_state) {
+ err = -ENOMEM;
goto err_alloc_vf_state;
+ }
+
+ for (int i = 0; i < pf->total_vfs; i++)
+ mutex_init(&pf->vf_state[i].lock);
}
err = enetc_setup_mac_addresses(node, pf);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
index ae407e9e9ee7..35d484858c7b 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
@@ -14,6 +14,7 @@ enum enetc_vf_flags {
};
struct enetc_vf_state {
+ struct mutex lock; /* Prevent concurrent access */
enum enetc_vf_flags flags;
};
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c52c465280f7..e2fbe111f849 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3682,7 +3682,7 @@ int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
ICE_MCAST_VLAN_PROMISC_BITS,
vid);
- if (ret)
+ if (ret && ret != -EEXIST)
goto finish;
}
@@ -4104,6 +4104,12 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
}
ice_pf_dcb_recfg(pf, locked);
ice_vsi_open(vsi);
+ /* Rx rings are reallocated during VSI rebuild and lose their ptp_rx
+ * flag. Restore timestamp mode so newly allocated rings are set up
+ * for hardware Rx timestamping.
+ */
+ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+ ice_ptp_restore_timestamp_mode(pf);
goto done;
rebuild_err:
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index 24fb7a3e14d6..2c18e16fe053 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -2141,16 +2141,23 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port)
}
incval = (u64)hi << 32 | lo;
+ if (!ice_ptp_lock(hw)) {
+ dev_err(ice_hw_to_dev(hw), "Failed to acquire PTP semaphore\n");
+ return -EBUSY;
+ }
+
err = ice_write_40b_ptp_reg_eth56g(hw, port, PHY_REG_TIMETUS_L, incval);
if (err)
- return err;
+ goto err_ptp_unlock;
err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_INIT_INCVAL);
if (err)
- return err;
+ goto err_ptp_unlock;
ice_ptp_exec_tmr_cmd(hw);
+ ice_ptp_unlock(hw);
+
err = ice_sync_phy_timer_eth56g(hw, port);
if (err)
return err;
@@ -2166,6 +2173,10 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port)
ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port);
return 0;
+
+err_ptp_unlock:
+ ice_ptp_unlock(hw);
+ return err;
}
/**
@@ -4503,18 +4514,17 @@ static int
ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo)
{
struct ice_e810_params *params = &hw->ptp.phy.e810;
- unsigned long flags;
u32 val;
int err;
- spin_lock_irqsave(&params->atqbal_wq.lock, flags);
+ spin_lock_irq(&params->atqbal_wq.lock);
/* Wait for any pending in-progress low latency interrupt */
err = wait_event_interruptible_locked_irq(params->atqbal_wq,
!(params->atqbal_flags &
ATQBAL_FLAGS_INTR_IN_PROGRESS));
if (err) {
- spin_unlock_irqrestore(&params->atqbal_wq.lock, flags);
+ spin_unlock_irq(&params->atqbal_wq.lock);
return err;
}
@@ -4529,7 +4539,7 @@ ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo)
REG_LL_PROXY_H);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to read PTP timestamp using low latency read\n");
- spin_unlock_irqrestore(&params->atqbal_wq.lock, flags);
+ spin_unlock_irq(&params->atqbal_wq.lock);
return err;
}
@@ -4539,7 +4549,7 @@ ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo)
/* Read the low 32 bit value and set the TS valid bit */
*lo = rd32(hw, REG_LL_PROXY_L) | TS_VALID;
- spin_unlock_irqrestore(&params->atqbal_wq.lock, flags);
+ spin_unlock_irq(&params->atqbal_wq.lock);
return 0;
}
@@ -5254,9 +5264,13 @@ static void ice_ptp_init_phy_e830(struct ice_ptp_hw *ptp)
*/
bool ice_ptp_lock(struct ice_hw *hw)
{
+ struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
u32 hw_lock;
int i;
+ if (!ice_is_primary(hw))
+ hw = ice_get_primary_hw(pf);
+
#define MAX_TRIES 15
for (i = 0; i < MAX_TRIES; i++) {
@@ -5283,6 +5297,11 @@ bool ice_ptp_lock(struct ice_hw *hw)
*/
void ice_ptp_unlock(struct ice_hw *hw)
{
+ struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
+
+ if (!ice_is_primary(hw))
+ hw = ice_get_primary_hw(pf);
+
wr32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), 0);
}
diff --git a/drivers/net/ethernet/intel/ice/virt/queues.c b/drivers/net/ethernet/intel/ice/virt/queues.c
index f73d5a3e83d4..31be2f76181c 100644
--- a/drivers/net/ethernet/intel/ice/virt/queues.c
+++ b/drivers/net/ethernet/intel/ice/virt/queues.c
@@ -840,7 +840,7 @@ int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
if (qpi->rxq.databuffer_size != 0 &&
(qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
- qpi->rxq.databuffer_size < 1024))
+ qpi->rxq.databuffer_size < 128))
goto error_param;
ring->rx_buf_len = qpi->rxq.databuffer_size;
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
index 8a110145bfee..52de2bcbadbe 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -34,6 +34,7 @@ static int igc_fpe_init_smd_frame(struct igc_ring *ring,
return -ENOMEM;
}
+ buffer->type = IGC_TX_BUFFER_TYPE_SKB;
buffer->skb = skb;
buffer->protocol = 0;
buffer->bytecount = skb->len;
@@ -109,10 +110,16 @@ static int igc_fpe_xmit_smd_frame(struct igc_adapter *adapter,
__netif_tx_lock(nq, cpu);
err = igc_fpe_init_tx_descriptor(ring, skb, type);
- igc_flush_tx_descriptors(ring);
+ if (err)
+ goto err_free_skb_any;
+ igc_flush_tx_descriptors(ring);
__netif_tx_unlock(nq);
+ return 0;
+err_free_skb_any:
+ __netif_tx_unlock(nq);
+ dev_kfree_skb_any(skb);
return err;
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 42f89a179a3f..4ba3be961ab6 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1221,6 +1221,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
ether_addr_equal(rx_ring->netdev->dev_addr,
eth_hdr(skb)->h_source)) {
dev_kfree_skb_irq(skb);
+ skb = NULL;
continue;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 4f33a816bc7a..2e94d5105016 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -1294,13 +1294,18 @@ static inline void link_status_user_format(u64 lstat,
struct cgx_link_user_info *linfo,
struct cgx *cgx, u8 lmac_id)
{
+ unsigned int speed;
+
linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat);
linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat);
- linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)];
linfo->an = FIELD_GET(RESP_LINKSTAT_AN, lstat);
linfo->fec = FIELD_GET(RESP_LINKSTAT_FEC, lstat);
linfo->lmac_type_id = FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, lstat);
+ speed = FIELD_GET(RESP_LINKSTAT_SPEED, lstat);
+ linfo->speed = speed < ARRAY_SIZE(cgx_speed_mbps) ?
+ cgx_speed_mbps[speed] : 0;
+
if (linfo->lmac_type_id >= LMAC_MODE_MAX) {
dev_err(&cgx->pdev->dev, "Unknown lmac_type_id %d reported by firmware on cgx port%d:%d",
linfo->lmac_type_id, cgx->cgx_id, lmac_id);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 3c814d157ab9..607d0cf1a778 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -990,7 +990,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
u16 vf_func;
/* Only CGX PF/VF can add allmulticast entry */
- if (is_lbk_vf(rvu, pcifunc) && is_sdp_vf(rvu, pcifunc))
+ if (is_lbk_vf(rvu, pcifunc) || is_sdp_vf(rvu, pcifunc))
return;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c
index a5a8f4558717..dbf173196608 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c
@@ -619,11 +619,13 @@ static int cn20k_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id,
err = otx2_sync_mbox_msg(&pfvf->mbox);
if (err) {
qmem_free(pfvf->dev, pool->stack);
+ pool->stack = NULL;
return err;
}
aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox);
if (!aq) {
qmem_free(pfvf->dev, pool->stack);
+ pool->stack = NULL;
return -ENOMEM;
}
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 971fcab1c248..3d253132a17f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -1482,11 +1482,13 @@ int otx2_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id,
err = otx2_sync_mbox_msg(&pfvf->mbox);
if (err) {
qmem_free(pfvf->dev, pool->stack);
+ pool->stack = NULL;
return err;
}
aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
if (!aq) {
qmem_free(pfvf->dev, pool->stack);
+ pool->stack = NULL;
return -ENOMEM;
}
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c
index 94f155ffb17f..0f5d5642d3f7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c
@@ -609,7 +609,7 @@ static int rvu_rep_rsrc_init(struct otx2_nic *priv)
err = otx2_init_hw_resources(priv);
if (err)
- goto err_free_rsrc;
+ goto err_free_mem;
/* Set maximum frame size allowed in HW */
err = otx2_hw_set_mtu(priv, priv->hw.max_mtu);
@@ -621,6 +621,7 @@ static int rvu_rep_rsrc_init(struct otx2_nic *priv)
err_free_rsrc:
otx2_free_hw_resources(priv);
+err_free_mem:
otx2_free_queue_mem(qset);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index afdeb1b3d425..8409ae73768f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -160,13 +160,13 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx)
* channels are being closed for other reason and this work is not
* relevant anymore.
*/
- while (!netdev_trylock(sq->netdev)) {
+ while (!netdev_trylock(priv->netdev)) {
if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state))
return 0;
msleep(20);
}
- err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
+ err = mlx5e_health_channel_eq_recover(priv->netdev, eq, sq->cq.ch_stats);
if (!err) {
to_ctx->status = 0; /* this sq recovered */
goto out;
@@ -186,7 +186,7 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx)
"mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
err);
out:
- netdev_unlock(sq->netdev);
+ netdev_unlock(priv->netdev);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index a52e12c3c95a..db260e3d1412 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -792,8 +792,10 @@ static int mlx5e_xfrm_add_state(struct net_device *dev,
sa_entry->dev = dev;
sa_entry->ipsec = ipsec;
/* Check if this SA is originated from acquire flow temporary SA */
- if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
- goto out;
+ if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) {
+ x->xso.offload_handle = (unsigned long)sa_entry;
+ return 0;
+ }
err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
if (err)
@@ -870,7 +872,6 @@ static int mlx5e_xfrm_add_state(struct net_device *dev,
xa_unlock_bh(&ipsec->sadb);
}
-out:
x->xso.offload_handle = (unsigned long)sa_entry;
if (allow_tunnel_mode)
mlx5_eswitch_unblock_encap(priv->mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index bb61e2179078..99a0034b9b20 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1574,8 +1574,11 @@ static int mlx5e_create_rxfh_context(struct net_device *dev,
rxfh->indir, rxfh->key,
hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc,
rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric);
- if (err)
+ if (err) {
+ WARN_ON(mlx5e_rx_res_rss_destroy(priv->rx_res,
+ rxfh->rss_context));
goto unlock;
+ }
mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rxfh->rss_context,
ethtool_rxfh_context_indir(ctx),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index b31f689fe271..e90c6c6df835 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -252,7 +252,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_cq_arm(&c->xdpsq->cq);
if (unlikely(aff_change && busy_xsk)) {
- mlx5e_trigger_irq(&c->icosq);
+ mlx5e_trigger_napi_async_icosq(c);
ch_stats->force_irq++;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
index 3cfe743610d3..ab50d2c734ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
@@ -142,7 +142,8 @@ static int mlx5_esw_ipsec_modify_flow_dests(struct mlx5_eswitch *esw,
attr = flow->attr;
esw_attr = attr->esw_attr;
- if (esw_attr->out_count - esw_attr->split_count > 1)
+ if (!esw_attr->out_count ||
+ esw_attr->out_count - esw_attr->split_count > 1)
return 0;
err = mlx5_eswitch_restore_ipsec_rule(esw, flow->rule[0], esw_attr,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 123c96716a54..7c8311f41232 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -908,6 +908,24 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
esw_vport_cleanup_acl(esw, vport);
}
+static void mlx5_esw_vport_set_max_tx_speed(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int ret;
+
+ if (!MLX5_CAP_ESW(esw->dev, esw_vport_state_max_tx_speed))
+ return;
+
+ ret = mlx5_modify_vport_max_tx_speed(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport->vport, true,
+ vport->agg_max_tx_speed);
+ if (ret)
+ mlx5_core_dbg(esw->dev,
+ "Failed to set vport %d speed %d, err=%d\n",
+ vport->vport, vport->agg_max_tx_speed, ret);
+}
+
int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
enum mlx5_eswitch_vport_event enabled_events)
{
@@ -948,6 +966,9 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
esw->enabled_vports++;
esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num);
+
+ if (vport->agg_max_tx_speed)
+ mlx5_esw_vport_set_max_tx_speed(esw, vport);
done:
mutex_unlock(&esw->state_lock);
return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 5128f5020dae..e9cf7c592ce9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -247,6 +247,7 @@ struct mlx5_vport {
enum mlx5_eswitch_vport_event enabled_events;
int index;
struct mlx5_devlink_port *dl_port;
+ u32 agg_max_tx_speed;
};
struct mlx5_esw_indir_table;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 449e4bd86c06..f8e70ac5a85b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1274,6 +1274,11 @@ static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
if (vport->vport == MLX5_VPORT_UPLINK)
continue;
+ vport->agg_max_tx_speed = speed;
+
+ if (!vport->enabled)
+ continue;
+
ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
vport->vport, true, speed);
if (ret)
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index dbbde0fa57e7..e3c24d50dad0 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -77,21 +77,19 @@ static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
}
static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len,
- struct hwc_work_request *rx_req)
+ struct hwc_work_request *rx_req, u16 msg_id)
{
const struct gdma_resp_hdr *resp_msg = rx_req->buf_va;
struct hwc_caller_ctx *ctx;
int err;
- if (!test_bit(resp_msg->response.hwc_msg_id,
- hwc->inflight_msg_res.map)) {
- dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n",
- resp_msg->response.hwc_msg_id);
+ if (!test_bit(msg_id, hwc->inflight_msg_res.map)) {
+ dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", msg_id);
mana_hwc_post_rx_wqe(hwc->rxq, rx_req);
return;
}
- ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id;
+ ctx = hwc->caller_ctx + msg_id;
err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len);
if (err)
goto out;
@@ -251,6 +249,7 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id,
struct gdma_sge *sge;
u64 rq_base_addr;
u64 rx_req_idx;
+ u16 msg_id;
u8 *wqe;
if (WARN_ON_ONCE(hwc_rxq->gdma_wq->id != gdma_rxq_id))
@@ -266,16 +265,26 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id,
rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle;
rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size;
+ if (rx_req_idx >= hwc_rxq->msg_buf->num_reqs) {
+ dev_err(hwc->dev, "HWC RX: wrong rx_req_idx=%llu, num_reqs=%u\n",
+ rx_req_idx, hwc_rxq->msg_buf->num_reqs);
+ return;
+ }
+
rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx];
resp = (struct gdma_resp_hdr *)rx_req->buf_va;
- if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) {
- dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n",
- resp->response.hwc_msg_id);
+ /* Read msg_id once from DMA buffer to prevent TOCTOU:
+ * DMA memory is shared/unencrypted in CVMs - host can
+ * modify it between reads.
+ */
+ msg_id = READ_ONCE(resp->response.hwc_msg_id);
+ if (msg_id >= hwc->num_inflight_msg) {
+ dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n", msg_id);
return;
}
- mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req);
+ mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req, msg_id);
/* Can no longer use 'resp', because the buffer is posted to the HW
* in mana_hwc_handle_resp() above.
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 9861daa82d9e..b70262e70baf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -1036,11 +1036,13 @@ static void qed_cid_map_free(struct qed_hwfn *p_hwfn)
for (type = 0; type < MAX_CONN_TYPES; type++) {
bitmap_free(p_mngr->acquired[type].cid_map);
+ p_mngr->acquired[type].cid_map = NULL;
p_mngr->acquired[type].max_count = 0;
p_mngr->acquired[type].start_cid = 0;
for (vf = 0; vf < MAX_NUM_VFS; vf++) {
bitmap_free(p_mngr->acquired_vf[type][vf].cid_map);
+ p_mngr->acquired_vf[type][vf].cid_map = NULL;
p_mngr->acquired_vf[type][vf].max_count = 0;
p_mngr->acquired_vf[type][vf].start_cid = 0;
}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 269c0449760c..78d4df55740a 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -213,8 +213,8 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
ep = rmnet_get_endpoint(real_port, mux_id);
if (ep) {
hlist_del_init_rcu(&ep->hlnode);
- rmnet_vnd_dellink(mux_id, real_port, ep);
- kfree(ep);
+ real_port->nr_rmnet_devs--;
+ kfree_rcu(ep, rcu);
}
netdev_upper_dev_unlink(real_dev, dev);
@@ -238,9 +238,9 @@ static void rmnet_force_unassociate_device(struct net_device *real_dev)
hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) {
unregister_netdevice_queue(ep->egress_dev, &list);
netdev_upper_dev_unlink(real_dev, ep->egress_dev);
- rmnet_vnd_dellink(ep->mux_id, port, ep);
hlist_del_init_rcu(&ep->hlnode);
- kfree(ep);
+ port->nr_rmnet_devs--;
+ kfree_rcu(ep, rcu);
}
rmnet_unregister_real_device(real_dev);
unregister_netdevice_many(&list);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index ed112d51ac5a..f50fae1c6bdd 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -18,6 +18,7 @@ struct rmnet_endpoint {
u8 mux_id;
struct net_device *egress_dev;
struct hlist_node hlnode;
+ struct rcu_head rcu;
};
struct rmnet_egress_agg_params {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c
index bcb8e000e720..4ac979d874d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c
@@ -28,13 +28,16 @@
/*
* TX/RX Clock Delay Bit Masks:
- * - TX Delay: bits [14:8] — TX_CLK delay (unit: 0.1ns per bit)
- * - RX Delay: bits [30:24] — RX_CLK delay (unit: 0.1ns per bit)
+ * - TX Delay: bits [14:8] — TX_CLK delay (unit: 0.02ns per bit)
+ * - RX Delay: bits [30:24] — RX_CLK delay (unit: 0.02ns per bit)
*/
#define EIC7700_ETH_TX_ADJ_DELAY GENMASK(14, 8)
#define EIC7700_ETH_RX_ADJ_DELAY GENMASK(30, 24)
-#define EIC7700_MAX_DELAY_UNIT 0x7F
+#define EIC7700_MAX_DELAY_STEPS 0x7F
+#define EIC7700_DELAY_STEP_PS 20
+#define EIC7700_MAX_DELAY_PS \
+ (EIC7700_MAX_DELAY_STEPS * EIC7700_DELAY_STEP_PS)
static const char * const eic7700_clk_names[] = {
"tx", "axi", "cfg",
@@ -42,6 +45,15 @@ static const char * const eic7700_clk_names[] = {
struct eic7700_qos_priv {
struct plat_stmmacenet_data *plat_dat;
+ struct regmap *eic7700_hsp_regmap;
+ u32 eth_axi_lp_ctrl_offset;
+ u32 eth_phy_ctrl_offset;
+ u32 eth_clk_offset;
+ u32 eth_txd_offset;
+ u32 eth_rxd_offset;
+ u32 eth_clk_dly_param;
+ bool has_txd_offset;
+ bool has_rxd_offset;
};
static int eic7700_clks_config(void *priv, bool enabled)
@@ -61,8 +73,34 @@ static int eic7700_clks_config(void *priv, bool enabled)
static int eic7700_dwmac_init(struct device *dev, void *priv)
{
struct eic7700_qos_priv *dwc = priv;
+ int ret;
+
+ ret = eic7700_clks_config(dwc, true);
+ if (ret)
+ return ret;
+
+ ret = regmap_set_bits(dwc->eic7700_hsp_regmap,
+ dwc->eth_phy_ctrl_offset,
+ EIC7700_ETH_TX_CLK_SEL |
+ EIC7700_ETH_PHY_INTF_SELI);
+ if (ret) {
+ eic7700_clks_config(dwc, false);
+ return ret;
+ }
- return eic7700_clks_config(dwc, true);
+ regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_axi_lp_ctrl_offset,
+ EIC7700_ETH_CSYSREQ_VAL);
+
+ if (dwc->has_txd_offset)
+ regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_txd_offset, 0);
+
+ if (dwc->has_rxd_offset)
+ regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_rxd_offset, 0);
+
+ regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_clk_offset,
+ dwc->eth_clk_dly_param);
+
+ return 0;
}
static void eic7700_dwmac_exit(struct device *dev, void *priv)
@@ -93,13 +131,7 @@ static int eic7700_dwmac_probe(struct platform_device *pdev)
struct plat_stmmacenet_data *plat_dat;
struct stmmac_resources stmmac_res;
struct eic7700_qos_priv *dwc_priv;
- struct regmap *eic7700_hsp_regmap;
- u32 eth_axi_lp_ctrl_offset;
- u32 eth_phy_ctrl_offset;
- u32 eth_phy_ctrl_regset;
- u32 eth_rxd_dly_offset;
- u32 eth_dly_param = 0;
- u32 delay_ps;
+ u32 delay_ps, val;
int i, ret;
ret = stmmac_get_platform_resources(pdev, &stmmac_res);
@@ -119,10 +151,20 @@ static int eic7700_dwmac_probe(struct platform_device *pdev)
/* Read rx-internal-delay-ps and update rx_clk delay */
if (!of_property_read_u32(pdev->dev.of_node,
"rx-internal-delay-ps", &delay_ps)) {
- u32 val = min(delay_ps / 100, EIC7700_MAX_DELAY_UNIT);
+ if (delay_ps % EIC7700_DELAY_STEP_PS)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "rx delay must be multiple of %dps\n",
+ EIC7700_DELAY_STEP_PS);
+
+ if (delay_ps > EIC7700_MAX_DELAY_PS)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "rx delay out of range\n");
- eth_dly_param &= ~EIC7700_ETH_RX_ADJ_DELAY;
- eth_dly_param |= FIELD_PREP(EIC7700_ETH_RX_ADJ_DELAY, val);
+ val = delay_ps / EIC7700_DELAY_STEP_PS;
+
+ dwc_priv->eth_clk_dly_param &= ~EIC7700_ETH_RX_ADJ_DELAY;
+ dwc_priv->eth_clk_dly_param |=
+ FIELD_PREP(EIC7700_ETH_RX_ADJ_DELAY, val);
} else {
return dev_err_probe(&pdev->dev, -EINVAL,
"missing required property rx-internal-delay-ps\n");
@@ -131,55 +173,65 @@ static int eic7700_dwmac_probe(struct platform_device *pdev)
/* Read tx-internal-delay-ps and update tx_clk delay */
if (!of_property_read_u32(pdev->dev.of_node,
"tx-internal-delay-ps", &delay_ps)) {
- u32 val = min(delay_ps / 100, EIC7700_MAX_DELAY_UNIT);
+ if (delay_ps % EIC7700_DELAY_STEP_PS)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "tx delay must be multiple of %dps\n",
+ EIC7700_DELAY_STEP_PS);
+
+ if (delay_ps > EIC7700_MAX_DELAY_PS)
+ return dev_err_probe(&pdev->dev, -EINVAL,
+ "tx delay out of range\n");
- eth_dly_param &= ~EIC7700_ETH_TX_ADJ_DELAY;
- eth_dly_param |= FIELD_PREP(EIC7700_ETH_TX_ADJ_DELAY, val);
+ val = delay_ps / EIC7700_DELAY_STEP_PS;
+
+ dwc_priv->eth_clk_dly_param &= ~EIC7700_ETH_TX_ADJ_DELAY;
+ dwc_priv->eth_clk_dly_param |=
+ FIELD_PREP(EIC7700_ETH_TX_ADJ_DELAY, val);
} else {
return dev_err_probe(&pdev->dev, -EINVAL,
"missing required property tx-internal-delay-ps\n");
}
- eic7700_hsp_regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
- "eswin,hsp-sp-csr");
- if (IS_ERR(eic7700_hsp_regmap))
+ dwc_priv->eic7700_hsp_regmap =
+ syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "eswin,hsp-sp-csr");
+ if (IS_ERR(dwc_priv->eic7700_hsp_regmap))
return dev_err_probe(&pdev->dev,
- PTR_ERR(eic7700_hsp_regmap),
+ PTR_ERR(dwc_priv->eic7700_hsp_regmap),
"Failed to get hsp-sp-csr regmap\n");
ret = of_property_read_u32_index(pdev->dev.of_node,
"eswin,hsp-sp-csr",
- 1, &eth_phy_ctrl_offset);
+ 1, &dwc_priv->eth_phy_ctrl_offset);
if (ret)
return dev_err_probe(&pdev->dev, ret,
"can't get eth_phy_ctrl_offset\n");
- regmap_read(eic7700_hsp_regmap, eth_phy_ctrl_offset,
- &eth_phy_ctrl_regset);
- eth_phy_ctrl_regset |=
- (EIC7700_ETH_TX_CLK_SEL | EIC7700_ETH_PHY_INTF_SELI);
- regmap_write(eic7700_hsp_regmap, eth_phy_ctrl_offset,
- eth_phy_ctrl_regset);
-
ret = of_property_read_u32_index(pdev->dev.of_node,
"eswin,hsp-sp-csr",
- 2, &eth_axi_lp_ctrl_offset);
+ 2, &dwc_priv->eth_axi_lp_ctrl_offset);
if (ret)
return dev_err_probe(&pdev->dev, ret,
"can't get eth_axi_lp_ctrl_offset\n");
- regmap_write(eic7700_hsp_regmap, eth_axi_lp_ctrl_offset,
- EIC7700_ETH_CSYSREQ_VAL);
-
ret = of_property_read_u32_index(pdev->dev.of_node,
"eswin,hsp-sp-csr",
- 3, &eth_rxd_dly_offset);
+ 3, &dwc_priv->eth_clk_offset);
if (ret)
return dev_err_probe(&pdev->dev, ret,
- "can't get eth_rxd_dly_offset\n");
+ "can't get eth_clk_offset\n");
+
+ ret = of_property_read_u32_index(pdev->dev.of_node,
+ "eswin,hsp-sp-csr",
+ 4, &dwc_priv->eth_txd_offset);
+ if (!ret)
+ dwc_priv->has_txd_offset = true;
- regmap_write(eic7700_hsp_regmap, eth_rxd_dly_offset,
- eth_dly_param);
+ ret = of_property_read_u32_index(pdev->dev.of_node,
+ "eswin,hsp-sp-csr",
+ 5, &dwc_priv->eth_rxd_offset);
+ if (!ret)
+ dwc_priv->has_rxd_offset = true;
plat_dat->num_clks = ARRAY_SIZE(eic7700_clk_names);
plat_dat->clks = devm_kcalloc(&pdev->dev,
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 5407d2ed71b3..43aa1bfd41cf 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -211,12 +211,12 @@ static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
switch (stringset) {
case ETH_SS_STATS:
- for (i = 0; i < dev->real_num_rx_queues; i++)
+ for (i = 0; i < dev->num_tx_queues; i++)
for (j = 0; j < IFB_Q_STATS_LEN; j++)
ethtool_sprintf(&p, "rx_queue_%u_%.18s",
i, ifb_q_stats_desc[j].desc);
- for (i = 0; i < dev->real_num_tx_queues; i++)
+ for (i = 0; i < dev->num_tx_queues; i++)
for (j = 0; j < IFB_Q_STATS_LEN; j++)
ethtool_sprintf(&p, "tx_queue_%u_%.18s",
i, ifb_q_stats_desc[j].desc);
@@ -229,8 +229,7 @@ static int ifb_get_sset_count(struct net_device *dev, int sset)
{
switch (sset) {
case ETH_SS_STATS:
- return IFB_Q_STATS_LEN * (dev->real_num_rx_queues +
- dev->real_num_tx_queues);
+ return IFB_Q_STATS_LEN * dev->num_tx_queues * 2;
default:
return -EOPNOTSUPP;
}
@@ -262,12 +261,12 @@ static void ifb_get_ethtool_stats(struct net_device *dev,
struct ifb_q_private *txp;
int i;
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = 0; i < dev->num_tx_queues; i++) {
txp = dp->tx_private + i;
ifb_fill_stats_data(&data, &txp->rx_stats);
}
- for (i = 0; i < dev->real_num_tx_queues; i++) {
+ for (i = 0; i < dev->num_tx_queues; i++) {
txp = dp->tx_private + i;
ifb_fill_stats_data(&data, &txp->tx_stats);
}
diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
index 22c555dd962e..a6b777a9c2d9 100644
--- a/drivers/net/ovpn/io.c
+++ b/drivers/net/ovpn/io.c
@@ -201,7 +201,7 @@ void ovpn_decrypt_post(void *data, int ret)
skb = NULL;
drop:
if (unlikely(skb))
- dev_dstats_rx_dropped(peer->ovpn->dev);
+ ovpn_dev_dstats_rx_dropped(peer->ovpn->dev);
kfree_skb(skb);
drop_nocount:
if (likely(peer))
@@ -225,7 +225,7 @@ void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb)
net_info_ratelimited("%s: no available key for peer %u, key-id: %u\n",
netdev_name(peer->ovpn->dev), peer->id,
key_id);
- dev_dstats_rx_dropped(peer->ovpn->dev);
+ ovpn_dev_dstats_rx_dropped(peer->ovpn->dev);
kfree_skb(skb);
ovpn_peer_put(peer);
return;
@@ -301,7 +301,7 @@ err_unlock:
rcu_read_unlock();
err:
if (unlikely(skb))
- dev_dstats_tx_dropped(peer->ovpn->dev);
+ ovpn_dev_dstats_tx_dropped(peer->ovpn->dev);
if (likely(peer))
ovpn_peer_put(peer);
if (likely(ks))
@@ -343,7 +343,7 @@ static void ovpn_send(struct ovpn_priv *ovpn, struct sk_buff *skb,
*/
skb_list_walk_safe(skb, curr, next) {
if (unlikely(!ovpn_encrypt_one(peer, curr))) {
- dev_dstats_tx_dropped(ovpn->dev);
+ ovpn_dev_dstats_tx_dropped(ovpn->dev);
kfree_skb(curr);
}
}
@@ -414,7 +414,7 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(!curr)) {
net_err_ratelimited("%s: skb_share_check failed for payload packet\n",
netdev_name(dev));
- dev_dstats_tx_dropped(ovpn->dev);
+ ovpn_dev_dstats_tx_dropped(ovpn->dev);
continue;
}
@@ -440,7 +440,7 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
drop:
ovpn_peer_put(peer);
drop_no_peer:
- dev_dstats_tx_dropped(ovpn->dev);
+ ovpn_dev_dstats_tx_dropped(ovpn->dev);
skb_tx_error(skb);
kfree_skb_list(skb);
return NETDEV_TX_OK;
diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c
index 2e0420febda0..9993c1dfe471 100644
--- a/drivers/net/ovpn/main.c
+++ b/drivers/net/ovpn/main.c
@@ -92,6 +92,8 @@ static void ovpn_net_uninit(struct net_device *dev)
{
struct ovpn_priv *ovpn = netdev_priv(dev);
+ disable_delayed_work_sync(&ovpn->keepalive_work);
+ ovpn_peers_free(ovpn, NULL, OVPN_DEL_PEER_REASON_TEARDOWN);
gro_cells_destroy(&ovpn->gro_cells);
}
@@ -208,15 +210,6 @@ static int ovpn_newlink(struct net_device *dev,
return register_netdevice(dev);
}
-static void ovpn_dellink(struct net_device *dev, struct list_head *head)
-{
- struct ovpn_priv *ovpn = netdev_priv(dev);
-
- cancel_delayed_work_sync(&ovpn->keepalive_work);
- ovpn_peers_free(ovpn, NULL, OVPN_DEL_PEER_REASON_TEARDOWN);
- unregister_netdevice_queue(dev, head);
-}
-
static int ovpn_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ovpn_priv *ovpn = netdev_priv(dev);
@@ -235,7 +228,6 @@ static struct rtnl_link_ops ovpn_link_ops = {
.policy = ovpn_policy,
.maxtype = IFLA_OVPN_MAX,
.newlink = ovpn_newlink,
- .dellink = ovpn_dellink,
.fill_info = ovpn_fill_info,
};
diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c
index 291e2e5bb450..4c66c1ec497e 100644
--- a/drivers/net/ovpn/netlink.c
+++ b/drivers/net/ovpn/netlink.c
@@ -462,10 +462,12 @@ int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info)
sock_release:
ovpn_socket_release(peer);
peer_release:
- /* release right away because peer was not yet hashed, thus it is not
- * used in any context
+ /* For UDP, the peer is unreachable until added to the hashtables, so
+ * dropping the initial reference is enough. For TCP, the peer may be
+ * concurrently reachable via sk_user_data->peer until
+ * ovpn_socket_release() detaches; rely on the refcount.
*/
- ovpn_peer_release(peer);
+ ovpn_peer_put(peer);
return ret;
}
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index c02dfab51a6e..a09d61296425 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -354,7 +354,7 @@ static void ovpn_peer_release_rcu(struct rcu_head *head)
* ovpn_peer_release - release peer private members
* @peer: the peer to release
*/
-void ovpn_peer_release(struct ovpn_peer *peer)
+static void ovpn_peer_release(struct ovpn_peer *peer)
{
ovpn_crypto_state_release(&peer->crypto);
spin_lock_bh(&peer->lock);
@@ -1034,14 +1034,29 @@ static int ovpn_peer_add_p2p(struct ovpn_priv *ovpn, struct ovpn_peer *peer)
*/
int ovpn_peer_add(struct ovpn_priv *ovpn, struct ovpn_peer *peer)
{
+ int ret = -ENODEV;
+
+ /* Prevent adding new peers while destroying the ovpn interface.
+ * Failing to do so would end up holding the device reference
+ * endlessly hostage of the new peer object with no chance of
+ * release..
+ */
+ netdev_lock(ovpn->dev);
+ if (ovpn->dev->reg_state != NETREG_REGISTERED)
+ goto out;
+
switch (ovpn->mode) {
case OVPN_MODE_MP:
- return ovpn_peer_add_mp(ovpn, peer);
+ ret = ovpn_peer_add_mp(ovpn, peer);
+ break;
case OVPN_MODE_P2P:
- return ovpn_peer_add_p2p(ovpn, peer);
+ ret = ovpn_peer_add_p2p(ovpn, peer);
+ break;
}
+out:
+ netdev_unlock(ovpn->dev);
- return -EOPNOTSUPP;
+ return ret;
}
/**
diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
index 328401570cba..86c8cffada6d 100644
--- a/drivers/net/ovpn/peer.h
+++ b/drivers/net/ovpn/peer.h
@@ -127,7 +127,6 @@ static inline bool ovpn_peer_hold(struct ovpn_peer *peer)
return kref_get_unless_zero(&peer->refcount);
}
-void ovpn_peer_release(struct ovpn_peer *peer);
void ovpn_peer_release_kref(struct kref *kref);
/**
diff --git a/drivers/net/ovpn/stats.h b/drivers/net/ovpn/stats.h
index 53433d8b6c33..3a45b97c0056 100644
--- a/drivers/net/ovpn/stats.h
+++ b/drivers/net/ovpn/stats.h
@@ -11,6 +11,8 @@
#ifndef _NET_OVPN_OVPNSTATS_H_
#define _NET_OVPN_OVPNSTATS_H_
+#include <linux/netdevice.h>
+
/* one stat */
struct ovpn_peer_stat {
atomic64_t bytes;
@@ -44,4 +46,18 @@ static inline void ovpn_peer_stats_increment_tx(struct ovpn_peer_stats *stats,
ovpn_peer_stats_increment(&stats->tx, n);
}
+static inline void ovpn_dev_dstats_tx_dropped(struct net_device *dev)
+{
+ local_bh_disable();
+ dev_dstats_tx_dropped(dev);
+ local_bh_enable();
+}
+
+static inline void ovpn_dev_dstats_rx_dropped(struct net_device *dev)
+{
+ local_bh_disable();
+ dev_dstats_rx_dropped(dev);
+ local_bh_enable();
+}
+
#endif /* _NET_OVPN_OVPNSTATS_H_ */
diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
index 65054cc84be5..433bd07a4f1b 100644
--- a/drivers/net/ovpn/tcp.c
+++ b/drivers/net/ovpn/tcp.c
@@ -152,7 +152,7 @@ err:
if (WARN_ON(!ovpn_peer_hold(peer)))
goto err_nopeer;
schedule_work(&peer->tcp.defer_del_work);
- dev_dstats_rx_dropped(peer->ovpn->dev);
+ ovpn_dev_dstats_rx_dropped(peer->ovpn->dev);
err_nopeer:
kfree_skb(skb);
}
@@ -298,9 +298,9 @@ static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk)
} while (peer->tcp.out_msg.len > 0);
if (!peer->tcp.out_msg.len) {
- preempt_disable();
+ local_bh_disable();
dev_dstats_tx_add(peer->ovpn->dev, skb->len);
- preempt_enable();
+ local_bh_enable();
}
kfree_skb(peer->tcp.out_msg.skb);
@@ -331,7 +331,7 @@ static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
ovpn_tcp_send_sock(peer, sk);
if (peer->tcp.out_msg.skb) {
- dev_dstats_tx_dropped(peer->ovpn->dev);
+ ovpn_dev_dstats_tx_dropped(peer->ovpn->dev);
kfree_skb(skb);
return;
}
@@ -353,7 +353,7 @@ void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk,
if (sock_owned_by_user(sk)) {
if (skb_queue_len(&peer->tcp.out_queue) >=
READ_ONCE(net_hotdata.max_backlog)) {
- dev_dstats_tx_dropped(peer->ovpn->dev);
+ ovpn_dev_dstats_tx_dropped(peer->ovpn->dev);
kfree_skb(skb);
goto unlock;
}
@@ -581,14 +581,19 @@ static void ovpn_tcp_close(struct sock *sk, long timeout)
rcu_read_lock();
sock = rcu_dereference_sk_user_data(sk);
- if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) {
+ if (!sock) {
rcu_read_unlock();
return;
}
+
peer = sock->peer;
+ if (!peer || !ovpn_peer_hold(peer)) {
+ rcu_read_unlock();
+ return;
+ }
rcu_read_unlock();
- ovpn_peer_del(sock->peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT);
+ ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT);
peer->tcp.sk_cb.prot->close(sk, timeout);
ovpn_peer_put(peer);
}
diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c
index 059e896b4a2f..8811aa9eedeb 100644
--- a/drivers/net/ovpn/udp.c
+++ b/drivers/net/ovpn/udp.c
@@ -125,7 +125,7 @@ static int ovpn_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
return 0;
drop:
- dev_dstats_rx_dropped(ovpn->dev);
+ ovpn_dev_dstats_rx_dropped(ovpn->dev);
drop_noovpn:
kfree_skb(skb);
return 0;
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index d48aa7231b37..126951741428 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -940,6 +940,14 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_eee_abilities);
*/
int genphy_c45_an_config_eee_aneg(struct phy_device *phydev)
{
+ /* Writing MMD AN advertisements while autoneg is disabled has no
+ * effect on link-partner negotiation, but on some PHYs (e.g. the
+ * Broadcom BCM54213PE) the write itself disturbs the receive
+ * datapath. Skip it.
+ */
+ if (phydev->autoneg == AUTONEG_DISABLE)
+ return 0;
+
if (!phydev->eee_cfg.eee_enabled) {
__ETHTOOL_DECLARE_LINK_MODE_MASK(adv) = {};
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index c2cdf1ae3542..3370eb822017 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2877,7 +2877,8 @@ EXPORT_SYMBOL(phy_advertise_supported);
*/
void phy_advertise_eee_all(struct phy_device *phydev)
{
- linkmode_copy(phydev->advertising_eee, phydev->supported_eee);
+ linkmode_andnot(phydev->advertising_eee, phydev->supported_eee,
+ phydev->eee_disabled_modes);
}
EXPORT_SYMBOL_GPL(phy_advertise_eee_all);
@@ -2903,7 +2904,8 @@ EXPORT_SYMBOL_GPL(phy_advertise_eee_all);
*/
void phy_support_eee(struct phy_device *phydev)
{
- linkmode_copy(phydev->advertising_eee, phydev->supported_eee);
+ linkmode_andnot(phydev->advertising_eee, phydev->supported_eee,
+ phydev->eee_disabled_modes);
phydev->eee_cfg.tx_lpi_enabled = true;
phydev->eee_cfg.eee_enabled = true;
diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c
index 87aa4f4e9724..69dbdbde9d71 100644
--- a/drivers/net/pse-pd/pse_core.c
+++ b/drivers/net/pse-pd/pse_core.c
@@ -210,7 +210,7 @@ static int of_load_pse_pis(struct pse_controller_dev *pcdev)
ret = of_load_pse_pi_pairsets(node, &pi, ret);
if (ret)
goto out;
- } else if (ret != ENOENT) {
+ } else if (ret != -ENOENT) {
dev_err(pcdev->dev,
"error: wrong number of pairsets. Should be 1 or 2, got %d (%pOF)\n",
ret, node);
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index b8240737dc51..a590e07ce0a9 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -919,11 +919,11 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
struct tap_queue *q = file->private_data;
struct tap_dev *tap;
void __user *argp = (void __user *)arg;
+ struct sockaddr_storage ss = {};
struct ifreq __user *ifr = argp;
unsigned int __user *up = argp;
unsigned short u;
int __user *sp = argp;
- struct sockaddr_storage ss;
int s;
int ret;
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 0bdb38edd915..e57588c19c80 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -3,7 +3,6 @@
* Copyright (c) 2005-2011 Atheros Communications Inc.
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
* Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
*/
@@ -1947,15 +1946,15 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id)
ret = -ESHUTDOWN;
ath10k_dbg(ar, ATH10K_DBG_WMI,
"drop wmi command %d, hardware is wedged\n", cmd_id);
- }
- /* try to send pending beacons first. they take priority */
- ath10k_wmi_tx_beacons_nowait(ar);
+ } else {
+ /* try to send pending beacons first. they take priority */
+ ath10k_wmi_tx_beacons_nowait(ar);
- ret = ath10k_wmi_cmd_send_nowait(ar, skb, cmd_id);
-
- if (ret && test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags))
- ret = -ESHUTDOWN;
+ ret = ath10k_wmi_cmd_send_nowait(ar, skb, cmd_id);
+ if (ret && test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags))
+ ret = -ESHUTDOWN;
+ }
(ret != -EAGAIN);
}), 3 * HZ);
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index fe79109adc70..2a413e3a07a7 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -1761,6 +1761,7 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar,
int buf_first_hdr_len, buf_first_len;
struct hal_rx_desc *ldesc;
int space_extra, rem_len, buf_len;
+ bool is_continuation;
u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
/* As the msdu is spread across multiple rx buffers,
@@ -1810,7 +1811,8 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar,
rem_len = msdu_len - buf_first_len;
while ((skb = __skb_dequeue(msdu_list)) != NULL && rem_len > 0) {
rxcb = ATH11K_SKB_RXCB(skb);
- if (rxcb->is_continuation)
+ is_continuation = rxcb->is_continuation;
+ if (is_continuation)
buf_len = DP_RX_BUFFER_SIZE - hal_rx_desc_sz;
else
buf_len = rem_len;
@@ -1828,7 +1830,7 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar,
dev_kfree_skb_any(skb);
rem_len -= buf_len;
- if (!rxcb->is_continuation)
+ if (!is_continuation)
break;
}
@@ -2214,8 +2216,7 @@ ath11k_dp_rx_h_find_peer(struct ath11k_base *ab, struct sk_buff *msdu)
lockdep_assert_held(&ab->base_lock);
- if (rxcb->peer_id)
- peer = ath11k_peer_find_by_id(ab, rxcb->peer_id);
+ peer = ath11k_peer_find_by_id(ab, rxcb->peer_id);
if (peer)
return peer;
diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
index e821e5a62c1c..98bd9e3f0aae 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -1387,14 +1387,22 @@ EXPORT_SYMBOL(ath11k_hal_srng_deinit);
void ath11k_hal_srng_clear(struct ath11k_base *ab)
{
- /* No need to memset rdp and wrp memory since each individual
- * segment would get cleared in ath11k_hal_srng_src_hw_init()
- * and ath11k_hal_srng_dst_hw_init().
+ /*
+ * Preserve the shared pointer buffers, but clear the previous
+ * firmware instance's hp/tp state before handing them back to FW.
+ * LMAC rings reuse this shared memory without going through the
+ * normal SRNG hw-init path that zeros non-LMAC ring pointers.
*/
memset(ab->hal.srng_list, 0,
sizeof(ab->hal.srng_list));
memset(ab->hal.shadow_reg_addr, 0,
sizeof(ab->hal.shadow_reg_addr));
+ if (ab->hal.rdp.vaddr)
+ memset(ab->hal.rdp.vaddr, 0,
+ sizeof(*ab->hal.rdp.vaddr) * HAL_SRNG_RING_ID_MAX);
+ if (ab->hal.wrp.vaddr)
+ memset(ab->hal.wrp.vaddr, 0,
+ sizeof(*ab->hal.wrp.vaddr) * HAL_SRNG_NUM_LMAC_RINGS);
ab->hal.avail_blk_resource = 0;
ab->hal.current_blk_index = 0;
ab->hal.num_shadow_reg_configured = 0;
diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.c b/drivers/net/wireless/ath/ath11k/hal_rx.c
index 753bd93f0212..51e0840bc0d1 100644
--- a/drivers/net/wireless/ath/ath11k/hal_rx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_rx.c
@@ -1467,11 +1467,8 @@ ath11k_hal_rx_parse_mon_status_tlv(struct ath11k_base *ab,
case HAL_RX_MPDU_START: {
struct hal_rx_mpdu_info *mpdu_info =
(struct hal_rx_mpdu_info *)tlv_data;
- u16 peer_id;
- peer_id = ath11k_hal_rx_mpduinfo_get_peerid(ab, mpdu_info);
- if (peer_id)
- ppdu_info->peer_id = peer_id;
+ ppdu_info->peer_id = ath11k_hal_rx_mpduinfo_get_peerid(ab, mpdu_info);
break;
}
case HAL_RXPCU_PPDU_END_INFO: {
diff --git a/drivers/net/wireless/ath/ath11k/testmode.c b/drivers/net/wireless/ath/ath11k/testmode.c
index a9751ea2a0b7..c72eed358f6d 100644
--- a/drivers/net/wireless/ath/ath11k/testmode.c
+++ b/drivers/net/wireless/ath/ath11k/testmode.c
@@ -457,6 +457,7 @@ static int ath11k_tm_cmd_wmi_ftm(struct ath11k *ar, struct nlattr *tb[])
ret = ath11k_wmi_cmd_send(wmi, skb, cmd_id);
if (ret) {
ath11k_warn(ar->ab, "failed to send wmi ftm command: %d\n", ret);
+ dev_kfree_skb(skb);
goto out;
}
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 40747fba3b0c..dca6e011cc40 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -9299,7 +9299,7 @@ int ath11k_wmi_hw_data_filter_cmd(struct ath11k *ar, u32 vdev_id,
{
struct wmi_hw_data_filter_cmd *cmd;
struct sk_buff *skb;
- int len;
+ int ret, len;
len = sizeof(*cmd);
skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -9324,7 +9324,13 @@ int ath11k_wmi_hw_data_filter_cmd(struct ath11k *ar, u32 vdev_id,
"hw data filter enable %d filter_bitmap 0x%x\n",
enable, filter_bitmap);
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_HW_DATA_FILTER_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_wow_host_wakeup_ind(struct ath11k *ar)
@@ -9332,6 +9338,7 @@ int ath11k_wmi_wow_host_wakeup_ind(struct ath11k *ar)
struct wmi_wow_host_wakeup_ind *cmd;
struct sk_buff *skb;
size_t len;
+ int ret;
len = sizeof(*cmd);
skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -9345,14 +9352,20 @@ int ath11k_wmi_wow_host_wakeup_ind(struct ath11k *ar)
ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow host wakeup ind\n");
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_wow_enable(struct ath11k *ar)
{
struct wmi_wow_enable_cmd *cmd;
struct sk_buff *skb;
- int len;
+ int ret, len;
len = sizeof(*cmd);
skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -9367,7 +9380,13 @@ int ath11k_wmi_wow_enable(struct ath11k *ar)
cmd->pause_iface_config = WOW_IFACE_PAUSE_ENABLED;
ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow enable\n");
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar,
@@ -9376,7 +9395,7 @@ int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar,
struct sk_buff *skb;
struct wmi_scan_prob_req_oui_cmd *cmd;
u32 prob_req_oui;
- int len;
+ int ret, len;
prob_req_oui = (((u32)mac_addr[0]) << 16) |
(((u32)mac_addr[1]) << 8) | mac_addr[2];
@@ -9395,7 +9414,13 @@ int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar,
ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "scan prob req oui %d\n",
prob_req_oui);
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_SCAN_PROB_REQ_OUI_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_SCAN_PROB_REQ_OUI_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_SCAN_PROB_REQ_OUI_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_wow_add_wakeup_event(struct ath11k *ar, u32 vdev_id,
@@ -9405,6 +9430,7 @@ int ath11k_wmi_wow_add_wakeup_event(struct ath11k *ar, u32 vdev_id,
struct wmi_wow_add_del_event_cmd *cmd;
struct sk_buff *skb;
size_t len;
+ int ret;
len = sizeof(*cmd);
skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -9422,7 +9448,13 @@ int ath11k_wmi_wow_add_wakeup_event(struct ath11k *ar, u32 vdev_id,
ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow add wakeup event %s enable %d vdev_id %d\n",
wow_wakeup_event(event), enable, vdev_id);
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_wow_add_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id,
@@ -9435,6 +9467,7 @@ int ath11k_wmi_wow_add_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id,
struct sk_buff *skb;
u8 *ptr;
size_t len;
+ int ret;
len = sizeof(*cmd) +
sizeof(*tlv) + /* array struct */
@@ -9527,7 +9560,13 @@ int ath11k_wmi_wow_add_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id,
ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow add pattern vdev_id %d pattern_id %d pattern_offset %d\n",
vdev_id, pattern_id, pattern_offset);
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_WOW_ADD_WAKE_PATTERN_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_wow_del_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id)
@@ -9535,6 +9574,7 @@ int ath11k_wmi_wow_del_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id)
struct wmi_wow_del_pattern_cmd *cmd;
struct sk_buff *skb;
size_t len;
+ int ret;
len = sizeof(*cmd);
skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
@@ -9553,7 +9593,13 @@ int ath11k_wmi_wow_del_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id)
ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow del pattern vdev_id %d pattern_id %d\n",
vdev_id, pattern_id);
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_WOW_DEL_WAKE_PATTERN_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
static struct sk_buff *
@@ -9697,6 +9743,7 @@ int ath11k_wmi_wow_config_pno(struct ath11k *ar, u32 vdev_id,
struct wmi_pno_scan_req *pno_scan)
{
struct sk_buff *skb;
+ int ret;
if (pno_scan->enable)
skb = ath11k_wmi_op_gen_config_pno_start(ar, vdev_id, pno_scan);
@@ -9706,7 +9753,13 @@ int ath11k_wmi_wow_config_pno(struct ath11k *ar, u32 vdev_id,
if (IS_ERR_OR_NULL(skb))
return -ENOMEM;
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
static void ath11k_wmi_fill_ns_offload(struct ath11k *ar,
@@ -9824,6 +9877,7 @@ int ath11k_wmi_arp_ns_offload(struct ath11k *ar,
u8 *buf_ptr;
size_t len;
u8 ns_cnt, ns_ext_tuples = 0;
+ int ret;
offload = &arvif->arp_ns_offload;
ns_cnt = offload->ipv6_count;
@@ -9862,7 +9916,13 @@ int ath11k_wmi_arp_ns_offload(struct ath11k *ar,
if (ns_ext_tuples)
ath11k_wmi_fill_ns_offload(ar, offload, &buf_ptr, enable, 1);
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_SET_ARP_NS_OFFLOAD_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_gtk_rekey_offload(struct ath11k *ar,
@@ -9870,7 +9930,7 @@ int ath11k_wmi_gtk_rekey_offload(struct ath11k *ar,
{
struct wmi_gtk_rekey_offload_cmd *cmd;
struct ath11k_rekey_data *rekey_data = &arvif->rekey_data;
- int len;
+ int ret, len;
struct sk_buff *skb;
__le64 replay_ctr;
@@ -9904,14 +9964,20 @@ int ath11k_wmi_gtk_rekey_offload(struct ath11k *ar,
ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "offload gtk rekey vdev: %d %d\n",
arvif->vdev_id, enable);
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID offload\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_gtk_rekey_getinfo(struct ath11k *ar,
struct ath11k_vif *arvif)
{
struct wmi_gtk_rekey_offload_cmd *cmd;
- int len;
+ int ret, len;
struct sk_buff *skb;
len = sizeof(*cmd);
@@ -9928,7 +9994,13 @@ int ath11k_wmi_gtk_rekey_getinfo(struct ath11k *ar,
ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "get gtk rekey vdev_id: %d\n",
arvif->vdev_id);
- return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+ ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID getinfo\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_pdev_set_bios_sar_table_param(struct ath11k *ar, const u8 *sar_val)
@@ -9938,6 +10010,7 @@ int ath11k_wmi_pdev_set_bios_sar_table_param(struct ath11k *ar, const u8 *sar_va
struct sk_buff *skb;
u8 *buf_ptr;
u32 len, sar_len_aligned, rsvd_len_aligned;
+ int ret;
sar_len_aligned = roundup(BIOS_SAR_TABLE_LEN, sizeof(u32));
rsvd_len_aligned = roundup(BIOS_SAR_RSVD1_LEN, sizeof(u32));
@@ -9968,7 +10041,13 @@ int ath11k_wmi_pdev_set_bios_sar_table_param(struct ath11k *ar, const u8 *sar_va
tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_BYTE) |
FIELD_PREP(WMI_TLV_LEN, rsvd_len_aligned);
- return ath11k_wmi_cmd_send(wmi, skb, WMI_PDEV_SET_BIOS_SAR_TABLE_CMDID);
+ ret = ath11k_wmi_cmd_send(wmi, skb, WMI_PDEV_SET_BIOS_SAR_TABLE_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_PDEV_SET_BIOS_SAR_TABLE_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_pdev_set_bios_geo_table_param(struct ath11k *ar)
@@ -9979,6 +10058,7 @@ int ath11k_wmi_pdev_set_bios_geo_table_param(struct ath11k *ar)
struct sk_buff *skb;
u8 *buf_ptr;
u32 len, rsvd_len_aligned;
+ int ret;
rsvd_len_aligned = roundup(BIOS_SAR_RSVD2_LEN, sizeof(u32));
len = sizeof(*cmd) + TLV_HDR_SIZE + rsvd_len_aligned;
@@ -9998,7 +10078,13 @@ int ath11k_wmi_pdev_set_bios_geo_table_param(struct ath11k *ar)
tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_BYTE) |
FIELD_PREP(WMI_TLV_LEN, rsvd_len_aligned);
- return ath11k_wmi_cmd_send(wmi, skb, WMI_PDEV_SET_BIOS_GEO_TABLE_CMDID);
+ ret = ath11k_wmi_cmd_send(wmi, skb, WMI_PDEV_SET_BIOS_GEO_TABLE_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_PDEV_SET_BIOS_GEO_TABLE_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
int ath11k_wmi_sta_keepalive(struct ath11k *ar,
@@ -10009,6 +10095,7 @@ int ath11k_wmi_sta_keepalive(struct ath11k *ar,
struct wmi_sta_keepalive_arp_resp *arp;
struct sk_buff *skb;
size_t len;
+ int ret;
len = sizeof(*cmd) + sizeof(*arp);
skb = ath11k_wmi_alloc_skb(wmi->wmi_ab, len);
@@ -10040,7 +10127,13 @@ int ath11k_wmi_sta_keepalive(struct ath11k *ar,
"sta keepalive vdev %d enabled %d method %d interval %d\n",
arg->vdev_id, arg->enabled, arg->method, arg->interval);
- return ath11k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID);
+ ret = ath11k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to send WMI_STA_KEEPALIVE_CMDID\n");
+ dev_kfree_skb(skb);
+ }
+
+ return ret;
}
bool ath11k_wmi_supports_6ghz_cc_ext(struct ath11k *ar)
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c
index df2334f3bad6..2cff9485c95a 100644
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -3446,7 +3446,9 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar,
arg->peer_eht_mcs_count++;
fallthrough;
default:
- if (!(link_sta->he_cap.he_cap_elem.phy_cap_info[0] &
+ if ((vif->type == NL80211_IFTYPE_AP ||
+ vif->type == NL80211_IFTYPE_MESH_POINT) &&
+ !(link_sta->he_cap.he_cap_elem.phy_cap_info[0] &
IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) {
bw_20 = &eht_cap->eht_mcs_nss_supp.only_20mhz;
@@ -3475,7 +3477,9 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar,
arg->punct_bitmap = ~arvif->punct_bitmap;
arg->eht_disable_mcs15 = link_conf->eht_disable_mcs15;
- if (!(link_sta->he_cap.he_cap_elem.phy_cap_info[0] &
+ if ((vif->type == NL80211_IFTYPE_AP ||
+ vif->type == NL80211_IFTYPE_MESH_POINT) &&
+ !(link_sta->he_cap.he_cap_elem.phy_cap_info[0] &
IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) {
if (bw_20->rx_tx_mcs13_max_nss)
max_nss = max(max_nss, u8_get_bits(bw_20->rx_tx_mcs13_max_nss,
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/constants.h b/drivers/net/wireless/intel/iwlwifi/mld/constants.h
index e2a5eecc18c3..890abcab3837 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mld/constants.h
@@ -1,11 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2024-2025 Intel Corporation
+ * Copyright (C) 2024-2026 Intel Corporation
*/
#ifndef __iwl_mld_constants_h__
#define __iwl_mld_constants_h__
-#define IWL_MLD_MISSED_BEACONS_SINCE_RX_THOLD 4
+#define IWL_MLD_MISSED_BEACONS_SINCE_RX_THOLD 6
#define IWL_MLD_MISSED_BEACONS_THRESHOLD 8
#define IWL_MLD_MISSED_BEACONS_THRESHOLD_LONG 19
#define IWL_MLD_BCN_LOSS_EXIT_ESR_THRESH_2_LINKS 5
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c
index ef98efc8fb1b..3a595a1c2e00 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/d3.c
@@ -1930,12 +1930,12 @@ int iwl_mld_wowlan_suspend(struct iwl_mld *mld, struct cfg80211_wowlan *wowlan)
if (WARN_ON(!wowlan))
return 1;
- IWL_DEBUG_WOWLAN(mld, "Starting the wowlan suspend flow\n");
-
bss_vif = iwl_mld_get_bss_vif(mld);
- if (WARN_ON(!bss_vif))
+ if (!bss_vif)
return 1;
+ IWL_DEBUG_WOWLAN(mld, "Starting the wowlan suspend flow\n");
+
if (!bss_vif->cfg.assoc) {
int ret;
/* If we're not associated, this must be netdetect */
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c
index b66e84d2365f..be2cdf43c72e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/link.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2024-2025 Intel Corporation
+ * Copyright (C) 2024-2026 Intel Corporation
*/
#include "constants.h"
@@ -504,7 +504,6 @@ void iwl_mld_remove_link(struct iwl_mld *mld,
struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(bss_conf->vif);
struct iwl_mld_link *link = iwl_mld_link_from_mac80211(bss_conf);
bool is_deflink = link == &mld_vif->deflink;
- u8 fw_id = link->fw_id;
if (WARN_ON(!link || link->active))
return;
@@ -512,15 +511,15 @@ void iwl_mld_remove_link(struct iwl_mld *mld,
iwl_mld_rm_link_from_fw(mld, bss_conf);
/* Continue cleanup on failure */
- if (!is_deflink)
- kfree_rcu(link, rcu_head);
-
RCU_INIT_POINTER(mld_vif->link[bss_conf->link_id], NULL);
- if (WARN_ON(fw_id >= mld->fw->ucode_capa.num_links))
+ if (WARN_ON(link->fw_id >= mld->fw->ucode_capa.num_links))
return;
- RCU_INIT_POINTER(mld->fw_id_to_bss_conf[fw_id], NULL);
+ RCU_INIT_POINTER(mld->fw_id_to_bss_conf[link->fw_id], NULL);
+
+ if (!is_deflink)
+ kfree_rcu(link, rcu_head);
}
void iwl_mld_handle_missed_beacon_notif(struct iwl_mld *mld,
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tx.c b/drivers/net/wireless/intel/iwlwifi/mld/tx.c
index 546d09a38dab..0bcb1ae69468 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/tx.c
@@ -834,7 +834,7 @@ static int iwl_mld_tx_tso_segment(struct iwl_mld *mld, struct sk_buff *skb,
return -EINVAL;
max_tid_amsdu_len = sta->cur->max_tid_amsdu_len[tid];
- if (!max_tid_amsdu_len)
+ if (!max_tid_amsdu_len || max_tid_amsdu_len == 1)
return iwl_tx_tso_segment(skb, 1, netdev_flags, mpdus_skbs);
/* Sub frame header + SNAP + IP header + TCP header + MSS */
@@ -846,6 +846,9 @@ static int iwl_mld_tx_tso_segment(struct iwl_mld *mld, struct sk_buff *skb,
*/
num_subframes = (max_tid_amsdu_len + pad) / (subf_len + pad);
+ if (WARN_ON_ONCE(!num_subframes))
+ return iwl_tx_tso_segment(skb, 1, netdev_flags, mpdus_skbs);
+
if (sta->max_amsdu_subframes &&
num_subframes > sta->max_amsdu_subframes)
num_subframes = sta->max_amsdu_subframes;
@@ -971,6 +974,16 @@ void iwl_mld_tx_from_txq(struct iwl_mld *mld, struct ieee80211_txq *txq)
u8 zero_addr[ETH_ALEN] = {};
/*
+ * Don't transmit during firmware restart. The firmware is dead,
+ * so iwl_trans_tx() would return -EIO for each frame. Avoid the
+ * overhead of dequeuing from mac80211 only to immediately free
+ * the skbs, and the potential memory pressure from rapid skb
+ * allocation churn during high-throughput restart scenarios.
+ */
+ if (unlikely(mld->fw_status.in_hw_restart))
+ return;
+
+ /*
* No need for threads to be pending here, they can leave the first
* taker all the work.
*
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index c523c5e82d4a..8ffa72aca3cf 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2025 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2026 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
*/
@@ -927,13 +927,18 @@ u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct iwl_mvm *mvm,
u16 iwl_mvm_mac_ctxt_get_beacon_flags(const struct iwl_fw *fw, u8 rate_idx)
{
- u16 flags = iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx);
bool is_new_rate = iwl_fw_lookup_cmd_ver(fw, BEACON_TEMPLATE_CMD, 0) > 10;
+ u16 flags = 0;
if (rate_idx <= IWL_LAST_CCK_RATE)
flags |= is_new_rate ? IWL_MAC_BEACON_CCK
: IWL_MAC_BEACON_CCK_V1;
+ if (iwl_fw_lookup_cmd_ver(fw, TX_CMD, 0) > 8)
+ flags |= iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx);
+ else
+ flags |= iwl_fw_rate_idx_to_plcp(rate_idx);
+
return flags;
}
@@ -962,6 +967,7 @@ static void iwl_mvm_mac_ctxt_set_tx(struct iwl_mvm *mvm,
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct ieee80211_tx_info *info;
+ u32 rate_n_flags = 0;
u8 rate;
u32 tx_flags;
@@ -981,18 +987,21 @@ static void iwl_mvm_mac_ctxt_set_tx(struct iwl_mvm *mvm,
IWL_UCODE_TLV_CAPA_BEACON_ANT_SELECTION)) {
iwl_mvm_toggle_tx_ant(mvm, &mvm->mgmt_last_antenna_idx);
- tx_params->rate_n_flags =
- cpu_to_le32(BIT(mvm->mgmt_last_antenna_idx) <<
- RATE_MCS_ANT_POS);
+ rate_n_flags |= BIT(mvm->mgmt_last_antenna_idx) <<
+ RATE_MCS_ANT_POS;
}
rate = iwl_mvm_mac_ctxt_get_beacon_rate(mvm, info, vif);
- tx_params->rate_n_flags |=
- cpu_to_le32(iwl_mvm_mac80211_idx_to_hwrate(mvm->fw, rate));
- if (rate == IWL_FIRST_CCK_RATE)
- tx_params->rate_n_flags |= cpu_to_le32(RATE_MCS_CCK_MSK_V1);
+ if (rate < IWL_FIRST_OFDM_RATE)
+ rate_n_flags |= RATE_MCS_MOD_TYPE_CCK;
+ else
+ rate_n_flags |= RATE_MCS_MOD_TYPE_LEGACY_OFDM;
+
+ rate_n_flags |= iwl_mvm_mac80211_idx_to_hwrate(mvm->fw, rate);
+ tx_params->rate_n_flags = iwl_mvm_v3_rate_to_fw(rate_n_flags,
+ mvm->fw_rates_ver);
}
int iwl_mvm_mac_ctxt_send_beacon_cmd(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index 4a33a032c2a7..f052537e9567 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2025 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2026 Intel Corporation
* Copyright (C) 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
*/
@@ -159,15 +159,9 @@ int iwl_mvm_legacy_rate_to_mac80211_idx(u32 rate_n_flags,
u8 iwl_mvm_mac80211_idx_to_hwrate(const struct iwl_fw *fw, int rate_idx)
{
- if (iwl_fw_lookup_cmd_ver(fw, TX_CMD, 0) > 8)
- /* In the new rate legacy rates are indexed:
- * 0 - 3 for CCK and 0 - 7 for OFDM.
- */
- return (rate_idx >= IWL_FIRST_OFDM_RATE ?
- rate_idx - IWL_FIRST_OFDM_RATE :
- rate_idx);
-
- return iwl_fw_rate_idx_to_plcp(rate_idx);
+ return rate_idx >= IWL_FIRST_OFDM_RATE ?
+ rate_idx - IWL_FIRST_OFDM_RATE :
+ rate_idx;
}
u8 iwl_mvm_mac80211_ac_to_ucode_ac(enum ieee80211_ac_numbers ac)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c
index a50e845cea42..64262bcca55d 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c
@@ -398,9 +398,9 @@ void iwl_trans_pcie_gen2_fw_alive(struct iwl_trans *trans)
mutex_unlock(&trans_pcie->mutex);
if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ)
- trans->step_urm = !!(iwl_read_umac_prph(trans,
- CNVI_PMU_STEP_FLOW) &
- CNVI_PMU_STEP_FLOW_FORCE_URM);
+ trans->step_urm = !!(iwl_read_prph(trans,
+ CNVI_PMU_STEP_FLOW) &
+ CNVI_PMU_STEP_FLOW_FORCE_URM);
}
static bool iwl_pcie_set_ltr(struct iwl_trans *trans)
diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c
index 3fa8592eb250..4b116fe6f9ea 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.c
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.c
@@ -1265,7 +1265,7 @@ int wilc_wlan_firmware_download(struct wilc *wilc, const u8 *buffer,
ret = acquire_bus(wilc, WILC_BUS_ACQUIRE_AND_WAKEUP);
if (ret)
- return ret;
+ goto fail;
wilc->hif_func->hif_read_reg(wilc, WILC_GLB_RESET_0, &reg);
reg &= ~BIT(10);
diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c
index 1b7bc7d63a2e..4405c8531888 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_imem.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c
@@ -1425,6 +1425,8 @@ imem_config_fail:
protocol_init_fail:
cancel_work_sync(&ipc_imem->run_state_worker);
ipc_task_deinit(ipc_imem->ipc_task);
+ if (ipc_imem->ipc_protocol)
+ ipc_protocol_deinit(ipc_imem->ipc_protocol);
ipc_task_init_fail:
kfree(ipc_imem->ipc_task);
ipc_task_fail:
diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index 714bcab97b60..08a0e7091ced 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -2072,8 +2072,10 @@ static int brcm_pcie_probe(struct platform_device *pdev)
return PTR_ERR(pcie->clk);
ret = of_pci_get_max_link_speed(np);
- if (pcie_get_link_speed(ret) == PCI_SPEED_UNKNOWN)
+ if (ret < 0 || ret > 3)
pcie->gen = 0;
+ else
+ pcie->gen = ret;
pcie->ssc = of_property_read_bool(np, "brcm,enable-ssc");
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index d3aefbec4de6..34c115d7c250 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -75,17 +75,4 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
int v9fs_open_to_dotl_flags(int flags);
-static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size)
-{
- /*
- * 32-bit need the lock, concurrent updates could break the
- * sequences and make i_size_read() loop forever.
- * 64-bit updates are atomic and can skip the locking.
- */
- if (sizeof(i_size) > sizeof(long))
- spin_lock(&inode->i_lock);
- i_size_write(inode, i_size);
- if (sizeof(i_size) > sizeof(long))
- spin_unlock(&inode->i_lock);
-}
#endif
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index d1508b1fe109..f468acb8ee7d 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1141,11 +1141,13 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
- v9inode->netfs.remote_i_size = stat->length;
+ spin_lock(&inode->i_lock);
+ netfs_write_remote_i_size(inode, stat->length);
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
- v9fs_i_size_write(inode, stat->length);
+ i_size_write(inode, stat->length);
/* not real number of blocks, but 512 byte ones ... */
inode->i_blocks = (stat->length + 512 - 1) >> 9;
+ spin_unlock(&inode->i_lock);
v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 71796a89bcf4..141fb54db65d 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -634,10 +634,12 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
- v9inode->netfs.remote_i_size = stat->st_size;
+ spin_lock(&inode->i_lock);
+ netfs_write_remote_i_size(inode, stat->st_size);
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
- v9fs_i_size_write(inode, stat->st_size);
+ i_size_write(inode, stat->st_size);
inode->i_blocks = stat->st_blocks;
+ spin_unlock(&inode->i_lock);
} else {
if (stat->st_result_mask & P9_STATS_ATIME) {
inode_set_atime(inode, stat->st_atime_sec,
@@ -662,13 +664,15 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
}
+ spin_lock(&inode->i_lock);
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) &&
stat->st_result_mask & P9_STATS_SIZE) {
- v9inode->netfs.remote_i_size = stat->st_size;
- v9fs_i_size_write(inode, stat->st_size);
+ netfs_write_remote_i_size(inode, stat->st_size);
+ i_size_write(inode, stat->st_size);
}
if (stat->st_result_mask & P9_STATS_BLOCKS)
inode->i_blocks = stat->st_blocks;
+ spin_unlock(&inode->i_lock);
}
if (stat->st_result_mask & P9_STATS_GEN)
inode->i_generation = stat->st_gen;
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index b49b8fe682f3..0d8f1982d596 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -30,6 +30,7 @@ kafs-y := \
server.o \
server_list.o \
super.o \
+ symlink.o \
validation.o \
vlclient.o \
vl_alias.o \
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index aaaa55878ffd..498b99ccdf0e 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -44,6 +44,8 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags);
+static int afs_dir_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
const struct file_operations afs_dir_file_operations = {
.open = afs_dir_open,
@@ -68,7 +70,7 @@ const struct inode_operations afs_dir_inode_operations = {
};
const struct address_space_operations afs_dir_aops = {
- .writepages = afs_single_writepages,
+ .writepages = afs_dir_writepages,
};
const struct dentry_operations afs_fs_dentry_operations = {
@@ -233,22 +235,13 @@ static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
struct iov_iter iter;
ssize_t ret;
loff_t i_size;
- bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
- !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
i_size = i_size_read(&dvnode->netfs.inode);
- if (is_dir) {
- if (i_size < AFS_DIR_BLOCK_SIZE)
- return afs_bad(dvnode, afs_file_error_dir_small);
- if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
- trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
- return -EFBIG;
- }
- } else {
- if (i_size > AFSPATHMAX) {
- trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
- return -EFBIG;
- }
+ if (i_size < AFS_DIR_BLOCK_SIZE)
+ return afs_bad(dvnode, afs_file_error_dir_small);
+ if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
+ trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ return -EFBIG;
}
/* Expand the storage. TODO: Shrink the storage too. */
@@ -277,24 +270,18 @@ static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
* buffer.
*/
ret = -ESTALE;
- } else if (is_dir) {
+ } else {
int ret2 = afs_dir_check(dvnode);
if (ret2 < 0)
ret = ret2;
- } else if (i_size < folioq_folio_size(dvnode->directory, 0)) {
- /* NUL-terminate a symlink. */
- char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0);
-
- symlink[i_size] = 0;
- kunmap_local(symlink);
}
}
return ret;
}
-ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
+static ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
{
ssize_t ret;
@@ -1763,13 +1750,20 @@ error:
return ret;
}
+static void afs_symlink_put(struct afs_operation *op)
+{
+ kfree(op->create.symlink);
+ op->create.symlink = NULL;
+ afs_create_put(op);
+}
+
static const struct afs_operation_ops afs_symlink_operation = {
.issue_afs_rpc = afs_fs_symlink,
.issue_yfs_rpc = yfs_fs_symlink,
.success = afs_create_success,
.aborted = afs_check_for_remote_deletion,
.edit_dir = afs_create_edit_dir,
- .put = afs_create_put,
+ .put = afs_symlink_put,
};
/*
@@ -1779,7 +1773,9 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *content)
{
struct afs_operation *op;
+ struct afs_symlink *symlink;
struct afs_vnode *dvnode = AFS_FS_I(dir);
+ size_t clen = strlen(content);
int ret;
_enter("{%llx:%llu},{%pd},%s",
@@ -1791,12 +1787,20 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
goto error;
ret = -EINVAL;
- if (strlen(content) >= AFSPATHMAX)
+ if (clen >= AFSPATHMAX)
+ goto error;
+
+ ret = -ENOMEM;
+ symlink = kmalloc_flex(struct afs_symlink, content, clen + 1, GFP_KERNEL);
+ if (!symlink)
goto error;
+ refcount_set(&symlink->ref, 1);
+ memcpy(symlink->content, content, clen + 1);
op = afs_alloc_operation(NULL, dvnode->volume);
if (IS_ERR(op)) {
ret = PTR_ERR(op);
+ kfree(symlink);
goto error;
}
@@ -1808,7 +1812,7 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
op->dentry = dentry;
op->ops = &afs_symlink_operation;
op->create.reason = afs_edit_dir_for_symlink;
- op->create.symlink = content;
+ op->create.symlink = symlink;
op->mtime = current_time(dir);
ret = afs_do_sync_operation(op);
afs_dir_unuse_cookie(dvnode, ret);
@@ -2192,28 +2196,33 @@ error:
}
/*
- * Write the file contents to the cache as a single blob.
+ * Write the directory contents to the cache as a single blob.
*/
-int afs_single_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+static int afs_dir_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
struct afs_vnode *dvnode = AFS_FS_I(mapping->host);
struct iov_iter iter;
- bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
- !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
int ret = 0;
/* Need to lock to prevent the folio queue and folios from being thrown
* away.
*/
- down_read(&dvnode->validate_lock);
+ if (!down_read_trylock(&dvnode->validate_lock)) {
+ if (wbc->sync_mode == WB_SYNC_NONE) {
+ /* The VFS will have undirtied the inode. */
+ netfs_single_mark_inode_dirty(&dvnode->netfs.inode);
+ return 0;
+ }
+ down_read(&dvnode->validate_lock);
+ }
- if (is_dir ?
- test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) :
- atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0,
i_size_read(&dvnode->netfs.inode));
ret = netfs_writeback_single(mapping, wbc, &iter);
+ if (ret == 1)
+ ret = 0; /* Skipped write due to lock conflict. */
}
up_read(&dvnode->validate_lock);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 85696ac984cc..0467742bfeee 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -427,21 +427,35 @@ static void afs_free_request(struct netfs_io_request *rreq)
afs_put_wb_key(rreq->netfs_priv2);
}
-static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
+/*
+ * Set the file size and block count, taking ->cb_lock and ->i_lock to maintain
+ * coherency and prevent 64-bit tearing on 32-bit arches.
+ *
+ * Also, estimate the number of 512 bytes blocks used, rounded up to nearest 1K
+ * for consistency with other AFS clients.
+ */
+void afs_set_i_size(struct afs_vnode *vnode, loff_t new_i_size)
{
- struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct inode *inode = &vnode->netfs.inode;
loff_t i_size;
write_seqlock(&vnode->cb_lock);
- i_size = i_size_read(&vnode->netfs.inode);
+ spin_lock(&inode->i_lock);
+ i_size = i_size_read(inode);
if (new_i_size > i_size) {
- i_size_write(&vnode->netfs.inode, new_i_size);
- inode_set_bytes(&vnode->netfs.inode, new_i_size);
+ i_size_write(inode, new_i_size);
+ inode_set_bytes(inode, round_up(new_i_size, 1024));
}
+ spin_unlock(&inode->i_lock);
write_sequnlock(&vnode->cb_lock);
fscache_update_cookie(afs_vnode_cache(vnode), NULL, &new_i_size);
}
+static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
+{
+ afs_set_i_size(AFS_FS_I(inode), new_i_size);
+}
+
static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq)
{
struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 95494d5f2b8a..a2ffd60889f8 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -886,7 +886,7 @@ void afs_fs_symlink(struct afs_operation *op)
namesz = name->len;
padsz = (4 - (namesz & 3)) & 3;
- c_namesz = strlen(op->create.symlink);
+ c_namesz = strlen(op->create.symlink->content);
c_padsz = (4 - (c_namesz & 3)) & 3;
reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
@@ -910,7 +910,7 @@ void afs_fs_symlink(struct afs_operation *op)
bp = (void *) bp + padsz;
}
*bp++ = htonl(c_namesz);
- memcpy(bp, op->create.symlink, c_namesz);
+ memcpy(bp, op->create.symlink->content, c_namesz);
bp = (void *) bp + c_namesz;
if (c_padsz > 0) {
memset(bp, 0, c_padsz);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index a5173434f786..3f48458694ba 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -25,96 +25,6 @@
#include "internal.h"
#include "afs_fs.h"
-void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
-{
- size_t size = strlen(op->create.symlink) + 1;
- size_t dsize = 0;
- char *p;
-
- if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
- mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
- return;
-
- vnode->directory_size = dsize;
- p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
- memcpy(p, op->create.symlink, size);
- kunmap_local(p);
- set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
- netfs_single_mark_inode_dirty(&vnode->netfs.inode);
-}
-
-static void afs_put_link(void *arg)
-{
- struct folio *folio = virt_to_folio(arg);
-
- kunmap_local(arg);
- folio_put(folio);
-}
-
-const char *afs_get_link(struct dentry *dentry, struct inode *inode,
- struct delayed_call *callback)
-{
- struct afs_vnode *vnode = AFS_FS_I(inode);
- struct folio *folio;
- char *content;
- ssize_t ret;
-
- if (!dentry) {
- /* RCU pathwalk. */
- if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
- return ERR_PTR(-ECHILD);
- goto good;
- }
-
- if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
- goto fetch;
-
- ret = afs_validate(vnode, NULL);
- if (ret < 0)
- return ERR_PTR(ret);
-
- if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
- test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
- goto good;
-
-fetch:
- ret = afs_read_single(vnode, NULL);
- if (ret < 0)
- return ERR_PTR(ret);
- set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
-
-good:
- folio = folioq_folio(vnode->directory, 0);
- folio_get(folio);
- content = kmap_local_folio(folio, 0);
- set_delayed_call(callback, afs_put_link, content);
- return content;
-}
-
-int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
-{
- DEFINE_DELAYED_CALL(done);
- const char *content;
- int len;
-
- content = afs_get_link(dentry, d_inode(dentry), &done);
- if (IS_ERR(content)) {
- do_delayed_call(&done);
- return PTR_ERR(content);
- }
-
- len = umin(strlen(content), buflen);
- if (copy_to_user(buffer, content, len))
- len = -EFAULT;
- do_delayed_call(&done);
- return len;
-}
-
-static const struct inode_operations afs_symlink_inode_operations = {
- .get_link = afs_get_link,
- .readlink = afs_readlink,
-};
-
static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
{
static unsigned long once_only;
@@ -214,7 +124,7 @@ static int afs_inode_init_from_status(struct afs_operation *op,
inode->i_mode = S_IFLNK | status->mode;
inode->i_op = &afs_symlink_inode_operations;
}
- inode->i_mapping->a_ops = &afs_dir_aops;
+ inode->i_mapping->a_ops = &afs_symlink_aops;
inode_nohighmem(inode);
mapping_set_release_always(inode->i_mapping);
break;
@@ -224,7 +134,8 @@ static int afs_inode_init_from_status(struct afs_operation *op,
return afs_protocol_error(NULL, afs_eproto_file_type);
}
- afs_set_i_size(vnode, status->size);
+ i_size_write(inode, status->size);
+ inode_set_bytes(inode, status->size);
afs_set_netfs_context(vnode);
vnode->invalid_before = status->data_version;
@@ -253,7 +164,8 @@ static void afs_apply_status(struct afs_operation *op,
{
struct afs_file_status *status = &vp->scb.status;
struct afs_vnode *vnode = vp->vnode;
- struct inode *inode = &vnode->netfs.inode;
+ struct netfs_inode *ictx = &vnode->netfs;
+ struct inode *inode = &ictx->inode;
struct timespec64 t;
umode_t mode;
bool unexpected_jump = false;
@@ -336,6 +248,8 @@ static void afs_apply_status(struct afs_operation *op,
}
if (data_changed) {
+ unsigned long long zero_point, size = status->size;
+
inode_set_iversion_raw(inode, status->data_version);
/* Only update the size if the data version jumped. If the
@@ -343,16 +257,25 @@ static void afs_apply_status(struct afs_operation *op,
* idea of what the size should be that's not the same as
* what's on the server.
*/
- vnode->netfs.remote_i_size = status->size;
- if (change_size || status->size > i_size_read(inode)) {
- afs_set_i_size(vnode, status->size);
+ spin_lock(&inode->i_lock);
+
+ if (change_size || size > i_size_read(inode)) {
+ /* We can read the sizes directly as we hold i_lock. */
+ zero_point = ictx->_zero_point;
+
if (unexpected_jump)
- vnode->netfs.zero_point = status->size;
+ zero_point = size;
+ netfs_write_sizes(inode, size, size, zero_point);
+ inode_set_bytes(inode, size);
inode_set_ctime_to_ts(inode, t);
inode_set_atime_to_ts(inode, t);
+ } else {
+ netfs_write_remote_i_size(inode, size);
}
+ spin_unlock(&inode->i_lock);
+
if (op->ops == &afs_fetch_data_operation)
- op->fetch.subreq->rreq->i_size = status->size;
+ op->fetch.subreq->rreq->i_size = size;
}
}
@@ -709,7 +632,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
* it, but we need to give userspace the server's size.
*/
if (S_ISDIR(inode->i_mode))
- stat->size = vnode->netfs.remote_i_size;
+ stat->size = netfs_read_remote_i_size(inode);
} while (read_seqretry(&vnode->cb_lock, seq));
return 0;
@@ -756,12 +679,14 @@ void afs_evict_inode(struct inode *inode)
.range_end = LLONG_MAX,
};
- afs_single_writepages(inode->i_mapping, &wbc);
+ inode->i_mapping->a_ops->writepages(inode->i_mapping, &wbc);
}
netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
netfs_free_folioq_buffer(vnode->directory);
+ if (vnode->symlink)
+ afs_evict_symlink(vnode);
afs_set_cache_aux(vnode, &aux);
netfs_clear_inode_writeback(inode, &aux);
@@ -889,7 +814,7 @@ int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
*/
if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
attr->ia_size < i_size &&
- attr->ia_size > vnode->netfs.remote_i_size) {
+ attr->ia_size > netfs_read_remote_i_size(inode)) {
truncate_setsize(inode, attr->ia_size);
netfs_resize_file(&vnode->netfs, size, false);
fscache_resize_cookie(afs_vnode_cache(vnode),
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 599353c33337..0b72a8566299 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -710,6 +710,7 @@ struct afs_vnode {
#define AFS_VNODE_DIR_READ 11 /* Set if we've read a dir's contents */
struct folio_queue *directory; /* Directory contents */
+ struct afs_symlink __rcu *symlink; /* Symlink content */
struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
struct list_head granted_locks; /* locks granted on this file */
@@ -777,6 +778,15 @@ struct afs_permits {
};
/*
+ * Copy of symlink content for normal use.
+ */
+struct afs_symlink {
+ struct rcu_head rcu;
+ refcount_t ref;
+ char content[];
+};
+
+/*
* Error prioritisation and accumulation.
*/
struct afs_error {
@@ -887,7 +897,7 @@ struct afs_operation {
struct {
int reason; /* enum afs_edit_dir_reason */
mode_t mode;
- const char *symlink;
+ struct afs_symlink *symlink;
} create;
struct {
bool need_rehash;
@@ -1098,13 +1108,10 @@ extern const struct inode_operations afs_dir_inode_operations;
extern const struct address_space_operations afs_dir_aops;
extern const struct dentry_operations afs_fs_dentry_operations;
-ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file);
ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
__acquires(&dvnode->validate_lock);
extern void afs_d_release(struct dentry *);
extern void afs_check_for_remote_deletion(struct afs_operation *);
-int afs_single_writepages(struct address_space *mapping,
- struct writeback_control *wbc);
/*
* dir_edit.c
@@ -1157,6 +1164,7 @@ extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
void afs_fetch_data_async_rx(struct work_struct *work);
void afs_fetch_data_immediate_cancel(struct afs_call *call);
+void afs_set_i_size(struct afs_vnode *vnode, loff_t new_i_size);
/*
* flock.c
@@ -1246,10 +1254,6 @@ extern void afs_fs_probe_cleanup(struct afs_net *);
*/
extern const struct afs_operation_ops afs_fetch_status_operation;
-void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
-const char *afs_get_link(struct dentry *dentry, struct inode *inode,
- struct delayed_call *callback);
-int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
extern int afs_ilookup5_test_by_fid(struct inode *, void *);
@@ -1600,6 +1604,21 @@ extern int __init afs_fs_init(void);
extern void afs_fs_exit(void);
/*
+ * symlink.c
+ */
+extern const struct inode_operations afs_symlink_inode_operations;
+extern const struct address_space_operations afs_symlink_aops;
+
+void afs_invalidate_symlink(struct afs_vnode *vnode);
+void afs_evict_symlink(struct afs_vnode *vnode);
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback);
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
+int afs_symlink_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
+
+/*
* validation.c
*/
bool afs_check_validity(const struct afs_vnode *vnode);
@@ -1759,16 +1778,6 @@ static inline void afs_update_dentry_version(struct afs_operation *op,
}
/*
- * Set the file size and block count. Estimate the number of 512 bytes blocks
- * used, rounded up to nearest 1K for consistency with other AFS clients.
- */
-static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size)
-{
- i_size_write(&vnode->netfs.inode, size);
- vnode->netfs.inode.i_blocks = ((size + 1023) >> 10) << 1;
-}
-
-/*
* Check for a conflicting operation on a directory that we just unlinked from.
* If someone managed to sneak a link or an unlink in on the file we just
* unlinked, we won't be able to trust nlink on an AFS file (but not YFS).
diff --git a/fs/afs/symlink.c b/fs/afs/symlink.c
new file mode 100644
index 000000000000..ed5868369f37
--- /dev/null
+++ b/fs/afs/symlink.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS filesystem symbolic link handling
+ *
+ * Copyright (C) 2026 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/iov_iter.h>
+#include "internal.h"
+
+static void afs_put_symlink(struct afs_symlink *symlink)
+{
+ if (refcount_dec_and_test(&symlink->ref))
+ kfree_rcu(symlink, rcu);
+}
+
+static void afs_replace_symlink(struct afs_vnode *vnode, struct afs_symlink *symlink)
+{
+ struct afs_symlink *old;
+
+ old = rcu_replace_pointer(vnode->symlink, symlink,
+ lockdep_is_held(&vnode->validate_lock));
+ if (old)
+ afs_put_symlink(old);
+}
+
+/*
+ * In the event that a third-party update of a symlink occurs, dispose of the
+ * copy of the old contents. Called under ->validate_lock.
+ */
+void afs_invalidate_symlink(struct afs_vnode *vnode)
+{
+ afs_replace_symlink(vnode, NULL);
+}
+
+/*
+ * Dispose of a symlink copy during inode deletion.
+ */
+void afs_evict_symlink(struct afs_vnode *vnode)
+{
+ struct afs_symlink *old;
+
+ old = rcu_replace_pointer(vnode->symlink, NULL, true);
+ if (old)
+ afs_put_symlink(old);
+
+}
+
+/*
+ * Set up a locally created symlink inode for immediate write to the cache.
+ */
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
+{
+ struct afs_symlink *symlink = op->create.symlink;
+ size_t dsize = 0;
+ size_t size = strlen(symlink->content) + 1;
+ char *p;
+
+ rcu_assign_pointer(vnode->symlink, symlink);
+ op->create.symlink = NULL;
+
+ if (!fscache_cookie_enabled(netfs_i_cookie(&vnode->netfs)))
+ return;
+
+ if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
+ mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
+ return;
+
+ vnode->directory_size = dsize;
+ p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
+ memcpy(p, symlink->content, size);
+ kunmap_local(p);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+}
+
+/*
+ * Read a symlink in a single download.
+ */
+static ssize_t afs_do_read_symlink(struct afs_vnode *vnode)
+{
+ struct afs_symlink *symlink;
+ struct iov_iter iter;
+ ssize_t ret;
+ loff_t i_size;
+
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (i_size > PAGE_SIZE - 1) {
+ trace_afs_file_error(vnode, -EFBIG, afs_file_error_dir_big);
+ return -EFBIG;
+ }
+
+ if (!vnode->directory) {
+ size_t cur_size = 0;
+
+ ret = netfs_alloc_folioq_buffer(NULL,
+ &vnode->directory, &cur_size, PAGE_SIZE,
+ mapping_gfp_mask(vnode->netfs.inode.i_mapping));
+ vnode->directory_size = PAGE_SIZE - 1;
+ if (ret < 0)
+ return ret;
+ }
+
+ iov_iter_folio_queue(&iter, ITER_DEST, vnode->directory, 0, 0, PAGE_SIZE);
+
+ /* AFS requires us to perform the read of a symlink as a single unit to
+ * avoid issues with the content being changed between reads.
+ */
+ ret = netfs_read_single(&vnode->netfs.inode, NULL, &iter);
+ if (ret >= 0) {
+ i_size = ret;
+ if (i_size > PAGE_SIZE - 1) {
+ trace_afs_file_error(vnode, -EFBIG, afs_file_error_dir_big);
+ return -EFBIG;
+ }
+ vnode->directory_size = i_size;
+
+ /* Copy the symlink. */
+ symlink = kmalloc_flex(struct afs_symlink, content, i_size + 1,
+ GFP_KERNEL);
+ if (!symlink)
+ return -ENOMEM;
+
+ refcount_set(&symlink->ref, 1);
+ symlink->content[i_size] = 0;
+
+ const char *s = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
+
+ memcpy(symlink->content, s, i_size);
+ kunmap_local(s);
+
+ afs_replace_symlink(vnode, symlink);
+ }
+
+ if (!fscache_cookie_enabled(netfs_i_cookie(&vnode->netfs))) {
+ netfs_free_folioq_buffer(vnode->directory);
+ vnode->directory = NULL;
+ vnode->directory_size = 0;
+ }
+
+ return ret;
+}
+
+static ssize_t afs_read_symlink(struct afs_vnode *vnode)
+{
+ ssize_t ret;
+
+ fscache_use_cookie(afs_vnode_cache(vnode), false);
+ ret = afs_do_read_symlink(vnode);
+ fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL);
+ return ret;
+}
+
+static void afs_put_link(void *arg)
+{
+ afs_put_symlink(arg);
+}
+
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
+{
+ struct afs_symlink *symlink;
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ ssize_t ret;
+
+ if (!dentry) {
+ /* RCU pathwalk. */
+ symlink = rcu_dereference(vnode->symlink);
+ if (!symlink || !afs_check_validity(vnode))
+ return ERR_PTR(-ECHILD);
+ set_delayed_call(callback, NULL, NULL);
+ return symlink->content;
+ }
+
+ if (vnode->symlink) {
+ ret = afs_validate(vnode, NULL);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ down_read(&vnode->validate_lock);
+ if (vnode->symlink)
+ goto good;
+ up_read(&vnode->validate_lock);
+ }
+
+ if (down_write_killable(&vnode->validate_lock) < 0)
+ return ERR_PTR(-ERESTARTSYS);
+ if (!vnode->symlink) {
+ ret = afs_read_symlink(vnode);
+ if (ret < 0) {
+ up_write(&vnode->validate_lock);
+ return ERR_PTR(ret);
+ }
+ }
+
+ downgrade_write(&vnode->validate_lock);
+
+good:
+ symlink = rcu_dereference_protected(vnode->symlink,
+ lockdep_is_held(&vnode->validate_lock));
+ refcount_inc(&symlink->ref);
+ up_read(&vnode->validate_lock);
+
+ set_delayed_call(callback, afs_put_link, symlink);
+ return symlink->content;
+}
+
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+ DEFINE_DELAYED_CALL(done);
+ const char *content;
+ int len;
+
+ content = afs_get_link(dentry, d_inode(dentry), &done);
+ if (IS_ERR(content)) {
+ do_delayed_call(&done);
+ return PTR_ERR(content);
+ }
+
+ len = umin(strlen(content), buflen);
+ if (copy_to_user(buffer, content, len))
+ len = -EFAULT;
+ do_delayed_call(&done);
+ return len;
+}
+
+/*
+ * Write the symlink contents to the cache as a single blob. We then throw
+ * away the page we used to receive it.
+ */
+int afs_symlink_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ struct iov_iter iter;
+ int ret = 0;
+
+ if (!down_read_trylock(&vnode->validate_lock)) {
+ if (wbc->sync_mode == WB_SYNC_NONE) {
+ /* The VFS will have undirtied the inode. */
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+ return 0;
+ }
+ down_read(&vnode->validate_lock);
+ }
+
+ if (vnode->directory &&
+ atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
+ iov_iter_folio_queue(&iter, ITER_SOURCE, vnode->directory, 0, 0,
+ i_size_read(&vnode->netfs.inode));
+ ret = netfs_writeback_single(mapping, wbc, &iter);
+ }
+
+ if (ret == 0) {
+ mutex_lock(&vnode->netfs.wb_lock);
+ netfs_free_folioq_buffer(vnode->directory);
+ vnode->directory = NULL;
+ vnode->directory_size = 0;
+ mutex_unlock(&vnode->netfs.wb_lock);
+ } else if (ret == 1) {
+ ret = 0; /* Skipped write due to lock conflict. */
+ }
+
+ up_read(&vnode->validate_lock);
+ return ret;
+}
+
+const struct inode_operations afs_symlink_inode_operations = {
+ .get_link = afs_get_link,
+ .readlink = afs_readlink,
+};
+
+const struct address_space_operations afs_symlink_aops = {
+ .writepages = afs_symlink_writepages,
+};
diff --git a/fs/afs/validation.c b/fs/afs/validation.c
index 0ba8336c9025..e997563af658 100644
--- a/fs/afs/validation.c
+++ b/fs/afs/validation.c
@@ -465,11 +465,17 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
vnode->cb_ro_snapshot = cb_ro_snapshot;
vnode->cb_scrub = cb_scrub;
- /* if the vnode's data version number changed then its contents are
- * different */
+ /* If the vnode's data version number changed then its contents are
+ * different. Note that afs_apply_status() doesn't set ZAP_DATA on
+ * directories.
+ */
zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
- if (zap)
- afs_zap_data(vnode);
+ if (zap) {
+ if (S_ISREG(vnode->netfs.inode.i_mode))
+ afs_zap_data(vnode);
+ else if (S_ISLNK(vnode->netfs.inode.i_mode))
+ afs_invalidate_symlink(vnode);
+ }
up_write(&vnode->validate_lock);
_leave(" = 0");
return 0;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index fcfed9d24e0a..7f34b939706a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -142,7 +142,7 @@ static void afs_issue_write_worker(struct work_struct *work)
afs_begin_vnode_operation(op);
op->store.write_iter = &subreq->io_iter;
- op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size);
+ op->store.i_size = umax(pos + len, netfs_read_remote_i_size(&vnode->netfs.inode));
op->mtime = inode_get_mtime(&vnode->netfs.inode);
afs_wait_for_operation(op);
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 24fb562ebd33..d941179730a9 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -960,7 +960,7 @@ void yfs_fs_symlink(struct afs_operation *op)
_enter("");
- contents_sz = strlen(op->create.symlink);
+ contents_sz = strlen(op->create.symlink->content);
call = afs_alloc_flat_call(op->net, &yfs_RXYFSSymlink,
sizeof(__be32) +
sizeof(struct yfs_xdr_RPCFlags) +
@@ -981,7 +981,7 @@ void yfs_fs_symlink(struct afs_operation *op)
bp = xdr_encode_u32(bp, 0); /* RPC flags */
bp = xdr_encode_YFSFid(bp, &dvp->fid);
bp = xdr_encode_name(bp, name);
- bp = xdr_encode_string(bp, op->create.symlink, contents_sz);
+ bp = xdr_encode_string(bp, op->create.symlink->content, contents_sz);
bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime);
yfs_check_req(call, bp);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 1b83ed0e0a63..2937db690b40 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -130,6 +130,8 @@ retry:
ret = cachefiles_inject_write_error();
if (ret == 0) {
subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700, NULL);
+ if (IS_ERR(subdir))
+ ret = PTR_ERR(subdir);
} else {
end_creating(subdir);
subdir = ERR_PTR(ret);
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 41e311019a25..df7ea019526d 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -89,13 +89,11 @@ static int erofs_init_inode_xattrs(struct inode *inode)
vi->xattr_isize - sizeof(struct erofs_xattr_ibody_header)) {
erofs_err(sb, "invalid h_shared_count %u @ nid %llu",
vi->xattr_shared_count, vi->nid);
- erofs_put_metabuf(&buf);
ret = -EFSCORRUPTED;
goto out_unlock;
}
vi->xattr_shared_xattrs = kmalloc_objs(uint, vi->xattr_shared_count);
if (!vi->xattr_shared_xattrs) {
- erofs_put_metabuf(&buf);
ret = -ENOMEM;
goto out_unlock;
}
@@ -112,12 +110,12 @@ static int erofs_init_inode_xattrs(struct inode *inode)
}
vi->xattr_shared_xattrs[i] = le32_to_cpu(*xattr_id);
}
- erofs_put_metabuf(&buf);
/* paired with smp_mb() at the beginning of the function. */
smp_mb();
set_bit(EROFS_I_EA_INITED_BIT, &vi->flags);
out_unlock:
+ erofs_put_metabuf(&buf);
clear_and_wake_up_bit(EROFS_I_BL_XATTR_BIT, &vi->flags);
return ret;
}
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 43bb5a6a9924..27ab7bd844ec 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1509,8 +1509,15 @@ repeat:
DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
folio = page_folio(zbv.page);
- /* For preallocated managed folios, add them to page cache here */
+ /*
+ * Preallocated folios are added to the managed cache here rather than
+ * in z_erofs_bind_cache() in order to keep these folios locked in
+ * increasing (physical) address order.
+ * Clear folio->private before these folios become visible to others in
+ * the managed cache to avoid duplicate additions for unaligned extents.
+ */
if (folio->private == Z_EROFS_PREALLOCATED_FOLIO) {
+ folio->private = NULL;
tocache = true;
goto out_tocache;
}
@@ -1546,14 +1553,8 @@ repeat:
}
return;
}
- /*
- * Already linked with another pcluster, which only appears in
- * crafted images by fuzzers for now. But handle this anyway.
- */
- tocache = false; /* use temporary short-lived pages */
} else {
DBG_BUGON(1); /* referenced managed folios can't be truncated */
- tocache = true;
}
folio_unlock(folio);
folio_put(folio);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c59452d60b8d..f94f3dc082c6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2176,7 +2176,10 @@ static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
WARN_ON(!ap->num_folios);
- /* Reached max pages */
+ /* Reached max pages or max folio slots */
+ if (ap->num_folios >= fc->max_pages)
+ return true;
+
if (DIV_ROUND_UP(bytes, PAGE_SIZE) > fc->max_pages)
return true;
diff --git a/fs/inode.c b/fs/inode.c
index 6a3cbc7dcd28..62c579a0cf7d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2124,7 +2124,13 @@ static int inode_update_cmtime(struct inode *inode, unsigned int flags)
inode_iversion_need_inc(inode))
return -EAGAIN;
} else {
- if (inode_maybe_inc_iversion(inode, !!dirty))
+ /*
+ * Don't force iversion increment for pure lazytime
+ * updates (I_DIRTY_TIME only), let I_VERSION_QUERIED
+ * dictate whether the increment is needed.
+ */
+ if (inode_maybe_inc_iversion(inode,
+ dirty != I_DIRTY_TIME))
dirty |= I_DIRTY_SYNC;
}
}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 60c4a0e0fca5..442d62679262 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -309,7 +309,7 @@ static struct dentry *jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip,
out1:
jfs_info("jfs_mkdir: rc:%d", rc);
- return ERR_PTR(rc);
+ return rc ? ERR_PTR(rc) : NULL;
}
/*
diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
index 6472c4ea3d1e..cb61fbdb52e9 100644
--- a/fs/mnt_idmapping.c
+++ b/fs/mnt_idmapping.c
@@ -375,6 +375,8 @@ int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_
continue;
seq_printf(seq, "%u %u %u", extent->first, lower, extent->count);
+ if (seq_has_overflowed(seq))
+ return -EAGAIN;
seq->count++; /* mappings are separated by \0 */
if (seq_has_overflowed(seq))
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a8c0d86118c5..76d0f6a29aba 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -156,9 +156,8 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
netfs_cache_read_terminated, subreq);
}
-static void netfs_queue_read(struct netfs_io_request *rreq,
- struct netfs_io_subrequest *subreq,
- bool last_subreq)
+void netfs_queue_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];
@@ -169,7 +168,8 @@ static void netfs_queue_read(struct netfs_io_request *rreq,
* remove entries off of the front.
*/
spin_lock(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ /* Write IN_PROGRESS before pointer to new subreq */
+ list_add_tail_release(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
if (!stream->active) {
stream->collected_to = subreq->start;
@@ -178,11 +178,6 @@ static void netfs_queue_read(struct netfs_io_request *rreq,
}
}
- if (last_subreq) {
- smp_wmb(); /* Write lists before ALL_QUEUED. */
- set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
- }
-
spin_unlock(&rreq->lock);
}
@@ -214,7 +209,6 @@ static void netfs_issue_read(struct netfs_io_request *rreq,
static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
struct readahead_control *ractl)
{
- struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned long long start = rreq->start;
ssize_t size = rreq->len;
int ret = 0;
@@ -233,10 +227,13 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
subreq->start = start;
subreq->len = size;
+ netfs_queue_read(rreq, subreq);
+
source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
subreq->source = source;
if (source == NETFS_DOWNLOAD_FROM_SERVER) {
- unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
+ unsigned long long zero_point = netfs_read_zero_point(rreq->inode);
+ unsigned long long zp = umin(zero_point, rreq->i_size);
size_t len = subreq->len;
if (unlikely(rreq->origin == NETFS_READ_SINGLE))
@@ -252,7 +249,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
rreq->debug_id, subreq->debug_index,
subreq->len, size,
- subreq->start, ictx->zero_point, rreq->i_size);
+ subreq->start, zero_point, rreq->i_size);
+ netfs_cancel_read(subreq, ret);
break;
}
subreq->len = len;
@@ -261,12 +259,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
- subreq->error = ret;
- /* Not queued - release both refs. */
- netfs_put_subrequest(subreq,
- netfs_sreq_trace_put_cancel);
- netfs_put_subrequest(subreq,
- netfs_sreq_trace_put_cancel);
+ netfs_cancel_read(subreq, ret);
break;
}
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
@@ -289,24 +282,29 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
pr_err("Unexpected read source %u\n", source);
WARN_ON_ONCE(1);
+ netfs_cancel_read(subreq, ret);
break;
issue:
slice = netfs_prepare_read_iterator(subreq, ractl);
if (slice < 0) {
ret = slice;
- subreq->error = ret;
- trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
- /* Not queued - release both refs. */
- netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
- netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
+ netfs_cancel_read(subreq, ret);
break;
}
- size -= slice;
start += slice;
+ size -= slice;
+ if (size <= 0) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ }
- netfs_queue_read(rreq, subreq, size <= 0);
netfs_issue_read(rreq, subreq);
+
+ if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ netfs_wait_for_paused_read(rreq);
+ if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
+ break;
cond_resched();
} while (size > 0);
@@ -397,6 +395,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
{
struct netfs_io_request *rreq;
struct address_space *mapping = folio->mapping;
+ struct netfs_group *group = netfs_folio_group(folio);
struct netfs_folio *finfo = netfs_folio_info(folio);
struct netfs_inode *ctx = netfs_inode(mapping->host);
struct folio *sink = NULL;
@@ -458,14 +457,20 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
netfs_read_to_pagecache(rreq, NULL);
- if (sink)
- folio_put(sink);
-
ret = netfs_wait_for_read(rreq);
if (ret >= 0) {
+ if (group)
+ folio_change_private(folio, group);
+ else
+ folio_detach_private(folio);
+ kfree(finfo);
+ trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
flush_dcache_folio(folio);
folio_mark_uptodate(folio);
}
+
+ if (sink)
+ folio_put(sink);
folio_unlock(folio);
netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
@@ -498,10 +503,10 @@ int netfs_read_folio(struct file *file, struct folio *folio)
struct netfs_inode *ctx = netfs_inode(mapping->host);
int ret;
- if (folio_test_dirty(folio)) {
- trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
+ folio_wait_writeback(folio);
+
+ if (folio_test_dirty(folio))
return netfs_read_gaps(file, folio);
- }
_enter("%lx", folio->index);
@@ -667,7 +672,7 @@ retry:
ret = PTR_ERR(rreq);
goto error;
}
- rreq->no_unlock_folio = folio->index;
+ rreq->no_unlock_folio = folio;
__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
ret = netfs_begin_cache_read(rreq, ctx);
@@ -684,9 +689,9 @@ retry:
netfs_read_to_pagecache(rreq, NULL);
ret = netfs_wait_for_read(rreq);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
if (ret < 0)
goto error;
- netfs_put_request(rreq, netfs_rreq_trace_put_return);
have_folio:
ret = folio_wait_private_2_killable(folio);
@@ -733,7 +738,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
goto error;
}
- rreq->no_unlock_folio = folio->index;
+ rreq->no_unlock_folio = folio;
__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
ret = netfs_begin_cache_read(rreq, ctx);
if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 05ea5b0cc0e8..6bde3320bcec 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -12,24 +12,6 @@
#include <linux/slab.h>
#include "internal.h"
-static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
- if (netfs_group)
- folio_attach_private(folio, netfs_get_group(netfs_group));
-}
-
-static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
- void *priv = folio_get_private(folio);
-
- if (unlikely(priv != netfs_group)) {
- if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
- folio_attach_private(folio, netfs_get_group(netfs_group));
- else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
- folio_detach_private(folio);
- }
-}
-
/*
* Grab a folio for writing and lock it. Attempt to allocate as large a folio
* as possible to hold as much of the remaining length as possible in one go.
@@ -149,6 +131,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
}
do {
+ enum netfs_folio_trace trace;
struct netfs_folio *finfo;
struct netfs_group *group;
unsigned long long fpos;
@@ -156,6 +139,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
size_t offset; /* Offset into pagecache folio */
size_t part; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
+ void *priv;
offset = pos & (max_chunk - 1);
part = min(max_chunk - offset, iov_iter_count(iter));
@@ -201,73 +185,99 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
goto error_folio_unlock;
}
- /* Decide how we should modify a folio. We might be attempting
- * to do write-streaming, in which case we don't want to a
- * local RMW cycle if we can avoid it. If we're doing local
- * caching or content crypto, we award that priority over
- * avoiding RMW. If the file is open readably, then we also
- * assume that we may want to read what we wrote.
- */
finfo = netfs_folio_info(folio);
group = netfs_folio_group(folio);
+ /* If the requested group differs from the group set on the
+ * page, then we need to flush out the folio if it has a group
+ * set (ie. is non-NULL). Note that COPY_TO_CACHE is a special
+ * case, being a netfs annotation rather than an actual group.
+ *
+ * The filesystem isn't permitted to mix writes with groups and
+ * writes without groups as the NULL group is used to indicate
+ * that no group is set.
+ */
if (unlikely(group != netfs_group) &&
- group != NETFS_FOLIO_COPY_TO_CACHE)
+ group != NETFS_FOLIO_COPY_TO_CACHE &&
+ group) {
+ WARN_ON_ONCE(!netfs_group);
goto flush_content;
+ }
+ /* Decide how we should modify a folio. We might be attempting
+ * to do write-streaming, as we don't want to a local RMW cycle
+ * if we can avoid it. If we're doing local caching or content
+ * crypto, we award that priority over avoiding RMW. If the
+ * file is open readably, then we let ->read_folio() fill in
+ * the gaps.
+ */
if (folio_test_uptodate(folio)) {
if (mapping_writably_mapped(mapping))
flush_dcache_folio(folio);
copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
if (unlikely(copied == 0))
goto copy_failed;
- netfs_set_group(folio, netfs_group);
- trace_netfs_folio(folio, netfs_folio_is_uptodate);
- goto copied;
+ trace = netfs_folio_is_uptodate;
+ goto copied_uptodate;
}
/* If the page is above the zero-point then we assume that the
* server would just return a block of zeros or a short read if
* we try to read it.
*/
- if (fpos >= ctx->zero_point) {
+ if (fpos >= netfs_read_zero_point(inode)) {
folio_zero_segment(folio, 0, offset);
copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
if (unlikely(copied == 0))
goto copy_failed;
folio_zero_segment(folio, offset + copied, flen);
- __netfs_set_group(folio, netfs_group);
- folio_mark_uptodate(folio);
- trace_netfs_folio(folio, netfs_modify_and_clear);
- goto copied;
+ if (finfo)
+ trace = netfs_modify_and_clear_rm_finfo;
+ else
+ trace = netfs_modify_and_clear;
+ goto mark_uptodate;
}
/* See if we can write a whole folio in one go. */
if (!maybe_trouble && offset == 0 && part >= flen) {
copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
- if (unlikely(copied == 0))
+ if (likely(copied == part)) {
+ if (finfo)
+ trace = netfs_whole_folio_modify_filled;
+ else
+ trace = netfs_whole_folio_modify;
+ goto mark_uptodate;
+ }
+ if (copied == 0)
goto copy_failed;
- if (unlikely(copied < part)) {
+ if (!finfo || copied <= finfo->dirty_offset) {
maybe_trouble = true;
iov_iter_revert(iter, copied);
copied = 0;
folio_unlock(folio);
goto retry;
}
- __netfs_set_group(folio, netfs_group);
- folio_mark_uptodate(folio);
- trace_netfs_folio(folio, netfs_whole_folio_modify);
+
+ /* We overwrote some existing dirty data, so we have to
+ * accept the partial write.
+ */
+ finfo->dirty_len += finfo->dirty_offset;
+ if (finfo->dirty_len == flen) {
+ trace = netfs_whole_folio_modify_filled_efault;
+ goto mark_uptodate;
+ }
+ if (copied > finfo->dirty_len)
+ finfo->dirty_len = copied;
+ finfo->dirty_offset = 0;
+ trace = netfs_whole_folio_modify_efault;
goto copied;
}
/* We don't want to do a streaming write on a file that loses
* caching service temporarily because the backing store got
- * culled and we don't really want to get a streaming write on
- * a file that's open for reading as ->read_folio() then has to
- * be able to flush it.
+ * culled.
*/
- if ((file->f_mode & FMODE_READ) ||
- netfs_is_cache_enabled(ctx)) {
+ if (netfs_is_cache_enabled(ctx)) {
if (finfo) {
netfs_stat(&netfs_n_wh_wstream_conflict);
goto flush_content;
@@ -282,11 +292,11 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
if (unlikely(copied == 0))
goto copy_failed;
- netfs_set_group(folio, netfs_group);
- trace_netfs_folio(folio, netfs_just_prefetch);
- goto copied;
+ trace = netfs_just_prefetch;
+ goto copied_uptodate;
}
+ /* Do a streaming write on a folio that has nothing in it yet. */
if (!finfo) {
ret = -EIO;
if (WARN_ON(folio_get_private(folio)))
@@ -295,10 +305,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(copied == 0))
goto copy_failed;
if (offset == 0 && copied == flen) {
- __netfs_set_group(folio, netfs_group);
- folio_mark_uptodate(folio);
- trace_netfs_folio(folio, netfs_streaming_filled_page);
- goto copied;
+ trace = netfs_streaming_filled_page;
+ goto mark_uptodate;
}
finfo = kzalloc_obj(*finfo);
@@ -312,7 +320,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
finfo->dirty_len = copied;
folio_attach_private(folio, (void *)((unsigned long)finfo |
NETFS_FOLIO_INFO));
- trace_netfs_folio(folio, netfs_streaming_write);
+ trace = netfs_streaming_write;
goto copied;
}
@@ -326,16 +334,10 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
goto copy_failed;
finfo->dirty_len += copied;
if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) {
- if (finfo->netfs_group)
- folio_change_private(folio, finfo->netfs_group);
- else
- folio_detach_private(folio);
- folio_mark_uptodate(folio);
- kfree(finfo);
- trace_netfs_folio(folio, netfs_streaming_cont_filled_page);
- } else {
- trace_netfs_folio(folio, netfs_streaming_write_cont);
+ trace = netfs_streaming_cont_filled_page;
+ goto mark_uptodate;
}
+ trace = netfs_streaming_write_cont;
goto copied;
}
@@ -349,7 +351,38 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
goto out;
continue;
+ /* Mark a folio as being up to data when we've filled it
+ * completely. If the folio has a group attached, then it must
+ * be the same group, otherwise we should have flushed it out
+ * above. We have to get rid of the netfs_folio struct if
+ * there was one.
+ */
+ mark_uptodate:
+ folio_mark_uptodate(folio);
+
+ copied_uptodate:
+ priv = folio_get_private(folio);
+ if (likely(priv == netfs_group)) {
+ /* Already set correctly; no change required. */
+ } else if (priv == NETFS_FOLIO_COPY_TO_CACHE) {
+ if (!netfs_group)
+ folio_detach_private(folio);
+ else
+ folio_change_private(folio, netfs_get_group(netfs_group));
+ } else if (!priv) {
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+ } else {
+ WARN_ON_ONCE(!finfo);
+ if (netfs_group)
+ /* finfo->netfs_group has a ref */
+ folio_change_private(folio, netfs_group);
+ else
+ folio_detach_private(folio);
+ kfree(finfo);
+ }
+
copied:
+ trace_netfs_folio(folio, trace);
flush_dcache_folio(folio);
/* Update the inode size if we moved the EOF marker */
@@ -510,6 +543,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
struct inode *inode = file_inode(file);
struct netfs_inode *ictx = netfs_inode(inode);
vm_fault_t ret = VM_FAULT_NOPAGE;
+ void *priv;
int err;
_enter("%lx", folio->index);
@@ -530,7 +564,9 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
}
group = netfs_folio_group(folio);
- if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) {
+ if (group &&
+ group != netfs_group &&
+ group != NETFS_FOLIO_COPY_TO_CACHE) {
folio_unlock(folio);
err = filemap_fdatawrite_range(mapping,
folio_pos(folio),
@@ -552,7 +588,19 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus);
else
trace_netfs_folio(folio, netfs_folio_trace_mkwrite);
- netfs_set_group(folio, netfs_group);
+
+ priv = folio_get_private(folio);
+ if (priv != netfs_group) {
+ if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
+ folio_detach_private(folio);
+ else if (netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
+ folio_change_private(folio, netfs_get_group(netfs_group));
+ else if (netfs_group && !priv)
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+ else
+ WARN_ON_ONCE(1);
+ }
+
file_update_time(file);
set_bit(NETFS_ICTX_MODIFIED_ATTR, &ictx->flags);
if (ictx->ops->post_modify)
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index f72e6da88cca..6a8fb0d55e04 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -45,12 +45,11 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
* Perform a read to a buffer from the server, slicing up the region to be read
* according to the network rsize.
*/
-static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
+static void netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
- struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned long long start = rreq->start;
ssize_t size = rreq->len;
- int ret = 0;
+ int ret;
do {
struct netfs_io_subrequest *subreq;
@@ -58,7 +57,10 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
subreq = netfs_alloc_subrequest(rreq);
if (!subreq) {
- ret = -ENOMEM;
+ /* Stash the error in the request if there's not
+ * already an error set.
+ */
+ cmpxchg(&rreq->error, 0, -ENOMEM);
break;
}
@@ -66,25 +68,13 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
subreq->start = start;
subreq->len = size;
- __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
-
- spin_lock(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &stream->subrequests);
- if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
- if (!stream->active) {
- stream->collected_to = subreq->start;
- /* Store list pointers before active flag */
- smp_store_release(&stream->active, true);
- }
- }
- trace_netfs_sreq(subreq, netfs_sreq_trace_added);
- spin_unlock(&rreq->lock);
+ netfs_queue_read(rreq, subreq);
netfs_stat(&netfs_n_rh_download);
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
- netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
+ netfs_cancel_read(subreq, ret);
break;
}
}
@@ -113,8 +103,6 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
netfs_wake_collector(rreq);
}
-
- return ret;
}
/*
@@ -137,21 +125,17 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
// TODO: Use bounce buffer if requested
inode_dio_begin(rreq->inode);
+ netfs_dispatch_unbuffered_reads(rreq);
- ret = netfs_dispatch_unbuffered_reads(rreq);
-
- if (!rreq->submitted) {
- netfs_put_request(rreq, netfs_rreq_trace_put_no_submit);
- inode_dio_end(rreq->inode);
- ret = 0;
- goto out;
- }
+ /* The collector will get run, even if we don't manage to submit any
+ * subreqs, so we shouldn't call inode_dio_end() here.
+ */
if (sync)
ret = netfs_wait_for_read(rreq);
else
ret = -EIOCBQUEUED;
-out:
+
_leave(" = %zd", ret);
return ret;
}
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index f9ab69de3e29..25f8ceb15fad 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -376,8 +376,10 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret < 0)
goto out;
end = iocb->ki_pos + iov_iter_count(from);
- if (end > ictx->zero_point)
- ictx->zero_point = end;
+ spin_lock(&inode->i_lock);
+ if (end > ictx->_zero_point)
+ netfs_write_zero_point(inode, end);
+ spin_unlock(&inode->i_lock);
fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode),
FSCACHE_INVAL_DIO_WRITE);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index d436e20d3418..645996ecfc80 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -23,6 +23,8 @@
/*
* buffered_read.c
*/
+void netfs_queue_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq);
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
@@ -108,6 +110,7 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq,
*/
bool netfs_read_collection(struct netfs_io_request *rreq);
void netfs_read_collection_worker(struct work_struct *work);
+void netfs_cancel_read(struct netfs_io_subrequest *subreq, int error);
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
/*
diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c
index 154a14bb2d7f..b375567e0520 100644
--- a/fs/netfs/iterator.c
+++ b/fs/netfs/iterator.c
@@ -22,7 +22,7 @@
*
* Extract the page fragments from the given amount of the source iterator and
* build up a second iterator that refers to all of those bits. This allows
- * the original iterator to disposed of.
+ * the original iterator to be disposed of.
*
* @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA be
* allowed on the pages extracted.
@@ -43,7 +43,7 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
unsigned int max_pages;
unsigned int npages = 0;
unsigned int i;
- ssize_t ret;
+ ssize_t ret = 0;
size_t count = orig_len, offset, len;
size_t bv_size, pg_size;
@@ -67,26 +67,29 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
ret = iov_iter_extract_pages(orig, &pages, count,
max_pages - npages, extraction_flags,
&offset);
- if (ret < 0) {
- pr_err("Couldn't get user pages (rc=%zd)\n", ret);
+ if (unlikely(ret <= 0)) {
+ ret = ret ?: -EIO;
break;
}
- if (ret > count) {
- pr_err("get_pages rc=%zd more than %zu\n", ret, count);
+ if (WARN(ret > count,
+ "%s: extract_pages overrun %zd > %zu bytes\n",
+ __func__, ret, count)) {
+ ret = -EIO;
break;
}
- count -= ret;
- ret += offset;
- cur_npages = DIV_ROUND_UP(ret, PAGE_SIZE);
-
- if (npages + cur_npages > max_pages) {
- pr_err("Out of bvec array capacity (%u vs %u)\n",
- npages + cur_npages, max_pages);
+ cur_npages = DIV_ROUND_UP(offset + ret, PAGE_SIZE);
+ if (WARN(cur_npages > max_pages - npages,
+ "%s: extract_pages overrun %u > %u pages\n",
+ __func__, npages + cur_npages, max_pages)) {
+ ret = -EIO;
break;
}
+ count -= ret;
+ ret += offset;
+
for (i = 0; i < cur_npages; i++) {
len = ret > PAGE_SIZE ? PAGE_SIZE : ret;
bvec_set_page(bv + npages + i, *pages++, len - offset, offset);
@@ -97,6 +100,18 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
npages += cur_npages;
}
+ /* Note: Don't try to clean up after EIO. Either we got no pages, so
+ * nothing to clean up, or we got a buffer overrun, memory corruption
+ * and can't trust the stuff in the buffer (a WARN was emitted).
+ */
+
+ if (ret < 0 && (ret == -ENOMEM || npages == 0)) {
+ for (i = 0; i < npages; i++)
+ unpin_user_page(bv[i].bv_page);
+ kvfree(bv);
+ return ret;
+ }
+
iov_iter_bvec(new, orig->data_source, bv, npages, orig_len - count);
return npages;
}
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 6df89c92b10b..5d554512ed23 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -211,18 +211,25 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback);
void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
struct netfs_folio *finfo;
- struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
+ struct inode *inode = folio_inode(folio);
+ struct netfs_inode *ctx = netfs_inode(inode);
size_t flen = folio_size(folio);
_enter("{%lx},%zx,%zx", folio->index, offset, length);
if (offset == 0 && length == flen) {
- unsigned long long i_size = i_size_read(&ctx->inode);
+ unsigned long long i_size, remote_i_size, zero_point;
unsigned long long fpos = folio_pos(folio), end;
+ netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
end = umin(fpos + flen, i_size);
- if (fpos < i_size && end > ctx->zero_point)
- ctx->zero_point = end;
+ if (fpos < i_size && end > zero_point) {
+ spin_lock(&inode->i_lock);
+ end = umin(fpos + flen, inode->i_size);
+ if (fpos < i_size && end > ctx->_zero_point)
+ netfs_write_zero_point(inode, end);
+ spin_unlock(&inode->i_lock);
+ }
}
folio_wait_private_2(folio); /* [DEPRECATED] */
@@ -255,7 +262,8 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
goto erase_completely;
/* Move the start of the data. */
finfo->dirty_len = fend - iend;
- finfo->dirty_offset = offset;
+ finfo->dirty_offset = iend;
+ trace_netfs_folio(folio, netfs_folio_trace_invalidate_front);
return;
}
@@ -264,12 +272,14 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
*/
if (iend >= fend) {
finfo->dirty_len = offset - fstart;
+ trace_netfs_folio(folio, netfs_folio_trace_invalidate_tail);
return;
}
/* A partial write was split. The caller has already zeroed
* it, so just absorb the hole.
*/
+ trace_netfs_folio(folio, netfs_folio_trace_invalidate_middle);
}
return;
@@ -277,8 +287,9 @@ erase_completely:
netfs_put_group(netfs_folio_group(folio));
folio_detach_private(folio);
folio_clear_uptodate(folio);
+ folio_cancel_dirty(folio);
kfree(finfo);
- return;
+ trace_netfs_folio(folio, netfs_folio_trace_invalidate_all);
}
EXPORT_SYMBOL(netfs_invalidate_folio);
@@ -292,15 +303,22 @@ EXPORT_SYMBOL(netfs_invalidate_folio);
*/
bool netfs_release_folio(struct folio *folio, gfp_t gfp)
{
- struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
- unsigned long long end;
+ struct inode *inode = folio_inode(folio);
+ struct netfs_inode *ctx = netfs_inode(inode);
+ unsigned long long i_size, remote_i_size, zero_point, end;
if (folio_test_dirty(folio))
return false;
- end = umin(folio_next_pos(folio), i_size_read(&ctx->inode));
- if (end > ctx->zero_point)
- ctx->zero_point = end;
+ netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
+ end = folio_next_pos(folio);
+ if (end > zero_point) {
+ spin_lock(&inode->i_lock);
+ end = umin(end, ctx->_remote_i_size);
+ if (end > ctx->_zero_point)
+ netfs_write_zero_point(inode, end);
+ spin_unlock(&inode->i_lock);
+ }
if (folio_test_private(folio))
return false;
@@ -356,6 +374,7 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
DEFINE_WAIT(myself);
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ smp_rmb(); /* Read ->next before IN_PROGRESS. */
if (!netfs_check_subreq_in_progress(subreq))
continue;
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index e5f6665b3341..23660a590124 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -83,7 +83,7 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
}
just_unlock:
- if (folio->index == rreq->no_unlock_folio &&
+ if (folio == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
_debug("no unlock");
} else {
@@ -205,8 +205,10 @@ reassess:
* in progress. The issuer thread may be adding stuff to the tail
* whilst we're doing this.
*/
- front = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
+ front = list_first_entry_or_null_acquire(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ /* Read first subreq pointer before IN_PROGRESS flag. */
+
while (front) {
size_t transferred;
@@ -576,6 +578,17 @@ skip_error_checks:
EXPORT_SYMBOL(netfs_read_subreq_terminated);
/*
+ * Cancel a read subrequest due to preparation failure.
+ */
+void netfs_cancel_read(struct netfs_io_subrequest *subreq, int error)
+{
+ trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
+ subreq->error = error;
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ netfs_read_subreq_terminated(subreq);
+}
+
+/*
* Handle termination of a read from the cache.
*/
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index cca9ac43c077..f59a70f3a086 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -175,7 +175,9 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
list_for_each_entry_safe_from(subreq, tmp,
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
+ spin_lock(&rreq->lock);
list_del(&subreq->rreq_link);
+ spin_unlock(&rreq->lock);
netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
@@ -203,8 +205,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
+ spin_lock(&rreq->lock);
list_add(&subreq->rreq_link, &to->rreq_link);
- to = list_next_entry(to, rreq_link);
+ spin_unlock(&rreq->lock);
+ to = subreq;
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
stream->sreq_max_len = umin(len, rreq->rsize);
@@ -288,8 +292,15 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq)
struct folio *folio = folioq_folio(p, slot);
if (folio && !folioq_is_marked2(p, slot)) {
- trace_netfs_folio(folio, netfs_folio_trace_abandon);
- folio_unlock(folio);
+ if (folio == rreq->no_unlock_folio &&
+ test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO,
+ &rreq->flags)) {
+ _debug("no unlock");
+ } else {
+ trace_netfs_folio(folio,
+ netfs_folio_trace_abandon);
+ folio_unlock(folio);
+ }
}
}
}
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c
index d0e23bc42445..8833550d2eb6 100644
--- a/fs/netfs/read_single.c
+++ b/fs/netfs/read_single.c
@@ -89,7 +89,6 @@ static void netfs_single_read_cache(struct netfs_io_request *rreq,
*/
static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
{
- struct netfs_io_stream *stream = &rreq->io_streams[0];
struct netfs_io_subrequest *subreq;
int ret = 0;
@@ -102,14 +101,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
subreq->len = rreq->len;
subreq->io_iter = rreq->buffer.iter;
- __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
-
- spin_lock(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &stream->subrequests);
- trace_netfs_sreq(subreq, netfs_sreq_trace_added);
- /* Store list pointers before active flag */
- smp_store_release(&stream->active, true);
- spin_unlock(&rreq->lock);
+ netfs_queue_read(rreq, subreq);
netfs_single_cache_prepare_read(rreq, subreq);
switch (subreq->source) {
@@ -121,10 +113,14 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
goto cancel;
}
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
rreq->netfs_ops->issue_read(subreq);
rreq->submitted += subreq->len;
break;
case NETFS_READ_FROM_CACHE:
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
netfs_single_read_cache(rreq, subreq);
rreq->submitted += subreq->len;
@@ -134,14 +130,15 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
pr_warn("Unexpected single-read source %u\n", subreq->source);
WARN_ON_ONCE(true);
ret = -EIO;
- break;
+ goto cancel;
}
- smp_wmb(); /* Write lists before ALL_QUEUED. */
- set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
return ret;
cancel:
- netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
+ netfs_cancel_read(subreq, ret);
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ netfs_wake_collector(rreq);
return ret;
}
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index b194447f4b11..24fc2bb2f8a4 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -57,7 +57,8 @@ static void netfs_dump_request(const struct netfs_io_request *rreq)
int netfs_folio_written_back(struct folio *folio)
{
enum netfs_folio_trace why = netfs_folio_trace_clear;
- struct netfs_inode *ictx = netfs_inode(folio->mapping->host);
+ struct inode *inode = folio_inode(folio);
+ struct netfs_inode *ictx = netfs_inode(inode);
struct netfs_folio *finfo;
struct netfs_group *group = NULL;
int gcount = 0;
@@ -69,8 +70,10 @@ int netfs_folio_written_back(struct folio *folio)
unsigned long long fend;
fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len;
- if (fend > ictx->zero_point)
- ictx->zero_point = fend;
+ spin_lock(&ictx->inode.i_lock);
+ if (fend > ictx->_zero_point)
+ netfs_write_zero_point(inode, fend);
+ spin_unlock(&ictx->inode.i_lock);
folio_detach_private(folio);
group = finfo->netfs_group;
@@ -228,8 +231,10 @@ reassess_streams:
if (!smp_load_acquire(&stream->active))
continue;
- front = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
+ front = list_first_entry_or_null_acquire(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ /* Read first subreq pointer before IN_PROGRESS flag. */
+
while (front) {
trace_netfs_collect_sreq(wreq, front);
//_debug("sreq [%x] %llx %zx/%zx",
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 2db688f94125..c03c7cc45e47 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -204,7 +204,8 @@ void netfs_prepare_write(struct netfs_io_request *wreq,
* remove entries off of the front.
*/
spin_lock(&wreq->lock);
- list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ /* Write IN_PROGRESS before pointer to new subreq */
+ list_add_tail_release(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
if (!stream->active) {
stream->collected_to = subreq->start;
@@ -413,12 +414,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (streamw)
netfs_issue_write(wreq, cache);
- /* Flip the page to the writeback state and unlock. If we're called
- * from write-through, then the page has already been put into the wb
- * state.
- */
- if (wreq->origin == NETFS_WRITEBACK)
- folio_start_writeback(folio);
+ folio_start_writeback(folio);
folio_unlock(folio);
if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
@@ -646,29 +642,41 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache)
{
+ int ret;
+
_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);
- if (!*writethrough_cache) {
- if (folio_test_dirty(folio))
- /* Sigh. mmap. */
- folio_clear_dirty_for_io(folio);
+ /* The folio is locked. */
+ if (*writethrough_cache != folio) {
+ if (*writethrough_cache) {
+ /* Did the folio get moved? */
+ folio_put(*writethrough_cache);
+ *writethrough_cache = NULL;
+ }
/* We can make multiple writes to the folio... */
- folio_start_writeback(folio);
if (wreq->len == 0)
trace_netfs_folio(folio, netfs_folio_trace_wthru);
else
trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
*writethrough_cache = folio;
+ folio_get(folio);
}
wreq->len += copied;
- if (!to_page_end)
+
+ if (!to_page_end) {
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
return 0;
+ }
+ ret = netfs_write_folio(wreq, wbc, folio);
+ folio_put(*writethrough_cache);
*writethrough_cache = NULL;
- return netfs_write_folio(wreq, wbc, folio);
+ wreq->submitted = wreq->len;
+ return ret;
}
/*
@@ -682,8 +690,12 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c
_enter("R=%x", wreq->debug_id);
- if (writethrough_cache)
+ if (writethrough_cache) {
+ folio_lock(writethrough_cache);
netfs_write_folio(wreq, wbc, writethrough_cache);
+ folio_put(writethrough_cache);
+ wreq->submitted = wreq->len;
+ }
netfs_end_issue_write(wreq);
@@ -818,6 +830,9 @@ static int netfs_write_folio_single(struct netfs_io_request *wreq,
*
* Write a monolithic, non-pagecache object back to the server and/or
* the cache.
+ *
+ * Return: 0 if successful; 1 if skipped due to lock conflict and WB_SYNC_NONE;
+ * or a negative error code.
*/
int netfs_writeback_single(struct address_space *mapping,
struct writeback_control *wbc,
@@ -834,8 +849,10 @@ int netfs_writeback_single(struct address_space *mapping,
if (!mutex_trylock(&ictx->wb_lock)) {
if (wbc->sync_mode == WB_SYNC_NONE) {
+ /* The VFS will have undirtied the inode. */
+ netfs_single_mark_inode_dirty(&ictx->inode);
netfs_stat(&netfs_n_wb_lock_skip);
- return 0;
+ return 1;
}
netfs_stat(&netfs_n_wb_lock_wait);
mutex_lock(&ictx->wb_lock);
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index 29489a23a220..32735abfa03f 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -130,7 +130,9 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
list_for_each_entry_safe_from(subreq, tmp,
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
+ spin_lock(&wreq->lock);
list_del(&subreq->rreq_link);
+ spin_unlock(&wreq->lock);
netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
@@ -153,8 +155,10 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
netfs_sreq_trace_new);
trace_netfs_sreq(subreq, netfs_sreq_trace_split);
+ spin_lock(&wreq->lock);
list_add(&subreq->rreq_link, &to->rreq_link);
- to = list_next_entry(to, rreq_link);
+ spin_unlock(&wreq->lock);
+ to = subreq;
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
stream->sreq_max_len = len;
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 51e8c9430477..160018c4fb36 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -266,7 +266,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
else
tsk = find_task_by_pid_ns(arg, pid_ns);
if (!tsk)
- break;
+ return ret;
switch (ioctl) {
case NS_GET_PID_FROM_PIDNS:
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 97b660eaa00c..421c6cdcbb53 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -583,24 +583,13 @@ static u32 ntfs_resident_attr_min_value_length(const __le32 type)
case AT_STANDARD_INFORMATION:
return offsetof(struct standard_information, ver) +
sizeof(((struct standard_information *)0)->ver.v1.reserved12);
- case AT_ATTRIBUTE_LIST:
- return offsetof(struct attr_list_entry, name);
case AT_FILE_NAME:
- return offsetof(struct file_name_attr, file_name);
- case AT_OBJECT_ID:
- return sizeof(struct guid);
- case AT_SECURITY_DESCRIPTOR:
- return sizeof(struct security_descriptor_relative);
+ return offsetof(struct file_name_attr, file_name) +
+ sizeof(__le16) * 1;
case AT_VOLUME_INFORMATION:
return sizeof(struct volume_information);
- case AT_INDEX_ROOT:
- return sizeof(struct index_root);
- case AT_REPARSE_POINT:
- return offsetof(struct reparse_point, reparse_data);
case AT_EA_INFORMATION:
return sizeof(struct ea_information);
- case AT_EA:
- return offsetof(struct ea_attr, ea_name) + 1;
default:
return 0;
}
@@ -672,6 +661,9 @@ static int ntfs_attr_find(const __le32 type, const __le16 *name,
__le16 *upcase = vol->upcase;
u32 upcase_len = vol->upcase_len;
unsigned int space;
+ u16 name_offset;
+ u32 attr_len;
+ u32 name_size;
/*
* Iterate over attributes in mft record starting at @ctx->attr, or the
@@ -699,6 +691,20 @@ static int ntfs_attr_find(const __le32 type, const __le16 *name,
return -ENOENT;
if (unlikely(!a->length))
break;
+ if (a->name_length) {
+ name_offset = le16_to_cpu(a->name_offset);
+ attr_len = le32_to_cpu(a->length);
+ name_size = a->name_length * sizeof(__le16);
+
+ if (name_offset > attr_len ||
+ attr_len - name_offset < name_size) {
+ ntfs_error(vol->sb,
+ "Corrupt attribute name in MFT record %llu\n",
+ ctx->ntfs_ino->mft_no);
+ break;
+ }
+ }
+
if (type == AT_UNUSED)
return 0;
if (a->type != type)
@@ -712,14 +718,6 @@ static int ntfs_attr_find(const __le32 type, const __le16 *name,
if (a->name_length)
return -ENOENT;
} else {
- if (a->name_length && ((le16_to_cpu(a->name_offset) +
- a->name_length * sizeof(__le16)) >
- le32_to_cpu(a->length))) {
- ntfs_error(vol->sb, "Corrupt attribute name in MFT record %llu\n",
- ctx->ntfs_ino->mft_no);
- break;
- }
-
if (!ntfs_are_names_equal(name, name_len,
(__le16 *)((u8 *)a + le16_to_cpu(a->name_offset)),
a->name_length, ic, upcase, upcase_len)) {
@@ -2924,12 +2922,12 @@ int ntfs_attr_open(struct ntfs_inode *ni, const __le32 type,
struct ntfs_inode *base_ni;
int err;
- ntfs_debug("Entering for inode %lld, attr 0x%x.\n",
- (unsigned long long)ni->mft_no, type);
-
if (!ni || !ni->vol)
return -EINVAL;
+ ntfs_debug("Entering for inode %lld, attr 0x%x.\n",
+ ni->mft_no, type);
+
if (NInoAttr(ni))
base_ni = ni->ext.base_ntfs_ino;
else
diff --git a/fs/ntfs/attrlist.c b/fs/ntfs/attrlist.c
index bd501e8a628c..c2594d4c83b0 100644
--- a/fs/ntfs/attrlist.c
+++ b/fs/ntfs/attrlist.c
@@ -119,15 +119,14 @@ int ntfs_attrlist_entry_add(struct ntfs_inode *ni, struct attr_record *attr)
struct mft_record *ni_mrec;
u8 *old_al;
- ntfs_debug("Entering for inode 0x%llx, attr 0x%x.\n",
- (long long) ni->mft_no,
- (unsigned int) le32_to_cpu(attr->type));
-
if (!ni || !attr) {
ntfs_debug("Invalid arguments.\n");
return -EINVAL;
}
+ ntfs_debug("Entering for inode 0x%llx, attr 0x%x.\n",
+ ni->mft_no, (unsigned int) le32_to_cpu(attr->type));
+
ni_mrec = map_mft_record(ni);
if (IS_ERR(ni_mrec)) {
ntfs_debug("Invalid arguments.\n");
diff --git a/fs/ntfs/bdev-io.c b/fs/ntfs/bdev-io.c
index 67e65c88d681..27d7c2767a33 100644
--- a/fs/ntfs/bdev-io.c
+++ b/fs/ntfs/bdev-io.c
@@ -97,6 +97,8 @@ int ntfs_bdev_write(struct super_block *sb, void *buf, loff_t start, size_t size
idx_end++;
for (; idx < idx_end; idx++, from = 0) {
+ u32 len;
+
folio = read_mapping_folio(sb->s_bdev->bd_mapping, idx, NULL);
if (IS_ERR(folio)) {
ntfs_error(sb, "Unable to read %ld page", idx);
@@ -105,9 +107,10 @@ int ntfs_bdev_write(struct super_block *sb, void *buf, loff_t start, size_t size
offset = (loff_t)idx << PAGE_SHIFT;
to = min_t(u32, end - offset, PAGE_SIZE);
+ len = to - from;
- memcpy_to_folio(folio, from, buf + buf_off, to);
- buf_off += to;
+ memcpy_to_folio(folio, from, buf + buf_off, len);
+ buf_off += len;
folio_mark_uptodate(folio);
folio_mark_dirty(folio);
folio_put(folio);
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index a547bdcfa456..146e011c1a41 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -677,11 +677,11 @@ static int ntfs_ib_read(struct ntfs_index_context *icx, s64 vcn, struct index_bl
static int ntfs_icx_parent_inc(struct ntfs_index_context *icx)
{
- icx->pindex++;
- if (icx->pindex >= MAX_PARENT_VCN) {
+ if (icx->pindex >= MAX_PARENT_VCN - 1) {
ntfs_error(icx->idx_ni->vol->sb, "Index is over %d level deep", MAX_PARENT_VCN);
return -EOPNOTSUPP;
}
+ icx->pindex++;
return 0;
}
@@ -1970,6 +1970,7 @@ struct index_entry *ntfs_index_walk_down(struct index_entry *ie, struct ntfs_ind
{
struct index_entry *entry;
struct index_block *ib;
+ int err;
s64 vcn;
entry = ie;
@@ -1979,14 +1980,20 @@ struct index_entry *ntfs_index_walk_down(struct index_entry *ie, struct ntfs_ind
ib = kvzalloc(ictx->block_size, GFP_NOFS);
if (!ib)
return ERR_PTR(-ENOMEM);
- /* down from level zero */
+ /*
+ * Descending from root index (level 0) to the first
+ * child level. is_in_root == true implies pindex == 0,
+ * so advance to level 1.
+ */
+ ictx->pindex = 1;
ictx->ir = NULL;
ictx->ib = ib;
- ictx->pindex = 1;
ictx->is_in_root = false;
} else {
/* down from non-zero level */
- ictx->pindex++;
+ err = ntfs_icx_parent_inc(ictx);
+ if (err)
+ return ERR_PTR(err);
}
ictx->parent_pos[ictx->pindex] = 0;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 16890d411194..360bebd1ee3f 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2582,8 +2582,8 @@ int ntfs_inode_sync_filename(struct ntfs_inode *ni)
mutex_lock_nested(&index_ni->mrec_lock, NTFS_INODE_MUTEX_PARENT);
if (NInoBeingDeleted(ni)) {
- iput(index_vi);
mutex_unlock(&index_ni->mrec_lock);
+ iput(index_vi);
continue;
}
@@ -2591,8 +2591,8 @@ int ntfs_inode_sync_filename(struct ntfs_inode *ni)
if (!ictx) {
ntfs_error(sb, "Failed to get index ctx, inode %llu",
index_ni->mft_no);
- iput(index_vi);
mutex_unlock(&index_ni->mrec_lock);
+ iput(index_vi);
continue;
}
@@ -2601,8 +2601,8 @@ int ntfs_inode_sync_filename(struct ntfs_inode *ni)
ntfs_debug("Index lookup failed, inode %llu",
index_ni->mft_no);
ntfs_index_ctx_put(ictx);
- iput(index_vi);
mutex_unlock(&index_ni->mrec_lock);
+ iput(index_vi);
continue;
}
/* Update flags and file size. */
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 3f8d1640f1d5..d3f25d8e29f9 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -710,6 +710,9 @@ map_vcn:
if (unlikely(lcn == LCN_RL_NOT_MAPPED)) {
vcn = rl->vcn;
kvfree(empty_buf);
+ empty_buf = NULL;
+ kfree(ra);
+ ra = NULL;
goto map_vcn;
}
/* If this run is not valid abort with an error. */
@@ -753,7 +756,7 @@ map_vcn:
} while (start < end);
} while ((++rl)->vcn < end_vcn);
up_write(&log_ni->runlist.lock);
- kfree(empty_buf);
+ kvfree(empty_buf);
kfree(ra);
truncate_inode_pages(log_vi->i_mapping, 0);
/* Set the flag so we do not have to do it again on remount. */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 7d989267a82b..a7d10ee41b34 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -30,6 +30,8 @@ int ntfs_mft_record_check(const struct ntfs_volume *vol, struct mft_record *m,
{
struct attr_record *a;
struct super_block *sb = vol->sb;
+ u16 attrs_offset;
+ u32 bytes_in_use;
if (!ntfs_is_file_record(m->magic)) {
ntfs_error(sb, "Record %llu has no FILE magic (0x%x)\n",
@@ -65,7 +67,16 @@ int ntfs_mft_record_check(const struct ntfs_volume *vol, struct mft_record *m,
goto err_out;
}
- a = (struct attr_record *)((char *)m + le16_to_cpu(m->attrs_offset));
+ attrs_offset = le16_to_cpu(m->attrs_offset);
+ bytes_in_use = le32_to_cpu(m->bytes_in_use);
+
+ if (attrs_offset > bytes_in_use ||
+ bytes_in_use - attrs_offset < sizeof_field(struct attr_record, type)) {
+ ntfs_error(sb, "Record %llu has corrupt attribute offset\n", mft_no);
+ goto err_out;
+ }
+
+ a = (struct attr_record *)((char *)m + attrs_offset);
if ((char *)a < (char *)m || (char *)a > (char *)m + vol->mft_record_size) {
ntfs_error(sb, "Record %llu is corrupt\n", mft_no);
goto err_out;
@@ -449,7 +460,7 @@ static void ntfs_bio_end_io(struct bio *bio)
int ntfs_sync_mft_mirror(struct ntfs_volume *vol, const u64 mft_no,
struct mft_record *m)
{
- u8 *kmirr = NULL;
+ u8 *kmirr;
struct folio *folio;
unsigned int folio_ofs, lcn_folio_off = 0;
int err = 0;
@@ -479,6 +490,7 @@ int ntfs_sync_mft_mirror(struct ntfs_volume *vol, const u64 mft_no,
kmirr = kmap_local_folio(folio, 0) + folio_ofs;
/* Copy the mst protected mft record to the mirror. */
memcpy(kmirr, m, vol->mft_record_size);
+ kunmap_local(kmirr);
if (vol->cluster_size_bits > PAGE_SHIFT) {
lcn_folio_off = folio->index << PAGE_SHIFT;
@@ -490,20 +502,22 @@ int ntfs_sync_mft_mirror(struct ntfs_volume *vol, const u64 mft_no,
NTFS_B_TO_SECTOR(vol, NTFS_CLU_TO_B(vol, vol->mftmirr_lcn) +
lcn_folio_off + folio_ofs);
- if (!bio_add_folio(bio, folio, vol->mft_record_size, folio_ofs)) {
+ if (bio_add_folio(bio, folio, vol->mft_record_size, folio_ofs))
+ err = submit_bio_wait(bio);
+ else
err = -EIO;
- bio_put(bio);
- goto unlock_folio;
- }
+ bio_put(bio);
- bio->bi_end_io = ntfs_bio_end_io;
- submit_bio(bio);
- /* Current state: all buffers are clean, unlocked, and uptodate. */
+ /*
+ * The in-memory mirror is now valid because we just memcpy()'d the
+ * mst-protected mft record into it. Mark the folio uptodate even on
+ * write error so a subsequent read_mapping_folio() does not refetch
+ * the stale on-disk mirror and overwrite this copy. The error is
+ * propagated to the caller via @err.
+ */
folio_mark_uptodate(folio);
-unlock_folio:
folio_unlock(folio);
- kunmap_local(kmirr);
folio_put(folio);
if (likely(!err)) {
ntfs_debug("Done.");
@@ -588,20 +602,36 @@ int write_mft_record_nolock(struct ntfs_inode *ni, struct mft_record *m, int syn
}
/* Synchronize the mft mirror now if not @sync. */
- if (!sync && ni->mft_no < vol->mftmirr_size)
- ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m);
+ if (!sync && ni->mft_no < vol->mftmirr_size) {
+ int sub_err = ntfs_sync_mft_mirror(vol, ni->mft_no,
+ fixup_m);
+ if (unlikely(sub_err) && !err)
+ err = sub_err;
+ }
- folio_get(folio);
- bio->bi_private = folio;
- bio->bi_end_io = ntfs_bio_end_io;
- submit_bio(bio);
+ if (sync) {
+ int sub_err = submit_bio_wait(bio);
+
+ bio_put(bio);
+ if (unlikely(sub_err) && !err)
+ err = sub_err;
+ } else {
+ folio_get(folio);
+ bio->bi_private = folio;
+ bio->bi_end_io = ntfs_bio_end_io;
+ submit_bio(bio);
+ }
offset += vol->cluster_size;
i++;
}
/* If @sync, now synchronize the mft mirror. */
- if (sync && ni->mft_no < vol->mftmirr_size)
- ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m);
+ if (sync && ni->mft_no < vol->mftmirr_size) {
+ int sub_err = ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m);
+
+ if (unlikely(sub_err) && !err)
+ err = sub_err;
+ }
kunmap_local(kaddr);
if (unlikely(err)) {
/* I/O error during writing. This is really bad! */
@@ -617,10 +647,10 @@ put_bio_out:
bio_put(bio);
err_out:
/*
- * Current state: all buffers are clean, unlocked, and uptodate.
- * The caller should mark the base inode as bad so that no more i/o
- * happens. ->drop_inode() will still be invoked so all extent inodes
- * and other allocated memory will be freed.
+ * The caller should mark the base inode as bad so no more I/O
+ * happens. ->drop_inode() will still be invoked so all extent inodes
+ * and other allocated memory will be freed. ENOMEM is retried by
+ * redirtying the mft record below.
*/
if (err == -ENOMEM) {
ntfs_error(vol->sb,
@@ -833,7 +863,7 @@ static bool ntfs_may_write_mft_record(struct ntfs_volume *vol, const u64 mft_no,
vi = igrab(mft_vi);
WARN_ON(vi != mft_vi);
} else {
- vi = find_inode_nowait(sb, mft_no, ntfs_test_inode_wb, &na);
+ vi = find_inode_nowait(sb, na.mft_no, ntfs_test_inode_wb, &na);
if (na.state == NI_BeingDeleted || na.state == NI_BeingCreated)
return false;
}
@@ -1034,7 +1064,7 @@ static s64 ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(struct ntfs_volume *vo
b = ffz((unsigned long)*byte);
if (b < 8 && b >= (bit & 7)) {
ll = data_pos + (bit & ~7ull) + b;
- if (unlikely(ll > (1ll << 32))) {
+ if (unlikely(ll >= (1ll << 32))) {
folio_unlock(folio);
kunmap_local(buf);
folio_put(folio);
@@ -2721,8 +2751,11 @@ static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *w
ntfs_debug("Entering for inode 0x%llx, attribute type 0x%x, folio index 0x%lx.",
ni->mft_no, ni->type, folio->index);
- if (!locked_nis || !ref_inos)
+ if (!locked_nis || !ref_inos) {
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
return -ENOMEM;
+ }
/* We have to zero every time due to mmap-at-end-of-file. */
if (folio->index >= (i_size >> folio_shift(folio)))
@@ -2840,9 +2873,13 @@ flush_bio:
}
prev_mft_ofs = mft_ofs;
- if (mft_no < vol->mftmirr_size)
- ntfs_sync_mft_mirror(vol, mft_no,
+ if (mft_no < vol->mftmirr_size) {
+ int sub_err = ntfs_sync_mft_mirror(vol, mft_no,
(struct mft_record *)(kaddr + mft_ofs));
+
+ if (unlikely(sub_err) && !err)
+ err = sub_err;
+ }
} else if (ref_inos[nr_ref_inos])
nr_ref_inos++;
}
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 96c450e62efc..c4f82846c58c 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -344,9 +344,9 @@ static int ntfs_sd_add_everyone(struct ntfs_inode *ni)
sd_len = sizeof(struct security_descriptor_relative) + 2 *
(sizeof(struct ntfs_sid) + 8) + sizeof(struct ntfs_acl) +
sizeof(struct ntfs_ace) + 4;
- sd = kmalloc(sd_len, GFP_NOFS);
+ sd = kzalloc(sd_len, GFP_NOFS);
if (!sd)
- return -1;
+ return -ENOMEM;
sd->revision = 1;
sd->control = SE_DACL_PRESENT | SE_SELF_RELATIVE;
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index da21dbeaaf66..e7de3d01257e 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -2056,10 +2056,11 @@ struct runlist_element *ntfs_rl_collapse_range(struct runlist_element *dst_rl, i
* consists of holes.
*/
merge_cnt = 0;
- i = new_1st_cnt == 0 ? 1 : new_1st_cnt;
- if (ntfs_rle_lcn_contiguous(&new_rl[i - 1], &new_rl[i])) {
- /* Merge right and left */
- s_rl = &new_rl[new_1st_cnt - 1];
+ if (new_1st_cnt > 0 &&
+ ntfs_rle_lcn_contiguous(&new_rl[new_1st_cnt - 1],
+ &new_rl[new_1st_cnt])) {
+ /* Merge right and left. */
+ s_rl = &new_rl[new_1st_cnt - 1];
s_rl->length += s_rl[1].length;
merge_cnt = 1;
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 22dc7865eca7..9e321cc2febe 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -413,6 +413,7 @@ int ntfs_write_volume_label(struct ntfs_volume *vol, char *label)
{
struct ntfs_inode *vol_ni = NTFS_I(vol->vol_ino);
struct ntfs_attr_search_ctx *ctx;
+ char *new_label;
__le16 *uname;
int uname_len, ret;
@@ -425,7 +426,7 @@ int ntfs_write_volume_label(struct ntfs_volume *vol, char *label)
return uname_len;
}
- if (uname_len > NTFS_MAX_LABEL_LEN) {
+ if (uname_len > NTFS_MAX_LABEL_LEN) {
ntfs_error(vol->sb,
"Volume label is too long (max %d characters).",
NTFS_MAX_LABEL_LEN);
@@ -433,11 +434,22 @@ int ntfs_write_volume_label(struct ntfs_volume *vol, char *label)
return -EINVAL;
}
+ /*
+ * Allocate the in-memory label copy up front. If kstrdup() fails we
+ * bail out before touching on-disk metadata, so the in-memory label
+ * and the on-disk label stay in sync.
+ */
+ new_label = kstrdup(label, GFP_KERNEL);
+ if (!new_label) {
+ kvfree(uname);
+ return -ENOMEM;
+ }
+
mutex_lock(&vol_ni->mrec_lock);
ctx = ntfs_attr_get_search_ctx(vol_ni, NULL);
if (!ctx) {
ret = -ENOMEM;
- goto out;
+ goto out;
}
if (!ntfs_attr_lookup(AT_VOLUME_NAME, NULL, 0, 0, 0, NULL, 0,
@@ -450,12 +462,14 @@ int ntfs_write_volume_label(struct ntfs_volume *vol, char *label)
out:
mutex_unlock(&vol_ni->mrec_lock);
kvfree(uname);
- mark_inode_dirty_sync(vol->vol_ino);
if (ret >= 0) {
kfree(vol->volume_label);
- vol->volume_label = kstrdup(label, GFP_KERNEL);
+ vol->volume_label = new_label;
+ mark_inode_dirty_sync(vol->vol_ino);
ret = 0;
+ } else {
+ kfree(new_label);
}
return ret;
}
@@ -979,6 +993,13 @@ mft_unmap_out:
ntfs_is_baad_recordp((__le32 *)kmirr))
bytes = vol->mft_record_size;
}
+ /* Compare the two records. */
+ if (memcmp(kmft, kmirr, bytes)) {
+ ntfs_error(sb,
+ "$MFT and $MFTMirr record %i do not match. Run chkdsk.",
+ i);
+ goto mm_unmap_out;
+ }
kmft += vol->mft_record_size;
kmirr += vol->mft_record_size;
} while (++i < vol->mftmirr_size);
@@ -1671,7 +1692,7 @@ iput_attrdef_err_out:
iput_upcase_err_out:
vol->upcase_len = 0;
mutex_lock(&ntfs_lock);
- if (vol->upcase == default_upcase) {
+ if (vol->upcase && vol->upcase == default_upcase) {
ntfs_nr_upcase_users--;
vol->upcase = NULL;
}
@@ -1701,7 +1722,7 @@ static void ntfs_volume_free(struct ntfs_volume *vol)
* the number of upcase users if we are a user.
*/
mutex_lock(&ntfs_lock);
- if (vol->upcase == default_upcase) {
+ if (vol->upcase && vol->upcase == default_upcase) {
ntfs_nr_upcase_users--;
vol->upcase = NULL;
}
@@ -2494,7 +2515,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
}
vol->upcase_len = 0;
mutex_lock(&ntfs_lock);
- if (vol->upcase == default_upcase) {
+ if (vol->upcase && vol->upcase == default_upcase) {
ntfs_nr_upcase_users--;
vol->upcase = NULL;
}
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index bec5475de094..75e65e72c2d6 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -362,7 +362,7 @@ static struct dentry *orangefs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
__orangefs_setattr(dir, &iattr);
out:
op_release(new_op);
- return ERR_PTR(ret);
+ return ret ? ERR_PTR(ret) : NULL;
}
static int orangefs_rename(struct mnt_idmap *idmap,
diff --git a/fs/select.c b/fs/select.c
index 75978b18f48f..bf71c9838dfe 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -708,6 +708,17 @@ static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
if (copy_from_user(&tv, tvp, sizeof(tv)))
return -EFAULT;
+ /*
+ * Reject negative components before normalisation. The seconds
+ * sum below is performed in signed long and a crafted negative
+ * timeval can wrap to a positive value that passes
+ * timespec64_valid() and turns into an effectively-infinite
+ * deadline via timespec64_add_safe()'s saturation, instead of
+ * the -EINVAL POSIX requires for negative timeouts.
+ */
+ if (tv.tv_sec < 0 || tv.tv_usec < 0)
+ return -EINVAL;
+
to = &end_time;
if (poll_select_set_timeout(to,
tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 9f76b0347fa9..f557eb7875c7 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -434,7 +434,8 @@ cifs_alloc_inode(struct super_block *sb)
spin_lock_init(&cifs_inode->writers_lock);
cifs_inode->writers = 0;
cifs_inode->netfs.inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
- cifs_inode->netfs.remote_i_size = 0;
+ cifs_inode->netfs._remote_i_size = 0;
+ cifs_inode->netfs._zero_point = 0;
cifs_inode->uniqueid = 0;
cifs_inode->createtime = 0;
cifs_inode->epoch = 0;
@@ -1303,7 +1304,8 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
struct cifsFileInfo *smb_file_src = src_file->private_data;
struct cifsFileInfo *smb_file_target = dst_file->private_data;
struct cifs_tcon *target_tcon, *src_tcon;
- unsigned long long destend, fstart, fend, old_size, new_size;
+ unsigned long long i_size, new_size;
+ unsigned long long destend, fstart, fend;
unsigned int xid;
int rc;
@@ -1347,7 +1349,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
* Advance the EOF marker after the flush above to the end of the range
* if it's short of that.
*/
- if (src_cifsi->netfs.remote_i_size < off + len) {
+ if (netfs_read_remote_i_size(src_inode) < off + len) {
rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len);
if (rc < 0)
goto unlock;
@@ -1368,22 +1370,24 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false);
if (rc)
goto unlock;
- if (fend > target_cifsi->netfs.zero_point)
- target_cifsi->netfs.zero_point = fend + 1;
- old_size = target_cifsi->netfs.remote_i_size;
+
+ spin_lock(&target_inode->i_lock);
+ if (fend > target_cifsi->netfs._zero_point)
+ netfs_write_zero_point(target_inode, fend + 1);
+ i_size = target_inode->i_size;
+ spin_unlock(&target_inode->i_lock);
/* Discard all the folios that overlap the destination region. */
cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend);
truncate_inode_pages_range(&target_inode->i_data, fstart, fend);
- fscache_invalidate(cifs_inode_cookie(target_inode), NULL,
- i_size_read(target_inode), 0);
+ fscache_invalidate(cifs_inode_cookie(target_inode), NULL, i_size, 0);
rc = -EOPNOTSUPP;
if (target_tcon->ses->server->ops->duplicate_extents) {
rc = target_tcon->ses->server->ops->duplicate_extents(xid,
smb_file_src, smb_file_target, off, len, destoff);
- if (rc == 0 && new_size > old_size) {
+ if (rc == 0 && new_size > i_size) {
truncate_setsize(target_inode, new_size);
fscache_resize_cookie(cifs_inode_cookie(target_inode),
new_size);
@@ -1402,8 +1406,12 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
rc = -EINVAL;
}
}
- if (rc == 0 && new_size > target_cifsi->netfs.zero_point)
- target_cifsi->netfs.zero_point = new_size;
+ if (rc == 0) {
+ spin_lock(&target_inode->i_lock);
+ if (new_size > target_cifsi->netfs._zero_point)
+ netfs_write_zero_point(target_inode, new_size);
+ spin_unlock(&target_inode->i_lock);
+ }
}
/* force revalidate of size and timestamps of target file now
@@ -1474,7 +1482,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
* Advance the EOF marker after the flush above to the end of the range
* if it's short of that.
*/
- if (src_cifsi->netfs.remote_i_size < off + len) {
+ if (netfs_read_remote_i_size(src_inode) < off + len) {
rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len);
if (rc < 0)
goto unlock;
@@ -1502,8 +1510,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
fscache_resize_cookie(cifs_inode_cookie(target_inode),
i_size_read(target_inode));
}
- if (rc > 0 && destoff + rc > target_cifsi->netfs.zero_point)
- target_cifsi->netfs.zero_point = destoff + rc;
+ if (rc > 0) {
+ spin_lock(&target_inode->i_lock);
+ if (destoff + rc > target_cifsi->netfs._zero_point)
+ netfs_write_zero_point(target_inode, destoff + rc);
+ spin_unlock(&target_inode->i_lock);
+ }
}
file_accessed(src_file);
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 3990a9012264..9e27bfa7376b 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1465,6 +1465,7 @@ cifs_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
struct cifs_io_subrequest *rdata = mid->callback_data;
struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode);
struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink);
+ struct inode *inode = &ictx->inode;
struct smb_rqst rqst = { .rq_iov = rdata->iov,
.rq_nvec = 1,
.rq_iter = rdata->subreq.io_iter };
@@ -1538,7 +1539,7 @@ do_retry:
} else {
size_t trans = rdata->subreq.transferred + rdata->got_bytes;
if (trans < rdata->subreq.len &&
- rdata->subreq.start + trans >= ictx->remote_i_size) {
+ rdata->subreq.start + trans >= netfs_read_remote_i_size(inode)) {
rdata->result = 0;
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
} else if (rdata->got_bytes > 0) {
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 664a2c223089..b60344125f27 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -2517,18 +2517,23 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result)
{
struct netfs_io_request *wreq = wdata->rreq;
- struct netfs_inode *ictx = netfs_inode(wreq->inode);
+ struct inode *inode = wreq->inode;
+ struct netfs_inode *ictx = netfs_inode(inode);
loff_t wrend;
if (result > 0) {
+ spin_lock(&inode->i_lock);
+
wrend = wdata->subreq.start + wdata->subreq.transferred + result;
- if (wrend > ictx->zero_point &&
+ if (wrend > ictx->_zero_point &&
(wdata->rreq->origin == NETFS_UNBUFFERED_WRITE ||
wdata->rreq->origin == NETFS_DIO_WRITE))
- ictx->zero_point = wrend;
- if (wrend > ictx->remote_i_size)
+ netfs_write_zero_point(inode, wrend);
+ if (wrend > ictx->_remote_i_size)
netfs_resize_file(ictx, wrend, true);
+
+ spin_unlock(&inode->i_lock);
}
netfs_write_subrequest_terminated(&wdata->subreq, result);
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 16a5310155d5..9472c0a6c187 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -119,7 +119,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode);
mtime = inode_get_mtime(inode);
if (timespec64_equal(&mtime, &fattr->cf_mtime) &&
- cifs_i->netfs.remote_i_size == fattr->cf_eof) {
+ netfs_read_remote_i_size(inode) == fattr->cf_eof) {
cifs_dbg(FYI, "%s: inode %llu is unchanged\n",
__func__, cifs_i->uniqueid);
return;
@@ -173,12 +173,12 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr,
CIFS_I(inode)->time = 0; /* force reval */
return -ESTALE;
}
- if (inode_state_read_once(inode) & I_NEW)
- CIFS_I(inode)->netfs.zero_point = fattr->cf_eof;
-
cifs_revalidate_cache(inode, fattr);
spin_lock(&inode->i_lock);
+ if (inode_state_read_once(inode) & I_NEW)
+ netfs_write_zero_point(inode, fattr->cf_eof);
+
fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode);
fattr->cf_atime = timestamp_truncate(fattr->cf_atime, inode);
fattr->cf_ctime = timestamp_truncate(fattr->cf_ctime, inode);
@@ -212,7 +212,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr,
else
clear_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags);
- cifs_i->netfs.remote_i_size = fattr->cf_eof;
+ netfs_write_remote_i_size(inode, fattr->cf_eof);
/*
* Can't safely change the file size here if the client is writing to
* it due to potential races.
@@ -2772,7 +2772,9 @@ cifs_revalidate_mapping(struct inode *inode)
if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_RW_CACHE)
goto skip_invalidate;
- cifs_inode->netfs.zero_point = cifs_inode->netfs.remote_i_size;
+ spin_lock(&inode->i_lock);
+ netfs_write_zero_point(inode, netfs_inode(inode)->_remote_i_size);
+ spin_unlock(&inode->i_lock);
rc = filemap_invalidate_inode(inode, true, 0, LLONG_MAX);
if (rc) {
cifs_dbg(VFS, "%s: invalidate inode %p failed with rc %d\n",
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index be22bbc4a65a..e860fa08b5e3 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -143,7 +143,8 @@ retry:
fattr->cf_rdev = inode->i_rdev;
fattr->cf_uid = inode->i_uid;
fattr->cf_gid = inode->i_gid;
- fattr->cf_eof = CIFS_I(inode)->netfs.remote_i_size;
+ fattr->cf_eof =
+ netfs_read_remote_i_size(inode);
fattr->cf_symlink_target = NULL;
} else {
CIFS_I(inode)->time = 0;
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 3738204984f5..189bb863a9af 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -3402,8 +3402,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
struct inode *inode = file_inode(file);
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsFileInfo *cfile = file->private_data;
- struct netfs_inode *ictx = netfs_inode(inode);
- unsigned long long i_size, new_size, remote_size;
+ unsigned long long i_size, new_size, remote_i_size, zero_point;
long rc;
unsigned int xid;
@@ -3414,9 +3413,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
filemap_invalidate_lock(inode->i_mapping);
- i_size = i_size_read(inode);
- remote_size = ictx->remote_i_size;
- if (offset + len >= remote_size && offset < i_size) {
+ netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
+ if (offset + len >= remote_i_size && offset < i_size) {
unsigned long long top = umin(offset + len, i_size);
rc = filemap_write_and_wait_range(inode->i_mapping, offset, top - 1);
@@ -3449,9 +3447,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
cfile->fid.volatile_fid, cfile->pid, new_size);
if (rc >= 0) {
truncate_setsize(inode, new_size);
+ spin_lock(&inode->i_lock);
netfs_resize_file(&cifsi->netfs, new_size, true);
- if (offset < cifsi->netfs.zero_point)
- cifsi->netfs.zero_point = offset;
+ if (offset < cifsi->netfs._zero_point)
+ netfs_write_zero_point(inode, offset);
+ spin_unlock(&inode->i_lock);
fscache_resize_cookie(cifs_inode_cookie(inode), new_size);
}
}
@@ -3474,7 +3474,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
struct file_zero_data_information fsctl_buf;
- unsigned long long end = offset + len, i_size, remote_i_size;
+ unsigned long long end = offset + len, i_size, remote_i_size, zero_point;
long rc;
unsigned int xid;
__u8 set_sparse = 1;
@@ -3516,14 +3516,17 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
* that we locally hole-punch the tail of the dirty data, the proposed
* EOF update will end up in the wrong place.
*/
- i_size = i_size_read(inode);
- remote_i_size = netfs_inode(inode)->remote_i_size;
+ netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
+
if (end > remote_i_size && i_size > remote_i_size) {
unsigned long long extend_to = umin(end, i_size);
rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, extend_to);
- if (rc >= 0)
- netfs_inode(inode)->remote_i_size = extend_to;
+ if (rc >= 0) {
+ spin_lock(&inode->i_lock);
+ netfs_write_remote_i_size(inode, extend_to);
+ spin_unlock(&inode->i_lock);
+ }
}
unlock:
@@ -3787,7 +3790,6 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
struct inode *inode = file_inode(file);
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsFileInfo *cfile = file->private_data;
- struct netfs_inode *ictx = &cifsi->netfs;
loff_t old_eof, new_eof;
xid = get_xid();
@@ -3805,7 +3807,9 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
goto out_2;
truncate_pagecache_range(inode, off, old_eof);
- ictx->zero_point = old_eof;
+ spin_lock(&inode->i_lock);
+ netfs_write_zero_point(inode, old_eof);
+ spin_unlock(&inode->i_lock);
netfs_wait_for_outstanding_io(inode);
rc = smb2_copychunk_range(xid, cfile, cfile, off + len,
@@ -3822,8 +3826,10 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
rc = 0;
truncate_setsize(inode, new_eof);
+ spin_lock(&inode->i_lock);
netfs_resize_file(&cifsi->netfs, new_eof, true);
- ictx->zero_point = new_eof;
+ netfs_write_zero_point(inode, new_eof);
+ spin_unlock(&inode->i_lock);
fscache_resize_cookie(cifs_inode_cookie(inode), new_eof);
out_2:
filemap_invalidate_unlock(inode->i_mapping);
@@ -3866,13 +3872,17 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon,
goto out_2;
truncate_setsize(inode, new_eof);
+ spin_lock(&inode->i_lock);
netfs_resize_file(&cifsi->netfs, i_size_read(inode), true);
+ spin_unlock(&inode->i_lock);
fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode));
rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len);
if (rc < 0)
goto out_2;
- cifsi->netfs.zero_point = new_eof;
+ spin_lock(&inode->i_lock);
+ netfs_write_zero_point(inode, new_eof);
+ spin_unlock(&inode->i_lock);
rc = smb3_zero_data(file, tcon, off, len, xid);
if (rc < 0)
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 995fcdd30681..3bd300347f16 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4608,6 +4608,7 @@ smb2_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode);
struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink);
struct smb2_hdr *shdr = (struct smb2_hdr *)rdata->iov[0].iov_base;
+ struct inode *inode = &ictx->inode;
struct cifs_credits credits = {
.value = 0,
.instance = 0,
@@ -4721,7 +4722,7 @@ do_retry:
} else {
size_t trans = rdata->subreq.transferred + rdata->got_bytes;
if (trans < rdata->subreq.len &&
- rdata->subreq.start + trans >= ictx->remote_i_size) {
+ rdata->subreq.start + trans >= netfs_read_remote_i_size(inode)) {
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
rdata->result = 0;
}
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 8feca02ddbf2..0f5c18520eff 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -481,8 +481,12 @@ static inline int compare_guid_key(struct oplock_info *opinfo,
const char *guid1, const char *key1)
{
const char *guid2, *key2;
+ struct ksmbd_conn *conn;
- guid2 = opinfo->conn->ClientGUID;
+ conn = READ_ONCE(opinfo->conn);
+ if (!conn)
+ return 0;
+ guid2 = conn->ClientGUID;
key2 = opinfo->o_lease->lease_key;
if (!memcmp(guid1, guid2, SMB2_CLIENT_GUID_SIZE) &&
!memcmp(key1, key2, SMB2_LEASE_KEY_SIZE))
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c
index c1d1f34581d6..9161e9d7ed24 100644
--- a/fs/smb/server/smbacl.c
+++ b/fs/smb/server/smbacl.c
@@ -643,8 +643,10 @@ static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap,
ntace = (struct smb_ace *)((char *)pndace + *size);
ace_sz = fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, flags,
pace->e_perm, 0777);
- if (check_add_overflow(*size, ace_sz, size))
+ if (check_add_overflow(*size, ace_sz, size)) {
+ kfree(sid);
break;
+ }
(*num_aces)++;
if (pace->e_tag == ACL_USER)
ntace->access_req |=
@@ -655,8 +657,10 @@ static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap,
ntace = (struct smb_ace *)((char *)pndace + *size);
ace_sz = fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED,
0x03, pace->e_perm, 0777);
- if (check_add_overflow(*size, ace_sz, size))
+ if (check_add_overflow(*size, ace_sz, size)) {
+ kfree(sid);
break;
+ }
(*num_aces)++;
if (pace->e_tag == ACL_USER)
ntace->access_req |=
@@ -698,8 +702,10 @@ posix_default_acl:
ntace = (struct smb_ace *)((char *)pndace + *size);
ace_sz = fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, 0x0b,
pace->e_perm, 0777);
- if (check_add_overflow(*size, ace_sz, size))
+ if (check_add_overflow(*size, ace_sz, size)) {
+ kfree(sid);
break;
+ }
(*num_aces)++;
if (pace->e_tag == ACL_USER)
ntace->access_req |=
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 354c4d8a1cfb..913164c958b1 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -81,7 +81,7 @@ static int proc_show_files(struct seq_file *m, void *v)
read_lock(&global_ft.lock);
idr_for_each_entry(global_ft.idr, fp, id) {
seq_printf(m, "%#-10x %#-10llx %#-10llx %#-10x",
- fp->tcon->id,
+ fp->tcon ? fp->tcon->id : 0,
fp->persistent_id,
fp->volatile_id,
atomic_read(&fp->refcount));
diff --git a/include/asm-generic/ring_buffer.h b/include/asm-generic/ring_buffer.h
new file mode 100644
index 000000000000..201d2aee1005
--- /dev/null
+++ b/include/asm-generic/ring_buffer.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Generic arch dependent ring_buffer macros.
+ */
+#ifndef __ASM_GENERIC_RING_BUFFER_H__
+#define __ASM_GENERIC_RING_BUFFER_H__
+
+#include <linux/cacheflush.h>
+
+/* Flush cache on ring buffer range if needed. Do nothing by default. */
+#define arch_ring_buffer_flush_range(start, end) do { } while (0)
+
+#endif /* __ASM_GENERIC_RING_BUFFER_H__ */
diff --git a/include/crypto/krb5.h b/include/crypto/krb5.h
index 71dd38f59be1..aac3ecf88467 100644
--- a/include/crypto/krb5.h
+++ b/include/crypto/krb5.h
@@ -121,9 +121,12 @@ size_t crypto_krb5_how_much_buffer(const struct krb5_enctype *krb5,
size_t crypto_krb5_how_much_data(const struct krb5_enctype *krb5,
enum krb5_crypto_mode mode,
size_t *_buffer_size, size_t *_offset);
-void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5,
- enum krb5_crypto_mode mode,
- size_t *_offset, size_t *_len);
+int crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5,
+ enum krb5_crypto_mode mode,
+ size_t *_offset, size_t *_len);
+int crypto_krb5_check_data_len(const struct krb5_enctype *krb5,
+ enum krb5_crypto_mode mode,
+ size_t len, size_t min_content);
struct crypto_aead *crypto_krb5_prepare_encryption(const struct krb5_enctype *krb5,
const struct krb5_buffer *TK,
u32 usage, gfp_t gfp);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 72e76ec54641..ccbc35479684 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -61,7 +61,7 @@ typedef void *efi_handle_t;
/*
* The UEFI spec and EDK2 reference implementation both define EFI_GUID as
- * struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment
+ * struct { u32 a; u16 b; u16 c; u8 d[8]; }; and so the implied alignment
* is 32 bits not 8 bits like our guid_t. In some cases (i.e., on 32-bit ARM),
* this means that firmware services invoked by the kernel may assume that
* efi_guid_t* arguments are 32-bit aligned, and use memory accessors that
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index 6c75df30a281..cd4972a7c97c 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -273,11 +273,11 @@ enum {
*
* %__GFP_ZERO returns a zeroed page on success.
*
- * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself
- * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that
- * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting
- * memory tags at the same time as zeroing memory has minimal additional
- * performance impact.
+ * %__GFP_ZEROTAGS zeroes memory tags at allocation time. Setting memory tags at
+ * the same time as zeroing memory (e.g., with __GFP_ZERO) has minimal
+ * additional performance impact. However, __GFP_ZEROTAGS also zeroes the tags
+ * even if memory is not getting zeroed at allocation time (e.g.,
+ * with init_on_free).
*
* %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation.
* Used for userspace and vmalloc pages; the latter are unpoisoned by
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index af03db851a1d..d7aac9de1c8a 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -347,10 +347,11 @@ static inline void clear_highpage_kasan_tagged(struct page *page)
#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGES
-/* Return false to let people know we did not initialize the pages */
-static inline bool tag_clear_highpages(struct page *page, int numpages)
+/* Returns true if the caller has to initialize the pages */
+static inline bool tag_clear_highpages(struct page *page, int numpages,
+ bool clear_pages)
{
- return false;
+ return clear_pages;
}
#endif
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5c085ef4eda7..127229fbd1a6 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -371,6 +371,7 @@ enum {
/* return values for ->qc_defer */
ATA_DEFER_LINK = 1,
ATA_DEFER_PORT = 2,
+ ATA_DEFER_LINK_EXCL = 3,
/* desc_len for ata_eh_info and context */
ATA_EH_DESC_LEN = 80,
@@ -854,6 +855,9 @@ struct ata_link {
unsigned int sata_spd; /* current SATA PHY speed */
enum ata_lpm_policy lpm_policy;
+ struct work_struct deferred_qc_work;
+ struct ata_queued_cmd *deferred_qc;
+
/* record runtime error info, protected by host_set lock */
struct ata_eh_info eh_info;
/* EH context */
@@ -899,9 +903,6 @@ struct ata_port {
u64 qc_active;
int nr_active_links; /* #links with active qcs */
- struct work_struct deferred_qc_work;
- struct ata_queued_cmd *deferred_qc;
-
struct ata_link link; /* host default link */
struct ata_link *slave_link; /* see ata_slave_link_init() */
diff --git a/include/linux/list.h b/include/linux/list.h
index 00ea8e5fb88b..09d979976b3b 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -191,6 +191,29 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head)
__list_add(new, head->prev, head);
}
+/**
+ * list_add_tail_release - add a new entry with release barrier
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head, using a release barrier to set
+ * the ->next pointer that points to it. This is useful for implementing
+ * queues, in particular one that the elements will be walked through forwards
+ * locklessly.
+ */
+static inline void list_add_tail_release(struct list_head *new,
+ struct list_head *head)
+{
+ struct list_head *prev = head->prev;
+
+ if (__list_add_valid(new, prev, head)) {
+ new->next = head;
+ new->prev = prev;
+ head->prev = new;
+ smp_store_release(&prev->next, new);
+ }
+}
+
/*
* Delete a list entry by making the prev/next entries
* point to each other.
@@ -645,6 +668,20 @@ static inline void list_splice_tail_init(struct list_head *list,
})
/**
+ * list_first_entry_or_null_acquire - get the first element from a list with barrier
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ *
+ * Note that if the list is empty, it returns NULL.
+ */
+#define list_first_entry_or_null_acquire(ptr, type, member) ({ \
+ struct list_head *head__ = (ptr); \
+ struct list_head *pos__ = smp_load_acquire(&head__->next); \
+ pos__ != head__ ? list_entry(pos__, type, member) : NULL; \
+})
+
+/**
* list_last_entry_or_null - get the last element from a list
* @ptr: the list head to take the element from.
* @type: the type of the struct this is embedded in.
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index ba17ac5bf356..243c0f737938 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -62,8 +62,8 @@ struct netfs_inode {
struct fscache_cookie *cache;
#endif
struct mutex wb_lock; /* Writeback serialisation */
- loff_t remote_i_size; /* Size of the remote file */
- loff_t zero_point; /* Size after which we assume there's no data
+ loff_t _remote_i_size; /* Size of the remote file */
+ loff_t _zero_point; /* Size after which we assume there's no data
* on the server */
atomic_t io_count; /* Number of outstanding reqs */
unsigned long flags;
@@ -252,7 +252,7 @@ struct netfs_io_request {
unsigned long long collected_to; /* Point we've collected to */
unsigned long long cleaned_to; /* Position we've cleaned folios to */
unsigned long long abandon_to; /* Position to abandon folios to */
- pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
+ const struct folio *no_unlock_folio; /* Don't unlock this folio after read */
unsigned int direct_bv_count; /* Number of elements in direct_bv[] */
unsigned int debug_id;
unsigned int rsize; /* Maximum read size (0 for none) */
@@ -475,6 +475,254 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode)
}
/**
+ * netfs_read_remote_i_size - Read remote_i_size safely
+ * @inode: The inode to access
+ *
+ * Read remote_i_size safely without the potential for tearing on 32-bit
+ * arches.
+ *
+ * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the
+ * i_size_read/write must be atomic with respect to the local cpu (unlike with
+ * preempt disabled), but they don't need to be atomic with respect to other
+ * cpus like in true SMP (so they need either to either locally disable irq
+ * around the read or for example on x86 they can be still implemented as a
+ * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit
+ * archs it makes no difference if preempt is enabled or not.
+ */
+static inline unsigned long long netfs_read_remote_i_size(const struct inode *inode)
+{
+ const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode);
+ unsigned long long remote_i_size;
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&inode->i_size_seqcount);
+ remote_i_size = ictx->_remote_i_size;
+ } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ remote_i_size = ictx->_remote_i_size;
+ preempt_enable();
+#else
+ /* Pairs with smp_store_release() in netfs_write_remote_i_size() */
+ remote_i_size = smp_load_acquire(&ictx->_remote_i_size);
+#endif
+ return remote_i_size;
+}
+
+/*
+ * netfs_write_remote_i_size - Set remote_i_size safely
+ * @inode: The inode to access
+ * @remote_i_size: The new value for the size of the file on the server
+ *
+ * Set remote_i_size safely without the potential for tearing on 32-bit arches.
+ *
+ * Context: The caller must hold inode->i_lock.
+ *
+ * NOTE: unlike netfs_read_remote_i_size(), netfs_write_remote_i_size() does
+ * need locking around it (normally i_rwsem), otherwise on 32bit/SMP an update
+ * of i_size_seqcount can be lost, resulting in subsequent i_size_read() calls
+ * spinning forever.
+ */
+static inline void netfs_write_remote_i_size(struct inode *inode,
+ unsigned long long remote_i_size)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_begin(&inode->i_size_seqcount);
+ ictx->_remote_i_size = remote_i_size;
+ write_seqcount_end(&inode->i_size_seqcount);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ ictx->_remote_i_size = remote_i_size;
+ preempt_enable();
+#else
+ /*
+ * Pairs with smp_load_acquire() in netfs_read_remote_i_size() to
+ * ensure changes related to inode size (such as page contents) are
+ * visible before we see the changed inode size.
+ */
+ smp_store_release(&ictx->_remote_i_size, remote_i_size);
+#endif
+}
+
+/**
+ * netfs_read_zero_point - Read zero_point safely
+ * @inode: The inode to access
+ *
+ * Read zero_point safely without the potential for tearing on 32-bit
+ * arches.
+ *
+ * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the
+ * i_size_read/write must be atomic with respect to the local cpu (unlike with
+ * preempt disabled), but they don't need to be atomic with respect to other
+ * cpus like in true SMP (so they need either to either locally disable irq
+ * around the read or for example on x86 they can be still implemented as a
+ * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit
+ * archs it makes no difference if preempt is enabled or not.
+ */
+static inline unsigned long long netfs_read_zero_point(const struct inode *inode)
+{
+ struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode);
+ unsigned long long zero_point;
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&inode->i_size_seqcount);
+ zero_point = ictx->_zero_point;
+ } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ zero_point = ictx->_zero_point;
+ preempt_enable();
+#else
+ /* Pairs with smp_store_release() in netfs_write_zero_point() */
+ zero_point = smp_load_acquire(&ictx->_zero_point);
+#endif
+ return zero_point;
+}
+
+/*
+ * netfs_write_zero_point - Set zero_point safely
+ * @inode: The inode to access
+ * @zero_point: The new value for the point beyond which the server has no data
+ *
+ * Set zero_point safely without the potential for tearing on 32-bit arches.
+ *
+ * Context: The caller must hold inode->i_lock.
+ *
+ * NOTE: unlike netfs_read_zero_point(), netfs_write_zero_point() does need
+ * locking around it (normally i_rwsem), otherwise on 32bit/SMP an update of
+ * i_size_seqcount can be lost, resulting in subsequent read calls spinning
+ * forever.
+ */
+static inline void netfs_write_zero_point(struct inode *inode,
+ unsigned long long zero_point)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_begin(&inode->i_size_seqcount);
+ ictx->_zero_point = zero_point;
+ write_seqcount_end(&inode->i_size_seqcount);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ ictx->_zero_point = zero_point;
+ preempt_enable();
+#else
+ /*
+ * Pairs with smp_load_acquire() in netfs_read_zero_point() to
+ * ensure changes related to inode size (such as page contents) are
+ * visible before we see the changed inode size.
+ */
+ smp_store_release(&ictx->_zero_point, zero_point);
+#endif
+}
+
+/**
+ * netfs_read_sizes - Read remote_i_size and zero_point safely
+ * @inode: The inode to access
+ * @i_size: Where to return the local file size.
+ * @remote_i_size: Where to return the size of the file on the server
+ * @zero_point: Where to return the the point beyond which the server has no data
+ *
+ * Read remote_i_size and zero_point safely without the potential for tearing
+ * on 32-bit arches.
+ *
+ * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the
+ * i_size_read/write must be atomic with respect to the local cpu (unlike with
+ * preempt disabled), but they don't need to be atomic with respect to other
+ * cpus like in true SMP (so they need either to either locally disable irq
+ * around the read or for example on x86 they can be still implemented as a
+ * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit
+ * archs it makes no difference if preempt is enabled or not.
+ */
+static inline void netfs_read_sizes(const struct inode *inode,
+ unsigned long long *i_size,
+ unsigned long long *remote_i_size,
+ unsigned long long *zero_point)
+{
+ const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode);
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&inode->i_size_seqcount);
+ *i_size = inode->i_size;
+ *remote_i_size = ictx->_remote_i_size;
+ *zero_point = ictx->_zero_point;
+ } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ *i_size = inode->i_size;
+ *remote_i_size = ictx->_remote_i_size;
+ *zero_point = ictx->_zero_point;
+ preempt_enable();
+#else
+ /* Pairs with smp_store_release() in i_size_write() */
+ *i_size = smp_load_acquire(&inode->i_size);
+ /* Pairs with smp_store_release() in netfs_write_remote_i_size() */
+ *remote_i_size = smp_load_acquire(&ictx->_remote_i_size);
+ /* Pairs with smp_store_release() in netfs_write_zero_point() */
+ *zero_point = smp_load_acquire(&ictx->_zero_point);
+#endif
+}
+
+/*
+ * netfs_write_sizes - Set i_size, remote_i_size and zero_point safely
+ * @inode: The inode to access
+ * @i_size: The new value for the local size of the file
+ * @remote_i_size: The new value for the size of the file on the server
+ * @zero_point: The new value for the point beyond which the server has no data
+ *
+ * Set both remote_i_size and zero_point safely without the potential for
+ * tearing on 32-bit arches.
+ *
+ * Context: The caller must hold inode->i_lock.
+ *
+ * NOTE: unlike netfs_read_zero_point(), netfs_write_zero_point() does need
+ * locking around it (normally i_rwsem), otherwise on 32bit/SMP an update of
+ * i_size_seqcount can be lost, resulting in subsequent read calls spinning
+ * forever.
+ */
+static inline void netfs_write_sizes(struct inode *inode,
+ unsigned long long i_size,
+ unsigned long long remote_i_size,
+ unsigned long long zero_point)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_begin(&inode->i_size_seqcount);
+ inode->i_size = i_size;
+ ictx->_remote_i_size = remote_i_size;
+ ictx->_zero_point = zero_point;
+ write_seqcount_end(&inode->i_size_seqcount);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ inode->i_size = i_size;
+ ictx->_remote_i_size = remote_i_size;
+ ictx->_zero_point = zero_point;
+ preempt_enable();
+#else
+ /*
+ * Pairs with smp_load_acquire() in i_size_read(),
+ * netfs_read_remote_i_size() and netfs_read_zero_point() to ensure
+ * changes related to inode size (such as page contents) are visible
+ * before we see the changed inode size.
+ */
+ smp_store_release(&inode->i_size, i_size);
+ smp_store_release(&ictx->_remote_i_size, remote_i_size);
+ smp_store_release(&ictx->_zero_point, zero_point);
+#endif
+}
+
+/**
* netfs_inode_init - Initialise a netfslib inode context
* @ctx: The netfs inode to initialise
* @ops: The netfs's operations list
@@ -488,8 +736,8 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
bool use_zero_point)
{
ctx->ops = ops;
- ctx->remote_i_size = i_size_read(&ctx->inode);
- ctx->zero_point = LLONG_MAX;
+ ctx->_remote_i_size = i_size_read(&ctx->inode);
+ ctx->_zero_point = LLONG_MAX;
ctx->flags = 0;
atomic_set(&ctx->io_count, 0);
#if IS_ENABLED(CONFIG_FSCACHE)
@@ -498,7 +746,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
mutex_init(&ctx->wb_lock);
/* ->releasepage() drives zero_point */
if (use_zero_point) {
- ctx->zero_point = ctx->remote_i_size;
+ ctx->_zero_point = ctx->_remote_i_size;
mapping_set_release_always(ctx->inode.i_mapping);
}
}
@@ -511,13 +759,40 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
*
* Inform the netfs lib that a file got resized so that it can adjust its state.
*/
-static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
+static inline void netfs_resize_file(struct netfs_inode *ictx,
+ unsigned long long new_i_size,
bool changed_on_server)
{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ struct inode *inode = &ictx->inode;
+
+ preempt_disable();
+ write_seqcount_begin(&inode->i_size_seqcount);
+ if (changed_on_server)
+ ictx->_remote_i_size = new_i_size;
+ if (new_i_size < ictx->_zero_point)
+ ictx->_zero_point = new_i_size;
+ write_seqcount_end(&inode->i_size_seqcount);
+ preempt_enable();
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
if (changed_on_server)
- ctx->remote_i_size = new_i_size;
- if (new_i_size < ctx->zero_point)
- ctx->zero_point = new_i_size;
+ ictx->_remote_i_size = new_i_size;
+ if (new_i_size < ictx->_zero_point)
+ ictx->_zero_point = new_i_size;
+ preempt_enable();
+#else
+ /*
+ * Pairs with smp_load_acquire() in netfs_read_remote_i_size and
+ * netfs_read_zero_point() to ensure changes related to inode size
+ * (such as page contents) are visible before we see the changed inode
+ * size.
+ */
+ if (changed_on_server)
+ smp_store_release(&ictx->_remote_i_size, new_i_size);
+ if (new_i_size < ictx->_zero_point)
+ smp_store_release(&ictx->_zero_point, new_i_size);
+#endif
}
/**
diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h
index d01ef4a6b3d7..7589fccfeef6 100644
--- a/include/linux/soc/airoha/airoha_offload.h
+++ b/include/linux/soc/airoha/airoha_offload.h
@@ -71,9 +71,9 @@ static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev,
#define NPU_RX1_DESC_NUM 512
/* CTRL */
-#define NPU_RX_DMA_DESC_LAST_MASK BIT(27)
-#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(26, 14)
-#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(13, 1)
+#define NPU_RX_DMA_DESC_LAST_MASK BIT(29)
+#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15)
+#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1)
#define NPU_RX_DMA_DESC_DONE_MASK BIT(0)
/* INFO */
#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 29)
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 69eed69f7f26..3faea66b1979 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -398,6 +398,7 @@ void baswap(bdaddr_t *dst, const bdaddr_t *src);
struct bt_sock {
struct sock sk;
struct list_head accept_q;
+ spinlock_t accept_q_lock; /* protects accept_q */
struct sock *parent;
unsigned long flags;
void (*skb_msg_name)(struct sk_buff *, void *, int *);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 02762ce73a0c..a02e569813d2 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1186,8 +1186,9 @@ struct netns_ipvs {
struct timer_list dest_trash_timer; /* expiration timer */
struct mutex service_mutex; /* service reconfig */
struct rw_semaphore svc_resize_sem; /* svc_table resizing */
+ struct rw_semaphore svc_replace_sem; /* svc_table replace */
struct delayed_work svc_resize_work; /* resize svc_table */
- atomic_t svc_table_changes;/* ++ on new table */
+ atomic_t svc_table_changes;/* ++ on table changes */
/* Service counters */
atomic_t num_services[IP_VS_AF_MAX]; /* Services */
atomic_t fwm_services[IP_VS_AF_MAX]; /* Services */
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 2dfee6d4258a..8860cc2175fc 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -489,11 +489,15 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
{
- unsigned int seq, hh_alen;
+ unsigned int seq, hh_alen = HH_DATA_ALIGN(ETH_HLEN);
+ int err;
+
+ err = skb_cow_head(skb, hh_alen);
+ if (err)
+ return err;
do {
seq = read_seqbegin(&hh->hh_lock);
- hh_alen = HH_DATA_ALIGN(ETH_HLEN);
memcpy(skb->data - hh_alen, hh->hh_data, ETH_ALEN + hh_alen - ETH_HLEN);
} while (read_seqretry(&hh->hh_lock, seq));
return 0;
diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h
index 5c3f49b52fe9..3939b816b001 100644
--- a/include/net/net_shaper.h
+++ b/include/net/net_shaper.h
@@ -53,6 +53,7 @@ struct net_shaper {
/* private: */
u32 leaves; /* accounted only for NODE scope */
+ bool valid;
struct rcu_head rcu;
};
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index d17035d14d96..3978c3174cdb 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -14,6 +14,7 @@ struct nf_queue_entry {
struct list_head list;
struct rhash_head hash_node;
struct sk_buff *skb;
+ struct net_device *skb_dev;
unsigned int id;
unsigned int hook_index; /* index in hook_entries->hook[] */
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ecbadcb3a744..98848db62894 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -65,8 +65,6 @@ static inline void tcp_orphan_count_dec(void)
this_cpu_dec(tcp_orphan_count);
}
-DECLARE_PER_CPU(u32, tcp_tw_isn);
-
void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
@@ -1102,10 +1100,13 @@ struct tcp_skb_cb {
__u32 seq; /* Starting sequence number */
__u32 end_seq; /* SEQ + FIN + SYN + datalen */
union {
- /* Note :
+ /* Notes :
+ * tcp_tw_isn is used in input path only
+ * (isn chosen by tcp_timewait_state_process())
* tcp_gso_segs/size are used in write queue only,
* cf tcp_skb_pcount()/tcp_skb_mss()
*/
+ u32 tcp_tw_isn;
struct {
u16 tcp_gso_segs;
u16 tcp_gso_size;
diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
index 24fc402ab3c8..7e25f4469b81 100644
--- a/include/trace/events/damon.h
+++ b/include/trace/events/damon.h
@@ -41,7 +41,7 @@ TRACE_EVENT(damos_stat_after_apply_interval,
),
TP_printk("ctx_idx=%u scheme_idx=%u nr_tried=%lu sz_tried=%lu "
- "nr_applied=%lu sz_tried=%lu sz_ops_filter_passed=%lu "
+ "nr_applied=%lu sz_applied=%lu sz_ops_filter_passed=%lu "
"qt_exceeds=%lu nr_snapshots=%lu",
__entry->context_idx, __entry->scheme_idx,
__entry->nr_tried, __entry->sz_tried,
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 8c936fc575d5..082cb03c6131 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -177,7 +177,11 @@
EM(netfs_folio_is_uptodate, "mod-uptodate") \
EM(netfs_just_prefetch, "mod-prefetch") \
EM(netfs_whole_folio_modify, "mod-whole-f") \
+ EM(netfs_whole_folio_modify_efault, "mod-whole-f!") \
+ EM(netfs_whole_folio_modify_filled, "mod-whole-f+") \
+ EM(netfs_whole_folio_modify_filled_efault, "mod-whole-f+!") \
EM(netfs_modify_and_clear, "mod-n-clear") \
+ EM(netfs_modify_and_clear_rm_finfo, "mod-n-clear+") \
EM(netfs_streaming_write, "mod-streamw") \
EM(netfs_streaming_write_cont, "mod-streamw+") \
EM(netfs_flush_content, "flush") \
@@ -194,6 +198,10 @@
EM(netfs_folio_trace_copy_to_cache, "mark-copy") \
EM(netfs_folio_trace_end_copy, "end-copy") \
EM(netfs_folio_trace_filled_gaps, "filled-gaps") \
+ EM(netfs_folio_trace_invalidate_all, "inval-all") \
+ EM(netfs_folio_trace_invalidate_front, "inval-front") \
+ EM(netfs_folio_trace_invalidate_middle, "inval-mid") \
+ EM(netfs_folio_trace_invalidate_tail, "inval-tail") \
EM(netfs_folio_trace_kill, "kill") \
EM(netfs_folio_trace_kill_cc, "kill-cc") \
EM(netfs_folio_trace_kill_g, "kill-g") \
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 573f2df3a2c9..704a10de6670 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -71,6 +71,7 @@
EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \
EM(rxkad_abort_resp_version, "rxkad-resp-version") \
/* RxGK security errors */ \
+ EM(rxgk_abort_1_short_header, "rxgk1-short-hdr") \
EM(rxgk_abort_1_verify_mic_eproto, "rxgk1-vfy-mic-eproto") \
EM(rxgk_abort_2_decrypt_eproto, "rxgk2-dec-eproto") \
EM(rxgk_abort_2_short_data, "rxgk2-short-data") \
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 0d01cd8c4b4a..7c2f7cc131f7 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -897,11 +897,9 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp
{
int cpu;
- for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
- if (!(mask & (1UL << (cpu - snp->grplo))))
- continue;
- srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay);
- }
+ for (cpu = snp->grplo; cpu <= snp->grphi; cpu++)
+ if ((mask & (1UL << (cpu - snp->grplo))) && rcu_cpu_beenfullyonline(cpu))
+ srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay);
}
/*
@@ -1322,7 +1320,9 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
*/
idx = __srcu_read_lock_nmisafe(ssp);
ss_state = smp_load_acquire(&ssp->srcu_sup->srcu_size_state);
- if (ss_state < SRCU_SIZE_WAIT_CALL)
+ // If !rcu_cpu_beenfullyonline(), interrupts are still disabled,
+ // so no migration is possible in either direction from this CPU.
+ if (ss_state < SRCU_SIZE_WAIT_CALL || !rcu_cpu_beenfullyonline(raw_smp_processor_id()))
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = raw_cpu_ptr(ssp->sda);
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 9b0834134cae..8d3d96e847d8 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -154,7 +154,8 @@ quiet_cmd_check_undefined = NM $<
echo "Unexpected symbols in $<:" >&2; \
echo "$$undefsyms" >&2; \
false; \
- fi
+ fi; \
+ touch $@
$(obj)/%.o.checked: $(obj)/%.o $(obj)/undefsyms_base.o FORCE
$(call if_changed,check_undefined)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5326924615a4..7b07d2004cc6 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -7,6 +7,7 @@
#include <linux/ring_buffer_types.h>
#include <linux/sched/isolation.h>
#include <linux/trace_recursion.h>
+#include <linux/panic_notifier.h>
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
@@ -31,6 +32,7 @@
#include <linux/oom.h>
#include <linux/mm.h>
+#include <asm/ring_buffer.h>
#include <asm/local64.h>
#include <asm/local.h>
#include <asm/setup.h>
@@ -559,6 +561,7 @@ struct trace_buffer {
unsigned long range_addr_start;
unsigned long range_addr_end;
+ struct notifier_block flush_nb;
struct ring_buffer_meta *meta;
@@ -2521,6 +2524,16 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
kfree(cpu_buffer);
}
+/* Stop recording on a persistent buffer and flush cache if needed. */
+static int rb_flush_buffer_cb(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct trace_buffer *buffer = container_of(nb, struct trace_buffer, flush_nb);
+
+ ring_buffer_record_off(buffer);
+ arch_ring_buffer_flush_range(buffer->range_addr_start, buffer->range_addr_end);
+ return NOTIFY_DONE;
+}
+
static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
int order, unsigned long start,
unsigned long end,
@@ -2651,6 +2664,12 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
mutex_init(&buffer->mutex);
+ /* Persistent ring buffer needs to flush cache before reboot. */
+ if (start && end) {
+ buffer->flush_nb.notifier_call = rb_flush_buffer_cb;
+ atomic_notifier_chain_register(&panic_notifier_list, &buffer->flush_nb);
+ }
+
return_ptr(buffer);
fail_free_buffers:
@@ -2749,6 +2768,9 @@ ring_buffer_free(struct trace_buffer *buffer)
{
int cpu;
+ if (buffer->range_addr_start && buffer->range_addr_end)
+ atomic_notifier_chain_unregister(&panic_notifier_list, &buffer->flush_nb);
+
cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
irq_work_sync(&buffer->irq_work.work);
@@ -5407,6 +5429,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
iter->head_page = cpu_buffer->reader_page;
iter->head = cpu_buffer->reader_page->read;
iter->next_event = iter->head;
+ iter->missed_events = 0;
iter->cache_reader_page = iter->head_page;
iter->cache_read = cpu_buffer->read;
@@ -6086,10 +6109,7 @@ ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts,
*/
bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter)
{
- bool ret = iter->missed_events != 0;
-
- iter->missed_events = 0;
- return ret;
+ return iter->missed_events != 0;
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_dropped);
@@ -6251,7 +6271,7 @@ void ring_buffer_iter_advance(struct ring_buffer_iter *iter)
unsigned long flags;
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
-
+ iter->missed_events = 0;
rb_advance_iter(iter);
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
diff --git a/kernel/trace/simple_ring_buffer.c b/kernel/trace/simple_ring_buffer.c
index 02af2297ae5a..f4642f5adda3 100644
--- a/kernel/trace/simple_ring_buffer.c
+++ b/kernel/trace/simple_ring_buffer.c
@@ -395,7 +395,6 @@ int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
cpu_buffer->meta->meta_page_size = PAGE_SIZE;
- cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages;
/* The reader page is not part of the ring initially */
page = load_page(desc->page_va[0]);
@@ -431,12 +430,13 @@ int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
if (ret) {
for (i--; i >= 0; i--)
- unload_page((void *)desc->page_va[i]);
+ unload_page(bpages[i].page);
unload_page(cpu_buffer->meta);
return ret;
}
+ cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages;
/* Close the ring */
bpage->link.next = &cpu_buffer->tail_page->link;
cpu_buffer->tail_page->link.prev = &bpage->link;
diff --git a/lib/tests/kunit_iov_iter.c b/lib/tests/kunit_iov_iter.c
index 37bd6eb25896..f02f7b7aa796 100644
--- a/lib/tests/kunit_iov_iter.c
+++ b/lib/tests/kunit_iov_iter.c
@@ -1128,7 +1128,7 @@ static void __init iov_kunit_iter_to_sg_kvec(struct kunit *test)
struct kvec kvec;
size_t bufsize;
- bufsize = 0x100000;
+ bufsize = 0x200000;
iov_kunit_iter_to_sg_init(test, bufsize, false, &data);
kvec.iov_base = data.buffer;
@@ -1146,7 +1146,7 @@ static void __init iov_kunit_iter_to_sg_bvec(struct kunit *test)
struct bio_vec *bvec;
struct iov_iter iter;
- bufsize = 0x100000;
+ bufsize = 0x200000;
iov_kunit_iter_to_sg_init(test, bufsize, false, &data);
bvec = kunit_kmalloc_array(test, data.npages, sizeof(*bvec),
@@ -1173,7 +1173,7 @@ static void __init iov_kunit_iter_to_sg_folioq(struct kunit *test)
struct iov_iter iter;
size_t bufsize;
- bufsize = 0x100000;
+ bufsize = 0x200000;
iov_kunit_iter_to_sg_init(test, bufsize, false, &data);
folioq = iov_kunit_create_folioq(test);
@@ -1190,7 +1190,7 @@ static void __init iov_kunit_iter_to_sg_xarray(struct kunit *test)
struct iov_iter iter;
size_t bufsize;
- bufsize = 0x100000;
+ bufsize = 0x200000;
iov_kunit_iter_to_sg_init(test, bufsize, false, &data);
xarray = iov_kunit_create_xarray(test);
@@ -1206,7 +1206,7 @@ static void __init iov_kunit_iter_to_sg_ubuf(struct kunit *test)
struct iov_iter iter;
size_t bufsize;
- bufsize = 0x100000;
+ bufsize = 0x200000;
iov_kunit_iter_to_sg_init(test, bufsize, true, &data);
iov_iter_ubuf(&iter, READ, data.ubuf, bufsize);
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 245d63808411..04746cbb3327 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -2594,6 +2594,7 @@ static int damon_sysfs_memcg_path_to_id(char *memcg_path, u64 *id)
if (damon_sysfs_memcg_path_eq(memcg, path, memcg_path)) {
*id = mem_cgroup_id(memcg);
found = true;
+ mem_cgroup_iter_break(NULL, memcg);
break;
}
}
diff --git a/mm/memory.c b/mm/memory.c
index ea6568571131..86a973119bd4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -612,6 +612,21 @@ static void print_bad_page_map(struct vm_area_struct *vma,
dump_stack();
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
+
+static inline bool pgtable_level_has_pxx_special(enum pgtable_level level)
+{
+ switch (level) {
+ case PGTABLE_LEVEL_PTE:
+ return IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL);
+ case PGTABLE_LEVEL_PMD:
+ return IS_ENABLED(CONFIG_ARCH_SUPPORTS_PMD_PFNMAP);
+ case PGTABLE_LEVEL_PUD:
+ return IS_ENABLED(CONFIG_ARCH_SUPPORTS_PUD_PFNMAP);
+ default:
+ return false;
+ }
+}
+
#define print_bad_pte(vma, addr, pte, page) \
print_bad_page_map(vma, addr, pte_val(pte), page, PGTABLE_LEVEL_PTE)
@@ -684,7 +699,7 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, bool special,
unsigned long long entry, enum pgtable_level level)
{
- if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
+ if (pgtable_level_has_pxx_special(level)) {
if (unlikely(special)) {
#ifdef CONFIG_FIND_NORMAL_PAGE
if (vma->vm_ops && vma->vm_ops->find_normal_page)
@@ -699,8 +714,9 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma,
return NULL;
}
/*
- * With CONFIG_ARCH_HAS_PTE_SPECIAL, any special page table
- * mappings (incl. shared zero folios) are marked accordingly.
+ * With working pte_special()/pmd_special()..., any special page
+ * table mappings (incl. shared zero folios) are marked
+ * accordingly.
*/
} else {
if (unlikely(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))) {
@@ -1739,7 +1755,7 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb,
* consider uffd-wp bit when zap. For more information,
* see zap_install_uffd_wp_if_needed().
*/
- WARN_ON_ONCE(!vma_is_anonymous(vma));
+ WARN_ON_ONCE(!folio_test_anon(folio));
rss[mm_counter(folio)]--;
folio_remove_rmap_pte(folio, page, vma);
folio_put(folio);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2a943ec57c85..40c7915dabe0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1422,6 +1422,8 @@ static void remove_memory_blocks_and_altmaps(u64 start, u64 size)
altmap = mem->altmap;
mem->altmap = NULL;
+ /* drop the ref. we got via find_memory_block() */
+ put_device(&mem->dev);
remove_memory_block_devices(cur_start, memblock_size);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index fbfe5715f635..ab49d4dcdb60 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -850,7 +850,7 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
ptl = pmd_lock(vma->vm_mm, pmdp);
csa_ret = check_stable_address_space(vma->vm_mm);
if (csa_ret)
- goto abort;
+ goto unlock_abort;
/*
* Check for userfaultfd but do not deliver the fault. Instead,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 227d58dc3de6..23c7298d3be2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1808,9 +1808,9 @@ static inline bool should_skip_init(gfp_t flags)
inline void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags)
{
+ const bool zero_tags = gfp_flags & __GFP_ZEROTAGS;
bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) &&
!should_skip_init(gfp_flags);
- bool zero_tags = init && (gfp_flags & __GFP_ZEROTAGS);
int i;
set_page_private(page, 0);
@@ -1832,11 +1832,11 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
*/
/*
- * If memory tags should be zeroed
- * (which happens only when memory should be initialized as well).
+ * Clearing tags can efficiently clear the memory for us as well, if
+ * required.
*/
if (zero_tags)
- init = !tag_clear_highpages(page, 1 << order);
+ init = tag_clear_highpages(page, 1 << order, /* clear_pages= */init);
if (!should_skip_kasan_unpoison(gfp_flags) &&
kasan_unpoison_pages(page, order, init)) {
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index e7315c01a299..30493ea3c010 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -542,6 +542,11 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
struct ddpehdr *ddp = (struct ddpehdr *)skb->data;
int ft = 2;
+ if (!at) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
+
/*
* Compressible ?
*
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 74ef7dc2b2f9..b8b1b997960a 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -224,6 +224,8 @@ static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
hard_iface->bat_iv.ogm_buff = NULL;
mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+ cancel_delayed_work_sync(&hard_iface->bat_iv.reschedule_work);
}
static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
@@ -536,8 +538,10 @@ out:
* @if_incoming: interface where the packet was received
* @if_outgoing: interface for which the retransmission should be considered
* @own_packet: true if it is a self-generated ogm
+ *
+ * Return: whether forward packet was scheduled
*/
-static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
+static bool batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
int packet_len, unsigned long send_time,
bool direct_link,
struct batadv_hard_iface *if_incoming,
@@ -561,13 +565,13 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
skb = netdev_alloc_skb_ip_align(NULL, skb_size);
if (!skb)
- return;
+ return false;
forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
queue_left, bat_priv, skb);
if (!forw_packet_aggr) {
kfree_skb(skb);
- return;
+ return false;
}
forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
@@ -590,6 +594,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
batadv_iv_send_outstanding_bat_ogm_packet);
batadv_forw_packet_ogmv1_queue(bat_priv, forw_packet_aggr, send_time);
+
+ return true;
}
/* aggregate a new packet into the existing ogm packet */
@@ -617,8 +623,10 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
* @if_outgoing: interface for which the retransmission should be considered
* @own_packet: true if it is a self-generated ogm
* @send_time: timestamp (jiffies) when the packet is to be sent
+ *
+ * Return: whether forward packet was scheduled
*/
-static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
+static bool batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
unsigned char *packet_buff,
int packet_len,
struct batadv_hard_iface *if_incoming,
@@ -670,14 +678,16 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
if (!own_packet && atomic_read(&bat_priv->aggregated_ogms))
send_time += max_aggregation_jiffies;
- batadv_iv_ogm_aggregate_new(packet_buff, packet_len,
- send_time, direct_link,
- if_incoming, if_outgoing,
- own_packet);
+ return batadv_iv_ogm_aggregate_new(packet_buff, packet_len,
+ send_time, direct_link,
+ if_incoming, if_outgoing,
+ own_packet);
} else {
batadv_iv_ogm_aggregate(forw_packet_aggr, packet_buff,
packet_len, direct_link);
spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+ return true;
}
}
@@ -790,6 +800,9 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
u32 seqno;
u16 tvlv_len = 0;
unsigned long send_time;
+ bool reschedule = false;
+ bool scheduled;
+ int ret;
lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex);
@@ -813,9 +826,15 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
* appended as it may alter the tt tvlv container
*/
batadv_tt_local_commit_changes(bat_priv);
- tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff,
- ogm_buff_len,
- BATADV_OGM_HLEN);
+ ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff,
+ ogm_buff_len,
+ BATADV_OGM_HLEN);
+ if (ret < 0) {
+ reschedule = true;
+ goto out;
+ }
+
+ tvlv_len = ret;
}
batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff);
@@ -834,8 +853,11 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
/* OGMs from secondary interfaces are only scheduled on their
* respective interfaces.
*/
- batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len,
- hard_iface, hard_iface, 1, send_time);
+ scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len,
+ hard_iface, hard_iface, 1, send_time);
+ if (!scheduled)
+ reschedule = true;
+
goto out;
}
@@ -847,15 +869,28 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
if (!kref_get_unless_zero(&tmp_hard_iface->refcount))
continue;
- batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
- *ogm_buff_len, hard_iface,
- tmp_hard_iface, 1, send_time);
-
+ scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
+ *ogm_buff_len, hard_iface,
+ tmp_hard_iface, 1, send_time);
batadv_hardif_put(tmp_hard_iface);
+
+ if (!scheduled && tmp_hard_iface == hard_iface)
+ reschedule = true;
}
rcu_read_unlock();
out:
+ if (reschedule) {
+ /* there was a failure scheduling the own forward packet.
+ * as result, the batadv_iv_send_outstanding_bat_ogm_packet()
+ * work item is no longer scheduled. it is therefore necessary
+ * to reschedule it manually
+ */
+ queue_delayed_work(batadv_event_workqueue,
+ &hard_iface->bat_iv.reschedule_work,
+ msecs_to_jiffies(atomic_read(&bat_priv->orig_interval)));
+ }
+
batadv_hardif_put(primary_if);
}
@@ -870,6 +905,17 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
}
+static void batadv_iv_ogm_reschedule(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct batadv_hard_iface *hard_iface;
+
+ hard_iface = container_of(delayed_work,
+ struct batadv_hard_iface,
+ bat_iv.reschedule_work);
+ batadv_iv_ogm_schedule(hard_iface);
+}
+
/**
* batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface
* @orig_node: originator which reproadcasted the OGMs directly
@@ -2262,6 +2308,8 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1,
static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface)
{
+ INIT_DELAYED_WORK(&hard_iface->bat_iv.reschedule_work, batadv_iv_ogm_reschedule);
+
/* begin scheduling originator messages on that interface */
batadv_iv_ogm_schedule(hard_iface);
}
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index e3870492dab7..d66ca77b1aaa 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -113,14 +113,14 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
/**
* batadv_v_ogm_send_to_if() - send a batman ogm using a given interface
+ * @bat_priv: the bat priv with all the mesh interface information
* @skb: the OGM to send
* @hard_iface: the interface to use to send the OGM
*/
-static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
+static void batadv_v_ogm_send_to_if(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
struct batadv_hard_iface *hard_iface)
{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface);
-
if (hard_iface->if_status != BATADV_IF_ACTIVE) {
kfree_skb(skb);
return;
@@ -187,6 +187,7 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface)
/**
* batadv_v_ogm_aggr_send() - flush & send aggregation queue
+ * @bat_priv: the bat priv with all the mesh interface information
* @hard_iface: the interface with the aggregation queue to flush
*
* Aggregates all OGMv2 packets currently in the aggregation queue into a
@@ -196,7 +197,8 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface)
*
* Caller needs to hold the hard_iface->bat_v.aggr_list.lock.
*/
-static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface)
+static void batadv_v_ogm_aggr_send(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface)
{
unsigned int aggr_len = hard_iface->bat_v.aggr_len;
struct sk_buff *skb_aggr;
@@ -226,27 +228,32 @@ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface)
consume_skb(skb);
}
- batadv_v_ogm_send_to_if(skb_aggr, hard_iface);
+ batadv_v_ogm_send_to_if(bat_priv, skb_aggr, hard_iface);
}
/**
* batadv_v_ogm_queue_on_if() - queue a batman ogm on a given interface
+ * @bat_priv: the bat priv with all the mesh interface information
* @skb: the OGM to queue
* @hard_iface: the interface to queue the OGM on
*/
-static void batadv_v_ogm_queue_on_if(struct sk_buff *skb,
+static void batadv_v_ogm_queue_on_if(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
struct batadv_hard_iface *hard_iface)
{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface);
+ if (hard_iface->mesh_iface != bat_priv->mesh_iface) {
+ kfree_skb(skb);
+ return;
+ }
if (!atomic_read(&bat_priv->aggregated_ogms)) {
- batadv_v_ogm_send_to_if(skb, hard_iface);
+ batadv_v_ogm_send_to_if(bat_priv, skb, hard_iface);
return;
}
spin_lock_bh(&hard_iface->bat_v.aggr_list.lock);
if (!batadv_v_ogm_queue_left(skb, hard_iface))
- batadv_v_ogm_aggr_send(hard_iface);
+ batadv_v_ogm_aggr_send(bat_priv, hard_iface);
hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb);
__skb_queue_tail(&hard_iface->bat_v.aggr_list, skb);
@@ -262,10 +269,10 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv)
struct batadv_hard_iface *hard_iface;
struct batadv_ogm2_packet *ogm_packet;
struct sk_buff *skb, *skb_tmp;
- unsigned char *ogm_buff;
+ unsigned char **ogm_buff;
struct list_head *iter;
- int ogm_buff_len;
- u16 tvlv_len = 0;
+ int *ogm_buff_len;
+ u16 tvlv_len;
int ret;
lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex);
@@ -273,25 +280,27 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv)
if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
goto out;
- ogm_buff = bat_priv->bat_v.ogm_buff;
- ogm_buff_len = bat_priv->bat_v.ogm_buff_len;
+ ogm_buff = &bat_priv->bat_v.ogm_buff;
+ ogm_buff_len = &bat_priv->bat_v.ogm_buff_len;
+
/* tt changes have to be committed before the tvlv data is
* appended as it may alter the tt tvlv container
*/
batadv_tt_local_commit_changes(bat_priv);
- tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, &ogm_buff,
- &ogm_buff_len,
- BATADV_OGM2_HLEN);
+ ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff,
+ ogm_buff_len,
+ BATADV_OGM2_HLEN);
+ if (ret < 0)
+ goto reschedule;
- bat_priv->bat_v.ogm_buff = ogm_buff;
- bat_priv->bat_v.ogm_buff_len = ogm_buff_len;
+ tvlv_len = ret;
- skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + ogm_buff_len);
+ skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + *ogm_buff_len);
if (!skb)
goto reschedule;
skb_reserve(skb, ETH_HLEN);
- skb_put_data(skb, ogm_buff, ogm_buff_len);
+ skb_put_data(skb, *ogm_buff, *ogm_buff_len);
ogm_packet = (struct batadv_ogm2_packet *)skb->data;
ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
@@ -343,7 +352,7 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv)
break;
}
- batadv_v_ogm_queue_on_if(skb_tmp, hard_iface);
+ batadv_v_ogm_queue_on_if(bat_priv, skb_tmp, hard_iface);
batadv_hardif_put(hard_iface);
}
rcu_read_unlock();
@@ -383,12 +392,14 @@ void batadv_v_ogm_aggr_work(struct work_struct *work)
{
struct batadv_hard_iface_bat_v *batv;
struct batadv_hard_iface *hard_iface;
+ struct batadv_priv *bat_priv;
batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work);
hard_iface = container_of(batv, struct batadv_hard_iface, bat_v);
+ bat_priv = netdev_priv(hard_iface->mesh_iface);
spin_lock_bh(&hard_iface->bat_v.aggr_list.lock);
- batadv_v_ogm_aggr_send(hard_iface);
+ batadv_v_ogm_aggr_send(bat_priv, hard_iface);
spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock);
batadv_v_ogm_start_queue_timer(hard_iface);
@@ -578,7 +589,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
if_outgoing->net_dev->name, ntohl(ogm_forward->throughput),
ogm_forward->ttl, if_incoming->net_dev->name);
- batadv_v_ogm_queue_on_if(skb, if_outgoing);
+ batadv_v_ogm_queue_on_if(bat_priv, skb, if_outgoing);
out:
batadv_orig_ifinfo_put(orig_ifinfo);
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index cec11f1251d6..ffe854018bd3 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -356,12 +356,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac,
sizeof(local_claim_dest));
local_claim_dest.type = claimtype;
- mesh_iface = primary_if->mesh_iface;
+ mesh_iface = READ_ONCE(primary_if->mesh_iface);
+ if (!mesh_iface)
+ goto out;
skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
/* IP DST: 0.0.0.0 */
zeroip,
- primary_if->mesh_iface,
+ mesh_iface,
/* IP SRC: 0.0.0.0 */
zeroip,
/* Ethernet DST: Broadcast */
@@ -514,8 +516,8 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig,
entry->crc = BATADV_BLA_CRC_INIT;
entry->bat_priv = bat_priv;
spin_lock_init(&entry->crc_lock);
- atomic_set(&entry->request_sent, 0);
- atomic_set(&entry->wait_periods, 0);
+ entry->state = BATADV_BLA_BACKBONE_GW_SYNCED;
+ entry->wait_periods = 0;
ether_addr_copy(entry->orig, orig);
INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report);
kref_init(&entry->refcount);
@@ -544,9 +546,13 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig,
batadv_bla_send_announce(bat_priv, entry);
/* this will be decreased in the worker thread */
- atomic_inc(&entry->request_sent);
- atomic_set(&entry->wait_periods, BATADV_BLA_WAIT_PERIODS);
- atomic_inc(&bat_priv->bla.num_requests);
+ spin_lock_bh(&bat_priv->bla.num_requests_lock);
+ if (entry->state == BATADV_BLA_BACKBONE_GW_SYNCED) {
+ entry->state = BATADV_BLA_BACKBONE_GW_UNSYNCED;
+ entry->wait_periods = BATADV_BLA_WAIT_PERIODS;
+ atomic_inc(&bat_priv->bla.num_requests);
+ }
+ spin_unlock_bh(&bat_priv->bla.num_requests_lock);
}
return entry;
@@ -649,10 +655,12 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw)
backbone_gw->vid, BATADV_CLAIM_TYPE_REQUEST);
/* no local broadcasts should be sent or received, for now. */
- if (!atomic_read(&backbone_gw->request_sent)) {
+ spin_lock_bh(&backbone_gw->bat_priv->bla.num_requests_lock);
+ if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_SYNCED) {
+ backbone_gw->state = BATADV_BLA_BACKBONE_GW_UNSYNCED;
atomic_inc(&backbone_gw->bat_priv->bla.num_requests);
- atomic_set(&backbone_gw->request_sent, 1);
}
+ spin_unlock_bh(&backbone_gw->bat_priv->bla.num_requests_lock);
}
/**
@@ -873,10 +881,12 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
/* if we have sent a request and the crc was OK,
* we can allow traffic again.
*/
- if (atomic_read(&backbone_gw->request_sent)) {
+ spin_lock_bh(&bat_priv->bla.num_requests_lock);
+ if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED) {
+ backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED;
atomic_dec(&backbone_gw->bat_priv->bla.num_requests);
- atomic_set(&backbone_gw->request_sent, 0);
}
+ spin_unlock_bh(&bat_priv->bla.num_requests_lock);
}
batadv_backbone_gw_put(backbone_gw);
@@ -1224,6 +1234,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
struct hlist_head *head;
struct batadv_hashtable *hash;
spinlock_t *list_lock; /* protects write access to the hash lists */
+ bool purged;
int i;
hash = bat_priv->bla.backbone_hash;
@@ -1234,30 +1245,49 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
head = &hash->table[i];
list_lock = &hash->list_locks[i];
- spin_lock_bh(list_lock);
- hlist_for_each_entry_safe(backbone_gw, node_tmp,
- head, hash_entry) {
- if (now)
- goto purge_now;
- if (!batadv_has_timed_out(backbone_gw->lasttime,
- BATADV_BLA_BACKBONE_TIMEOUT))
- continue;
+ do {
+ purged = false;
+
+ spin_lock_bh(list_lock);
+ hlist_for_each_entry_safe(backbone_gw, node_tmp,
+ head, hash_entry) {
+ if (now)
+ goto purge_now;
+ if (!batadv_has_timed_out(backbone_gw->lasttime,
+ BATADV_BLA_BACKBONE_TIMEOUT))
+ continue;
- batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
- "%s(): backbone gw %pM timed out\n",
- __func__, backbone_gw->orig);
+ batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
+ "%s(): backbone gw %pM timed out\n",
+ __func__, backbone_gw->orig);
purge_now:
- /* don't wait for the pending request anymore */
- if (atomic_read(&backbone_gw->request_sent))
- atomic_dec(&bat_priv->bla.num_requests);
+ purged = true;
- batadv_bla_del_backbone_claims(backbone_gw);
+ /* don't wait for the pending request anymore */
+ spin_lock_bh(&bat_priv->bla.num_requests_lock);
+ if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED)
+ atomic_dec(&bat_priv->bla.num_requests);
- hlist_del_rcu(&backbone_gw->hash_entry);
- batadv_backbone_gw_put(backbone_gw);
- }
- spin_unlock_bh(list_lock);
+ backbone_gw->state = BATADV_BLA_BACKBONE_GW_STOPPED;
+ spin_unlock_bh(&bat_priv->bla.num_requests_lock);
+
+ batadv_bla_del_backbone_claims(backbone_gw);
+
+ hlist_del_rcu(&backbone_gw->hash_entry);
+ break;
+ }
+ spin_unlock_bh(list_lock);
+
+ if (purged) {
+ /* reference for pending report_work */
+ if (cancel_work_sync(&backbone_gw->report_work))
+ batadv_backbone_gw_put(backbone_gw);
+
+ /* reference for hash_entry */
+ batadv_backbone_gw_put(backbone_gw);
+ }
+ } while (purged);
}
}
@@ -1492,7 +1522,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
batadv_bla_send_loopdetect(bat_priv,
backbone_gw);
- /* request_sent is only set after creation to avoid
+ /* state is only set to unsynced after creation to avoid
* problems when we are not yet known as backbone gw
* in the backbone.
*
@@ -1501,14 +1531,21 @@ static void batadv_bla_periodic_work(struct work_struct *work)
* some grace time.
*/
- if (atomic_read(&backbone_gw->request_sent) == 0)
- continue;
+ spin_lock_bh(&bat_priv->bla.num_requests_lock);
+ if (backbone_gw->state != BATADV_BLA_BACKBONE_GW_UNSYNCED)
+ goto unlock_next;
- if (!atomic_dec_and_test(&backbone_gw->wait_periods))
- continue;
+ if (backbone_gw->wait_periods > 0)
+ backbone_gw->wait_periods--;
+ if (backbone_gw->wait_periods > 0)
+ goto unlock_next;
+
+ backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED;
atomic_dec(&backbone_gw->bat_priv->bla.num_requests);
- atomic_set(&backbone_gw->request_sent, 0);
+
+unlock_next:
+ spin_unlock_bh(&bat_priv->bla.num_requests_lock);
}
rcu_read_unlock();
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 3efc4cf50b46..0a8bd95e2f99 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -696,6 +696,9 @@ static bool batadv_dat_forward_data(struct batadv_priv *bat_priv,
goto free_orig;
tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
+ if (!tmp_skb)
+ goto free_neigh;
+
if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb,
cand[i].orig_node,
packet_subtype)) {
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index f4e45cc25816..e9553db42349 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -17,6 +17,7 @@
#include <linux/lockdep.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
+#include <linux/overflow.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -80,9 +81,9 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
*
* Return: the maximum size of payload that can be fragmented.
*/
-static int batadv_frag_size_limit(void)
+static size_t batadv_frag_size_limit(void)
{
- int limit = BATADV_FRAG_MAX_FRAG_SIZE;
+ size_t limit = BATADV_FRAG_MAX_FRAG_SIZE;
limit -= sizeof(struct batadv_frag_packet);
limit *= BATADV_FRAG_MAX_FRAGMENTS;
@@ -143,7 +144,9 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
struct batadv_frag_packet *frag_packet;
u8 bucket;
u16 seqno, hdr_size = sizeof(struct batadv_frag_packet);
+ bool overflow = false;
bool ret = false;
+ size_t data_len;
/* Linearize packet to avoid linearizing 16 packets in a row when doing
* the later merge. Non-linear merge should be added to remove this
@@ -153,6 +156,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
goto err;
frag_packet = (struct batadv_frag_packet *)skb->data;
+ data_len = skb->len - hdr_size;
seqno = ntohs(frag_packet->seqno);
bucket = seqno % BATADV_FRAG_BUFFER_COUNT;
@@ -171,7 +175,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
spin_lock_bh(&chain->lock);
if (batadv_frag_init_chain(chain, seqno)) {
hlist_add_head(&frag_entry_new->list, &chain->fragment_list);
- chain->size = skb->len - hdr_size;
+ chain->size = data_len;
chain->timestamp = jiffies;
chain->total_size = ntohs(frag_packet->total_size);
ret = true;
@@ -188,7 +192,11 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
if (frag_entry_curr->no < frag_entry_new->no) {
hlist_add_before(&frag_entry_new->list,
&frag_entry_curr->list);
- chain->size += skb->len - hdr_size;
+
+ if (check_add_overflow(chain->size, data_len,
+ &chain->size))
+ overflow = true;
+
chain->timestamp = jiffies;
ret = true;
goto out;
@@ -201,13 +209,16 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
/* Reached the end of the list, so insert after 'frag_entry_last'. */
if (likely(frag_entry_last)) {
hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
- chain->size += skb->len - hdr_size;
+
+ if (check_add_overflow(chain->size, data_len, &chain->size))
+ overflow = true;
+
chain->timestamp = jiffies;
ret = true;
}
out:
- if (chain->size > batadv_frag_size_limit() ||
+ if (overflow || chain->size > batadv_frag_size_limit() ||
chain->total_size != ntohs(frag_packet->total_size) ||
chain->total_size > batadv_frag_size_limit()) {
/* Clear chain if total size of either the list or the packet
@@ -294,6 +305,31 @@ free:
}
/**
+ * batadv_skb_is_frag() - check if newly merged skb contains unicast fragment
+ * @skb: newly merged skb
+ *
+ * Return: if newly merged skb is of type BATADV_UNICAST_FRAG
+ */
+static bool batadv_skb_is_frag(struct sk_buff *skb)
+{
+ struct batadv_ogm_packet *batadv_ogm_packet;
+
+ /* packet should hold at least type and version */
+ if (unlikely(!pskb_may_pull(skb, 2)))
+ return false;
+
+ batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data;
+
+ if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION)
+ return false;
+
+ if (batadv_ogm_packet->packet_type != BATADV_UNICAST_FRAG)
+ return false;
+
+ return true;
+}
+
+/**
* batadv_frag_skb_buffer() - buffer fragment for later merge
* @skb: skb to buffer
* @orig_node_src: originator that the skb is received from
@@ -326,6 +362,16 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb,
if (!skb_out)
goto out_err;
+ /* fragment in fragment is not allowed. otherwise it is possible
+ * to exhaust the stack when receiving a matryoshka-style
+ * "fragments in a fragment packet"
+ */
+ if (batadv_skb_is_frag(skb_out)) {
+ kfree_skb(skb_out);
+ skb_out = NULL;
+ goto out_err;
+ }
+
out:
ret = true;
out_err:
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 51e9c081a2a4..a9d0346e8332 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -478,10 +478,14 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv,
*/
void batadv_gw_node_free(struct batadv_priv *bat_priv)
{
+ struct batadv_gw_node *curr_gw;
struct batadv_gw_node *gw_node;
struct hlist_node *node_tmp;
spin_lock_bh(&bat_priv->gw.list_lock);
+ curr_gw = rcu_replace_pointer(bat_priv->gw.curr_gw, NULL, true);
+ batadv_gw_node_put(curr_gw);
+
hlist_for_each_entry_safe(gw_node, node_tmp,
&bat_priv->gw.gateway_list, list) {
hlist_del_init_rcu(&gw_node->list);
diff --git a/net/batman-adv/mesh-interface.c b/net/batman-adv/mesh-interface.c
index 56ca1c1b83f2..e7aa45bc6b7a 100644
--- a/net/batman-adv/mesh-interface.c
+++ b/net/batman-adv/mesh-interface.c
@@ -787,6 +787,7 @@ static int batadv_meshif_init_late(struct net_device *dev)
atomic_set(&bat_priv->tt.ogm_append_cnt, 0);
#ifdef CONFIG_BATMAN_ADV_BLA
atomic_set(&bat_priv->bla.num_requests, 0);
+ spin_lock_init(&bat_priv->bla.num_requests_lock);
#endif
atomic_set(&bat_priv->tp_num, 0);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index b3468ccab535..ad4921b659d9 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -835,8 +835,6 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
orig_node = container_of(rcu, struct batadv_orig_node, rcu);
- batadv_mcast_purge_orig(orig_node);
-
batadv_frag_purge_orig(orig_node, NULL);
kfree(orig_node->tt_buff);
@@ -887,6 +885,8 @@ void batadv_orig_node_release(struct kref *ref)
}
spin_unlock_bh(&orig_node->vlan_list_lock);
+ batadv_mcast_purge_orig(orig_node);
+
call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
}
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 066c76113fc4..0fc4ca78e84e 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -8,6 +8,7 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/bug.h>
#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
@@ -254,6 +255,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
* batadv_tp_list_find() - find a tp_vars object in the global list
* @bat_priv: the bat priv with all the mesh interface information
* @dst: the other endpoint MAC address to look for
+ * @role: role of the session
*
* Look for a tp_vars object matching dst as end_point and return it after
* having increment the refcounter. Return NULL is not found
@@ -261,7 +263,8 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
* Return: matching tp_vars or NULL when no tp_vars with @dst was found
*/
static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
- const u8 *dst)
+ const u8 *dst,
+ enum batadv_tp_meter_role role)
{
struct batadv_tp_vars *pos, *tp_vars = NULL;
@@ -270,6 +273,9 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
if (!batadv_compare_eth(pos->other_end, dst))
continue;
+ if (pos->role != role)
+ continue;
+
/* most of the time this function is invoked during the normal
* process..it makes sens to pay more when the session is
* finished and to speed the process up during the measurement
@@ -286,11 +292,32 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
}
/**
+ * batadv_tp_list_active() - check if session from/to destination is ongoing
+ * @bat_priv: the bat priv with all the mesh interface information
+ * @dst: the other endpoint MAC address to look for
+ *
+ * Return: if matching session with @dst was found
+ */
+static bool batadv_tp_list_active(struct batadv_priv *bat_priv, const u8 *dst)
+ __must_hold(&bat_priv->tp_list_lock)
+{
+ struct batadv_tp_vars *tp_vars;
+
+ hlist_for_each_entry_rcu(tp_vars, &bat_priv->tp_list, list) {
+ if (batadv_compare_eth(tp_vars->other_end, dst))
+ return true;
+ }
+
+ return false;
+}
+
+/**
* batadv_tp_list_find_session() - find tp_vars session object in the global
* list
* @bat_priv: the bat priv with all the mesh interface information
* @dst: the other endpoint MAC address to look for
* @session: session identifier
+ * @role: role of the session
*
* Look for a tp_vars object matching dst as end_point, session as tp meter
* session and return it after having increment the refcounter. Return NULL
@@ -300,7 +327,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
*/
static struct batadv_tp_vars *
batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst,
- const u8 *session)
+ const u8 *session, enum batadv_tp_meter_role role)
{
struct batadv_tp_vars *pos, *tp_vars = NULL;
@@ -312,6 +339,9 @@ batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst,
if (memcmp(pos->session, session, sizeof(pos->session)) != 0)
continue;
+ if (pos->role != role)
+ continue;
+
/* most of the time this function is invoked during the normal
* process..it makes sense to pay more when the session is
* finished and to speed the process up during the measurement
@@ -400,13 +430,7 @@ static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars)
batadv_tp_list_detach(tp_vars);
/* kill the timer and remove its reference */
- timer_delete_sync(&tp_vars->timer);
- /* the worker might have rearmed itself therefore we kill it again. Note
- * that if the worker should run again before invoking the following
- * timer_delete(), it would not re-arm itself once again because the status
- * is OFF now
- */
- timer_delete(&tp_vars->timer);
+ timer_shutdown_sync(&tp_vars->timer);
batadv_tp_vars_put(tp_vars);
}
@@ -418,11 +442,14 @@ static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars)
static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
struct batadv_tp_vars *tp_vars)
{
+ enum batadv_tp_meter_reason reason;
u32 session_cookie;
+ reason = atomic_read(&tp_vars->send_result);
+
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Test towards %pM finished..shutting down (reason=%d)\n",
- tp_vars->other_end, tp_vars->reason);
+ tp_vars->other_end, reason);
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n",
@@ -435,7 +462,7 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
session_cookie = batadv_tp_session_cookie(tp_vars->session,
tp_vars->icmp_uid);
- batadv_tp_batctl_notify(tp_vars->reason,
+ batadv_tp_batctl_notify(reason,
tp_vars->other_end,
bat_priv,
tp_vars->start_time,
@@ -451,10 +478,18 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars,
enum batadv_tp_meter_reason reason)
{
- if (!atomic_dec_and_test(&tp_vars->sending))
- return;
+ atomic_cmpxchg(&tp_vars->send_result, 0, reason);
+}
- tp_vars->reason = reason;
+/**
+ * batadv_tp_sender_stopped() - check if tp session was stopped with reason
+ * @tp_vars: the private data of the current TP meter session
+ *
+ * Return: whether stop reason was found
+ */
+static bool batadv_tp_sender_stopped(struct batadv_tp_vars *tp_vars)
+{
+ return atomic_read(&tp_vars->send_result) != 0;
}
/**
@@ -484,7 +519,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
/* most of the time this function is invoked while normal packet
* reception...
*/
- if (unlikely(atomic_read(&tp_vars->sending) == 0))
+ if (unlikely(batadv_tp_sender_stopped(tp_vars)))
/* timer ref will be dropped in batadv_tp_sender_cleanup */
return;
@@ -504,7 +539,7 @@ static void batadv_tp_sender_timeout(struct timer_list *t)
struct batadv_tp_vars *tp_vars = timer_container_of(tp_vars, t, timer);
struct batadv_priv *bat_priv = tp_vars->bat_priv;
- if (atomic_read(&tp_vars->sending) == 0)
+ if (batadv_tp_sender_stopped(tp_vars))
return;
/* if the user waited long enough...shutdown the test */
@@ -659,11 +694,11 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv,
/* find the tp_vars */
tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
- icmp->session);
+ icmp->session, BATADV_TP_SENDER);
if (unlikely(!tp_vars))
return;
- if (unlikely(atomic_read(&tp_vars->sending) == 0))
+ if (unlikely(batadv_tp_sender_stopped(tp_vars)))
goto out;
/* old ACK? silently drop it.. */
@@ -829,21 +864,21 @@ static int batadv_tp_send(void *arg)
if (unlikely(tp_vars->role != BATADV_TP_SENDER)) {
err = BATADV_TP_REASON_DST_UNREACHABLE;
- tp_vars->reason = err;
+ batadv_tp_sender_shutdown(tp_vars, err);
goto out;
}
orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end);
if (unlikely(!orig_node)) {
err = BATADV_TP_REASON_DST_UNREACHABLE;
- tp_vars->reason = err;
+ batadv_tp_sender_shutdown(tp_vars, err);
goto out;
}
primary_if = batadv_primary_if_get_selected(bat_priv);
if (unlikely(!primary_if)) {
err = BATADV_TP_REASON_DST_UNREACHABLE;
- tp_vars->reason = err;
+ batadv_tp_sender_shutdown(tp_vars, err);
goto out;
}
@@ -862,7 +897,7 @@ static int batadv_tp_send(void *arg)
queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work,
msecs_to_jiffies(tp_vars->test_length));
- while (atomic_read(&tp_vars->sending) != 0) {
+ while (!batadv_tp_sender_stopped(tp_vars)) {
if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) {
batadv_tp_wait_available(tp_vars, payload_len);
continue;
@@ -885,8 +920,7 @@ static int batadv_tp_send(void *arg)
"Meter: %s() cannot send packets (%d)\n",
__func__, err);
/* ensure nobody else tries to stop the thread now */
- if (atomic_dec_and_test(&tp_vars->sending))
- tp_vars->reason = err;
+ batadv_tp_sender_shutdown(tp_vars, err);
break;
}
@@ -972,10 +1006,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
return;
}
- tp_vars = batadv_tp_list_find(bat_priv, dst);
- if (tp_vars) {
+ if (batadv_tp_list_active(bat_priv, dst)) {
spin_unlock_bh(&bat_priv->tp_list_lock);
- batadv_tp_vars_put(tp_vars);
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Meter: test to or from the same node already ongoing, aborting\n");
batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING,
@@ -1008,7 +1040,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
ether_addr_copy(tp_vars->other_end, dst);
kref_init(&tp_vars->refcount);
tp_vars->role = BATADV_TP_SENDER;
- atomic_set(&tp_vars->sending, 1);
+ atomic_set(&tp_vars->send_result, 0);
memcpy(tp_vars->session, session_id, sizeof(session_id));
tp_vars->icmp_uid = icmp_uid;
@@ -1096,16 +1128,16 @@ void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
if (!orig_node)
return;
- tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig);
+ tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig, BATADV_TP_SENDER);
if (!tp_vars) {
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Meter: trying to interrupt an already over connection\n");
- goto out;
+ goto out_put_orig_node;
}
batadv_tp_sender_shutdown(tp_vars, return_value);
batadv_tp_vars_put(tp_vars);
-out:
+out_put_orig_node:
batadv_orig_node_put(orig_node);
}
@@ -1156,6 +1188,9 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t)
spin_unlock_bh(&tp_vars->unacked_lock);
/* drop reference of timer */
+ if (WARN_ON(atomic_xchg(&tp_vars->receiving, 0) != 1))
+ return;
+
batadv_tp_vars_put(tp_vars);
}
@@ -1356,7 +1391,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv,
goto out_unlock;
tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
- icmp->session);
+ icmp->session, BATADV_TP_RECEIVER);
if (tp_vars)
goto out_unlock;
@@ -1374,6 +1409,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv,
ether_addr_copy(tp_vars->other_end, icmp->orig);
tp_vars->role = BATADV_TP_RECEIVER;
+ atomic_set(&tp_vars->receiving, 1);
memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session));
tp_vars->last_recv = BATADV_TP_FIRST_SEQ;
tp_vars->bat_priv = bat_priv;
@@ -1426,7 +1462,7 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv,
}
} else {
tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
- icmp->session);
+ icmp->session, BATADV_TP_RECEIVER);
if (!tp_vars) {
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Unexpected packet from %pM!\n",
@@ -1435,13 +1471,6 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv,
}
}
- if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) {
- batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
- "Meter: dropping packet: not expected (role=%u)\n",
- tp_vars->role);
- goto out;
- }
-
tp_vars->last_recv_time = jiffies;
/* if the packet is a duplicate, it may be the case that an ACK has been
@@ -1546,8 +1575,12 @@ void batadv_tp_stop_all(struct batadv_priv *bat_priv)
break;
case BATADV_TP_RECEIVER:
batadv_tp_list_detach(tp_var);
- if (timer_shutdown_sync(&tp_var->timer))
- batadv_tp_vars_put(tp_var);
+ timer_shutdown_sync(&tp_var->timer);
+
+ if (atomic_xchg(&tp_var->receiving, 0) != 1)
+ break;
+
+ batadv_tp_vars_put(tp_var);
break;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 05cddcf994f6..9f6e67771ffa 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -797,24 +797,33 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
s32 *tt_len)
{
u16 num_vlan = 0;
- u16 num_entries = 0;
u16 tvlv_len = 0;
unsigned int change_offset;
struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_orig_node_vlan *vlan;
+ u16 total_entries = 0;
u8 *tt_change_ptr;
+ int vlan_entries;
+ u16 sum_entries;
spin_lock_bh(&orig_node->vlan_list_lock);
hlist_for_each_entry(vlan, &orig_node->vlan_list, list) {
+ vlan_entries = atomic_read(&vlan->tt.num_entries);
+
+ if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) {
+ *tt_len = 0;
+ goto out;
+ }
+
+ total_entries = sum_entries;
num_vlan++;
- num_entries += atomic_read(&vlan->tt.num_entries);
}
change_offset = struct_size(*tt_data, vlan_data, num_vlan);
/* if tt_len is negative, allocate the space needed by the full table */
if (*tt_len < 0)
- *tt_len = batadv_tt_len(num_entries);
+ *tt_len = batadv_tt_len(total_entries);
if (change_offset > U16_MAX || *tt_len > U16_MAX - change_offset) {
*tt_len = 0;
@@ -835,14 +844,26 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
(*tt_data)->num_vlan = htons(num_vlan);
tt_vlan = (*tt_data)->vlan_data;
+ num_vlan = 0;
hlist_for_each_entry(vlan, &orig_node->vlan_list, list) {
+ vlan_entries = atomic_read(&vlan->tt.num_entries);
+ if (vlan_entries < 1)
+ continue;
+
tt_vlan->vid = htons(vlan->vid);
tt_vlan->crc = htonl(vlan->tt.crc);
tt_vlan->reserved = 0;
tt_vlan++;
+ num_vlan++;
}
+ /* recalculate in case number of VLANs reduced */
+ change_offset = struct_size(*tt_data, vlan_data, num_vlan);
+ tvlv_len = *tt_len + change_offset;
+
+ (*tt_data)->num_vlan = htons(num_vlan);
+
tt_change_ptr = (u8 *)*tt_data + change_offset;
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
@@ -877,21 +898,25 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
{
struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_meshif_vlan *vlan;
+ size_t change_offset;
u16 num_vlan = 0;
- u16 vlan_entries = 0;
u16 total_entries = 0;
u16 tvlv_len;
u8 *tt_change_ptr;
- int change_offset;
+ int vlan_entries;
+ u16 sum_entries;
spin_lock_bh(&bat_priv->meshif_vlan_list_lock);
hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) {
vlan_entries = atomic_read(&vlan->tt.num_entries);
- if (vlan_entries < 1)
- continue;
+ if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) {
+ tvlv_len = 0;
+ goto out;
+ }
+
+ total_entries = sum_entries;
num_vlan++;
- total_entries += vlan_entries;
}
change_offset = struct_size(*tt_data, vlan_data, num_vlan);
@@ -900,8 +925,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
if (*tt_len < 0)
*tt_len = batadv_tt_len(total_entries);
- tvlv_len = *tt_len;
- tvlv_len += change_offset;
+ if (check_add_overflow(*tt_len, change_offset, &tvlv_len)) {
+ tvlv_len = 0;
+ goto out;
+ }
*tt_data = kmalloc(tvlv_len, GFP_ATOMIC);
if (!*tt_data) {
@@ -914,6 +941,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
(*tt_data)->num_vlan = htons(num_vlan);
tt_vlan = (*tt_data)->vlan_data;
+ num_vlan = 0;
hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) {
vlan_entries = atomic_read(&vlan->tt.num_entries);
if (vlan_entries < 1)
@@ -924,8 +952,15 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
tt_vlan->reserved = 0;
tt_vlan++;
+ num_vlan++;
}
+ /* recalculate in case number of VLANs reduced */
+ change_offset = struct_size(*tt_data, vlan_data, num_vlan);
+ tvlv_len = *tt_len + change_offset;
+
+ (*tt_data)->num_vlan = htons(num_vlan);
+
tt_change_ptr = (u8 *)*tt_data + change_offset;
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 8129a3f9c44d..cc6ac580c620 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -8,10 +8,12 @@
#include <linux/byteorder/generic.h>
#include <linux/container_of.h>
+#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/kref.h>
+#include <linux/limits.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -159,10 +161,10 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
*
* Return: size of all currently registered tvlv containers in bytes.
*/
-static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
+static size_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
{
struct batadv_tvlv_container *tvlv;
- u16 tvlv_len = 0;
+ size_t tvlv_len = 0;
lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
@@ -306,26 +308,35 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
* The ogm packet might be enlarged or shrunk depending on the current size
* and the size of the to-be-appended tvlv containers.
*
- * Return: size of all appended tvlv containers in bytes.
+ * Return: size of all appended tvlv containers in bytes (max U16_MAX), negative
+ * if operation failed
*/
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
unsigned char **packet_buff,
int *packet_buff_len, int packet_min_len)
{
struct batadv_tvlv_container *tvlv;
struct batadv_tvlv_hdr *tvlv_hdr;
- u16 tvlv_value_len;
+ size_t tvlv_value_len;
void *tvlv_value;
+ int tvlv_len_ret;
bool ret;
spin_lock_bh(&bat_priv->tvlv.container_list_lock);
tvlv_value_len = batadv_tvlv_container_list_size(bat_priv);
+ if (tvlv_value_len > U16_MAX) {
+ tvlv_len_ret = -E2BIG;
+ goto end;
+ }
ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len,
packet_min_len, tvlv_value_len);
-
- if (!ret)
+ if (!ret) {
+ tvlv_len_ret = -ENOMEM;
goto end;
+ }
+
+ tvlv_len_ret = tvlv_value_len;
if (!tvlv_value_len)
goto end;
@@ -344,7 +355,8 @@ u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
end:
spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
- return tvlv_value_len;
+
+ return tvlv_len_ret;
}
/**
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index e5697230d991..f96f6b3f44a0 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -16,7 +16,7 @@
void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
u8 type, u8 version,
void *tvlv_value, u16 tvlv_value_len);
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
unsigned char **packet_buff,
int *packet_buff_len, int packet_min_len);
void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index daa06f421154..a01ee46d97f3 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -83,6 +83,9 @@ struct batadv_hard_iface_bat_iv {
/** @ogm_seqno: OGM sequence number - used to identify each OGM */
atomic_t ogm_seqno;
+ /** @reschedule_work: recover OGM schedule after schedule error */
+ struct delayed_work reschedule_work;
+
/** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */
struct mutex ogm_buff_mutex;
};
@@ -301,7 +304,7 @@ struct batadv_frag_table_entry {
u16 seqno;
/** @size: accumulated size of packets in list */
- u16 size;
+ size_t size;
/** @total_size: expected size of the assembled packet */
u16 total_size;
@@ -452,7 +455,7 @@ struct batadv_orig_node {
* @tt_buff_len: length of the last tt changeset this node received
* from the orig node
*/
- s16 tt_buff_len;
+ u16 tt_buff_len;
/** @tt_buff_lock: lock that protects tt_buff and tt_buff_len */
spinlock_t tt_buff_lock;
@@ -993,7 +996,7 @@ struct batadv_priv_tt {
* @last_changeset_len: length of last tt changeset this host has
* generated
*/
- s16 last_changeset_len;
+ u16 last_changeset_len;
/**
* @last_changeset_lock: lock protecting last_changeset &
@@ -1024,6 +1027,12 @@ struct batadv_priv_bla {
atomic_t num_requests;
/**
+ * @num_requests_lock: locks update num_requests +
+ * batadv_backbone_gw::state + batadv_backbone_gw::wait_periods update
+ */
+ spinlock_t num_requests_lock;
+
+ /**
* @claim_hash: hash table containing mesh nodes this host has claimed
*/
struct batadv_hashtable *claim_hash;
@@ -1320,11 +1329,14 @@ struct batadv_tp_vars {
/** @role: receiver/sender modi */
enum batadv_tp_meter_role role;
- /** @sending: sending binary semaphore: 1 if sending, 0 is not */
- atomic_t sending;
+ /**
+ * @send_result: 0 when sending is ongoing and otherwise
+ * enum batadv_tp_meter_reason
+ */
+ atomic_t send_result;
- /** @reason: reason for a stopped session */
- enum batadv_tp_meter_reason reason;
+ /** @receiving: receiving binary semaphore: 1 if receiving, 0 is not */
+ atomic_t receiving;
/** @finish_work: work item for the finishing procedure */
struct delayed_work finish_work;
@@ -1666,6 +1678,27 @@ struct batadv_priv {
#ifdef CONFIG_BATMAN_ADV_BLA
+enum batadv_bla_backbone_gw_state {
+ /**
+ * @BATADV_BLA_BACKBONE_GW_STOPPED: backbone gw is being removed
+ * and it must not longer work on requests
+ */
+ BATADV_BLA_BACKBONE_GW_STOPPED,
+
+ /**
+ * @BATADV_BLA_BACKBONE_GW_UNSYNCED: backbone was detected out
+ * of sync and a request was send. No traffic is forwarded until the
+ * situation is resolved
+ */
+ BATADV_BLA_BACKBONE_GW_UNSYNCED,
+
+ /**
+ * @BATADV_BLA_BACKBONE_GW_SYNCED: backbone is consider to be in
+ * sync. traffic can be forwarded
+ */
+ BATADV_BLA_BACKBONE_GW_SYNCED,
+};
+
/**
* struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN
*/
@@ -1691,16 +1724,12 @@ struct batadv_bla_backbone_gw {
/**
* @wait_periods: grace time for bridge forward delays and bla group
* forming at bootup phase - no bcast traffic is formwared until it has
- * elapsed
+ * elapsed. Must only be access with num_requests_lock.
*/
- atomic_t wait_periods;
+ u8 wait_periods;
- /**
- * @request_sent: if this bool is set to true we are out of sync with
- * this backbone gateway - no bcast traffic is formwared until the
- * situation was resolved
- */
- atomic_t request_sent;
+ /** @state: sync state. Must only be access with num_requests_lock. */
+ enum batadv_bla_backbone_gw_state state;
/** @crc: crc16 checksum over all claims */
u16 crc;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 33d053d63407..1a6aa3f8d4d6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -154,6 +154,7 @@ struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+ spin_lock_init(&bt_sk(sk)->accept_q_lock);
sock_reset_flag(sk, SOCK_ZAPPED);
@@ -214,6 +215,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
{
const struct cred *old_cred;
struct pid *old_pid;
+ struct bt_sock *par = bt_sk(parent);
BT_DBG("parent %p, sk %p", parent, sk);
@@ -224,9 +226,13 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
else
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
- list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
bt_sk(sk)->parent = parent;
+ spin_lock_bh(&par->accept_q_lock);
+ list_add_tail(&bt_sk(sk)->accept_q, &par->accept_q);
+ sk_acceptq_added(parent);
+ spin_unlock_bh(&par->accept_q_lock);
+
/* Copy credentials from parent since for incoming connections the
* socket is allocated by the kernel.
*/
@@ -244,8 +250,6 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
bh_unlock_sock(sk);
else
release_sock(sk);
-
- sk_acceptq_added(parent);
}
EXPORT_SYMBOL(bt_accept_enqueue);
@@ -254,45 +258,72 @@ EXPORT_SYMBOL(bt_accept_enqueue);
*/
void bt_accept_unlink(struct sock *sk)
{
+ struct sock *parent = bt_sk(sk)->parent;
+
BT_DBG("sk %p state %d", sk, sk->sk_state);
+ spin_lock_bh(&bt_sk(parent)->accept_q_lock);
list_del_init(&bt_sk(sk)->accept_q);
- sk_acceptq_removed(bt_sk(sk)->parent);
+ sk_acceptq_removed(parent);
+ spin_unlock_bh(&bt_sk(parent)->accept_q_lock);
bt_sk(sk)->parent = NULL;
sock_put(sk);
}
EXPORT_SYMBOL(bt_accept_unlink);
+static struct sock *bt_accept_get(struct sock *parent, struct sock *sk)
+{
+ struct bt_sock *bt = bt_sk(parent);
+ struct sock *next = NULL;
+
+ /* accept_q is modified from child teardown paths too, so take a
+ * temporary reference before dropping the queue lock.
+ */
+ spin_lock_bh(&bt->accept_q_lock);
+
+ if (sk) {
+ if (bt_sk(sk)->parent != parent)
+ goto out;
+
+ if (!list_is_last(&bt_sk(sk)->accept_q, &bt->accept_q)) {
+ next = &list_next_entry(bt_sk(sk), accept_q)->sk;
+ sock_hold(next);
+ }
+ } else if (!list_empty(&bt->accept_q)) {
+ next = &list_first_entry(&bt->accept_q,
+ struct bt_sock, accept_q)->sk;
+ sock_hold(next);
+ }
+
+out:
+ spin_unlock_bh(&bt->accept_q_lock);
+ return next;
+}
+
struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
{
- struct bt_sock *s, *n;
- struct sock *sk;
+ struct sock *sk, *next;
BT_DBG("parent %p", parent);
restart:
- list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
- sk = (struct sock *)s;
-
+ for (sk = bt_accept_get(parent, NULL); sk; sk = next) {
/* Prevent early freeing of sk due to unlink and sock_kill */
- sock_hold(sk);
lock_sock(sk);
/* Check sk has not already been unlinked via
* bt_accept_unlink() due to serialisation caused by sk locking
*/
- if (!bt_sk(sk)->parent) {
+ if (bt_sk(sk)->parent != parent) {
BT_DBG("sk %p, already unlinked", sk);
release_sock(sk);
sock_put(sk);
- /* Restart the loop as sk is no longer in the list
- * and also avoid a potential infinite loop because
- * list_for_each_entry_safe() is not thread safe.
- */
goto restart;
}
+ next = bt_accept_get(parent, sk);
+
/* sk is safely in the parent list so reduce reference count */
sock_put(sk);
@@ -309,7 +340,19 @@ restart:
if (newsock)
sock_graft(sk, newsock);
+ /* Hand the caller a reference taken while sk is
+ * still locked. bt_accept_unlink() just dropped
+ * the accept-queue reference; without this hold a
+ * concurrent teardown (e.g. l2cap_conn_del() ->
+ * l2cap_sock_kill()) could free sk between
+ * release_sock() and the caller using it. Every
+ * caller drops this with sock_put() when done.
+ */
+ sock_hold(sk);
+
release_sock(sk);
+ if (next)
+ sock_put(next);
return sk;
}
@@ -518,18 +561,28 @@ EXPORT_SYMBOL(bt_sock_stream_recvmsg);
static inline __poll_t bt_accept_poll(struct sock *parent)
{
- struct bt_sock *s, *n;
+ struct bt_sock *bt = bt_sk(parent);
+ struct bt_sock *s;
struct sock *sk;
+ __poll_t mask = 0;
+
+ spin_lock_bh(&bt->accept_q_lock);
+ list_for_each_entry(s, &bt->accept_q, accept_q) {
+ int state;
- list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
sk = (struct sock *)s;
- if (sk->sk_state == BT_CONNECTED ||
- (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) &&
- sk->sk_state == BT_CONNECT2))
- return EPOLLIN | EPOLLRDNORM;
+ state = READ_ONCE(sk->sk_state);
+
+ if (state == BT_CONNECTED ||
+ (test_bit(BT_SK_DEFER_SETUP, &bt->flags) &&
+ state == BT_CONNECT2)) {
+ mask = EPOLLIN | EPOLLRDNORM;
+ break;
+ }
}
+ spin_unlock_bh(&bt->accept_q_lock);
- return 0;
+ return mask;
}
__poll_t bt_sock_poll(struct file *file, struct socket *sock,
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 853c8d7644b5..0de5df690bd0 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -645,8 +645,8 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
goto failed;
}
- up_write(&bnep_session_sem);
strcpy(req->device, dev->name);
+ up_write(&bnep_session_sem);
return 0;
failed:
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index fd3aacdea512..aff8562a8690 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4438,6 +4438,9 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
events[4] |= 0x02; /* LE BIG Info Advertising Report */
}
+ if (ll_ext_feature_capable(hdev))
+ events[5] |= BIT(2);
+
if (le_cs_capable(hdev)) {
/* Channel Sounding events */
events[5] |= 0x08; /* LE CS Read Remote Supported Cap Complete event */
@@ -7413,9 +7416,6 @@ static int hci_le_read_all_remote_features_sync(struct hci_dev *hdev,
sizeof(cp), &cp,
HCI_EVT_LE_ALL_REMOTE_FEATURES_COMPLETE,
HCI_CMD_TIMEOUT, NULL);
-
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_ALL_REMOTE_FEATURES,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
static int hci_le_read_remote_features_sync(struct hci_dev *hdev, void *data)
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 7cb2864fe872..d7af617cda45 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -751,6 +751,8 @@ static void iso_sock_cleanup_listen(struct sock *parent)
while ((sk = bt_accept_dequeue(parent, NULL))) {
iso_sock_close(sk);
iso_sock_kill(sk);
+ /* Drop the reference handed back by bt_accept_dequeue(). */
+ sock_put(sk);
}
/* If listening socket has a hcon, properly disconnect it */
@@ -1356,8 +1358,13 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock,
}
ch = bt_accept_dequeue(sk, newsock);
- if (ch)
+ if (ch) {
+ /* Drop the bridging ref from bt_accept_dequeue();
+ * the grafted socket keeps ch alive from here.
+ */
+ sock_put(ch);
break;
+ }
if (!timeo) {
err = -EAGAIN;
@@ -2593,6 +2600,11 @@ int iso_recv(struct hci_dev *hdev, u16 handle, struct sk_buff *skb, u16 flags)
break;
case ISO_END:
+ if (!conn->rx_len) {
+ BT_ERR("Unexpected end frame (len %d)", skb->len);
+ goto drop;
+ }
+
skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
skb->len);
conn->rx_len -= skb->len;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 7701528f1167..fdccd62ccca8 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7274,7 +7274,7 @@ static void l2cap_ecred_reconfigure(struct l2cap_chan *chan)
chan->ident = l2cap_get_ident(conn);
l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ,
- sizeof(pdu), &pdu);
+ struct_size(pdu, scid, 1), pdu);
}
int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu)
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index cf590a67d364..b34e7da8d906 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -349,8 +349,13 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
}
nsk = bt_accept_dequeue(sk, newsock);
- if (nsk)
+ if (nsk) {
+ /* Drop the bridging ref from bt_accept_dequeue();
+ * the grafted socket keeps nsk alive from here.
+ */
+ sock_put(nsk);
break;
+ }
if (!timeo) {
err = -EAGAIN;
@@ -1475,22 +1480,54 @@ static void l2cap_sock_cleanup_listen(struct sock *parent)
BT_DBG("parent %p state %s", parent,
state_to_string(parent->sk_state));
- /* Close not yet accepted channels */
+ /* Close not yet accepted channels.
+ *
+ * bt_accept_dequeue() now returns sk with an extra reference held
+ * (taken while sk was still locked) so a concurrent l2cap_conn_del()
+ * -> l2cap_sock_kill() cannot free sk under us.
+ *
+ * cleanup_listen() runs under the parent sk lock, so unlike
+ * l2cap_sock_shutdown() we must NOT take conn->lock here: that would
+ * establish sk_lock -> conn->lock and invert the established
+ * conn->lock -> chan->lock -> sk_lock order (lockdep deadlock).
+ *
+ * Instead, briefly take the child sk lock to fetch and pin its chan.
+ * l2cap_conn_del() reaches the chan free only via
+ * l2cap_chan_del() -> l2cap_sock_teardown_cb(), which itself takes
+ * the child sk lock; holding it across l2cap_chan_hold_unless_zero()
+ * therefore guarantees the chan cannot be freed while we read and
+ * pin it (hold_unless_zero() additionally skips a chan already past
+ * its last reference). We then drop the sk lock before taking
+ * chan->lock, so sk and chan locks are never held together.
+ */
while ((sk = bt_accept_dequeue(parent, NULL))) {
- struct l2cap_chan *chan = l2cap_pi(sk)->chan;
+ struct l2cap_chan *chan;
+
+ lock_sock_nested(sk, L2CAP_NESTING_NORMAL);
+ chan = l2cap_chan_hold_unless_zero(l2cap_pi(sk)->chan);
+ release_sock(sk);
+ if (!chan) {
+ /* l2cap_conn_del() already tearing this child down */
+ sock_put(sk);
+ continue;
+ }
BT_DBG("child chan %p state %s", chan,
state_to_string(chan->state));
- l2cap_chan_hold(chan);
l2cap_chan_lock(chan);
-
__clear_chan_timer(chan);
l2cap_chan_close(chan, ECONNRESET);
- l2cap_sock_kill(sk);
-
+ /* l2cap_conn_del() may already have killed this socket
+ * (it sets SOCK_DEAD); skip the duplicate to avoid a
+ * double sock_put()/l2cap_chan_put().
+ */
+ if (!sock_flag(sk, SOCK_DEAD))
+ l2cap_sock_kill(sk);
l2cap_chan_unlock(chan);
+
l2cap_chan_put(chan);
+ sock_put(sk);
}
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index b05bb380e5f8..de5bd6b637b2 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -9110,9 +9110,15 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data,
struct adv_info *adv_instance;
int err = 0;
struct mgmt_pending_cmd *cmd;
+ u16 expected_len;
BT_DBG("%s", hdev->name);
+ expected_len = struct_size(cp, data, cp->adv_data_len + cp->scan_rsp_len);
+ if (expected_len != data_len)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA,
+ MGMT_STATUS_INVALID_PARAMS);
+
hci_dev_lock(hdev);
adv_instance = hci_find_adv_instance(hdev, cp->instance);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index be6639cd6f59..bd7d959c6e9e 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -180,6 +180,8 @@ static void rfcomm_sock_cleanup_listen(struct sock *parent)
while ((sk = bt_accept_dequeue(parent, NULL))) {
rfcomm_sock_close(sk);
rfcomm_sock_kill(sk);
+ /* Drop the reference handed back by bt_accept_dequeue(). */
+ sock_put(sk);
}
parent->sk_state = BT_CLOSED;
@@ -497,8 +499,13 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock,
}
nsk = bt_accept_dequeue(sk, newsock);
- if (nsk)
+ if (nsk) {
+ /* Drop the bridging ref from bt_accept_dequeue();
+ * the grafted socket keeps nsk alive from here.
+ */
+ sock_put(nsk);
break;
+ }
if (!timeo) {
err = -EAGAIN;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index eba44525d41d..f1799c6a6f87 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -502,6 +502,8 @@ static void sco_sock_cleanup_listen(struct sock *parent)
while ((sk = bt_accept_dequeue(parent, NULL))) {
sco_sock_close(sk);
sco_sock_kill(sk);
+ /* Drop the reference handed back by bt_accept_dequeue(). */
+ sock_put(sk);
}
parent->sk_state = BT_CLOSED;
@@ -765,8 +767,13 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock,
}
ch = bt_accept_dequeue(sk, newsock);
- if (ch)
+ if (ch) {
+ /* Drop the bridging ref from bt_accept_dequeue();
+ * the grafted socket keeps ch alive from here.
+ */
+ sock_put(ch);
break;
+ }
if (!timeo) {
err = -EAGAIN;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 881d866d687a..2eef4f3345cd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -4640,10 +4640,24 @@ static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
rcu_read_unlock();
}
-static void br_multicast_del_grps(struct net_bridge *br)
+static void br_multicast_enable_all_ports(struct net_bridge *br)
{
struct net_bridge_port *port;
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ return;
+
+ list_for_each_entry(port, &br->port_list, list)
+ __br_multicast_enable_port_ctx(&port->multicast_ctx);
+}
+
+static void br_multicast_disable_all_ports(struct net_bridge *br)
+{
+ struct net_bridge_port *port;
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ return;
+
list_for_each_entry(port, &br->port_list, list)
__br_multicast_disable_port_ctx(&port->multicast_ctx);
}
@@ -4651,7 +4665,6 @@ static void br_multicast_del_grps(struct net_bridge *br)
int br_multicast_toggle(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- struct net_bridge_port *port;
bool change_snoopers = false;
int err = 0;
@@ -4668,7 +4681,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val,
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val);
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
change_snoopers = true;
- br_multicast_del_grps(br);
+ br_multicast_disable_all_ports(br);
goto unlock;
}
@@ -4676,8 +4689,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val,
goto unlock;
br_multicast_open(br);
- list_for_each_entry(port, &br->port_list, list)
- __br_multicast_enable_port_ctx(&port->multicast_ctx);
+ br_multicast_enable_all_ports(br);
change_snoopers = true;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 0ab1c94db4b9..0a394e5f4391 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -297,7 +297,11 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
goto free_skb;
}
- neigh_hh_bridge(&neigh->hh, skb);
+ if (neigh_hh_bridge(&neigh->hh, skb)) {
+ neigh_release(neigh);
+ goto free_skb;
+ }
+
skb->dev = br_indev;
ret = br_handle_frame_finish(net, sk, skb);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6fd5386a1d64..c04a4d0889ae 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1824,6 +1824,7 @@ static int br_fill_linkxstats(struct sk_buff *skb,
const struct net_device *dev,
int *prividx, int attr)
{
+ unsigned int limit = U16_MAX - nla_total_size(0);
struct nlattr *nla __maybe_unused;
struct net_bridge_port *p = NULL;
struct net_bridge_vlan_group *vg;
@@ -1841,6 +1842,7 @@ static int br_fill_linkxstats(struct sk_buff *skb,
p = br_port_get_rtnl(dev);
if (!p)
return 0;
+ limit -= nla_total_size_64bit(sizeof(p->stp_xstats));
br = p->br;
vg = nbp_vlan_group(p);
break;
@@ -1855,6 +1857,9 @@ static int br_fill_linkxstats(struct sk_buff *skb,
if (vg) {
u16 pvid;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ limit -= nla_total_size_64bit(sizeof(struct br_mcast_stats));
+#endif
pvid = br_get_pvid(vg);
list_for_each_entry(v, &vg->vlan_list, vlist) {
struct bridge_vlan_xstats vxi;
@@ -1862,6 +1867,11 @@ static int br_fill_linkxstats(struct sk_buff *skb,
if (++vl_idx < *prividx)
continue;
+
+ if (skb_tail_pointer(skb) - (unsigned char *)nest +
+ nla_total_size(sizeof(vxi)) >= limit)
+ goto nla_put_failure;
+
memset(&vxi, 0, sizeof(vxi));
vxi.vid = v->vid;
vxi.flags = v->flags;
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 468344739db2..4f71de44c0fb 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -241,6 +241,11 @@ net_devmem_bind_dmabuf(struct net_device *dev,
}
if (direction == DMA_TO_DEVICE) {
+ if (!IS_ALIGNED(dmabuf->size, PAGE_SIZE)) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "TX dma-buf size must be a multiple of PAGE_SIZE");
+ goto err_unmap;
+ }
binding->tx_vec = kvmalloc_objs(struct net_iov *,
dmabuf->size / PAGE_SIZE);
if (!binding->tx_vec) {
@@ -267,6 +272,12 @@ net_devmem_bind_dmabuf(struct net_device *dev,
size_t len = sg_dma_len(sg);
struct net_iov *niov;
+ if (!IS_ALIGNED(len, PAGE_SIZE)) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "dma-buf SG length must be PAGE_SIZE aligned");
+ goto err_free_chunks;
+ }
+
owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
dev_to_node(&dev->dev));
if (!owner) {
diff --git a/net/core/gro.c b/net/core/gro.c
index 31d21de5b15a..a84753983467 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -109,6 +109,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
if (p->pp_recycle != skb->pp_recycle)
return -ETOOMANYREFS;
+ if (skb_zcopy(p) || skb_zcopy(skb))
+ return -ETOOMANYREFS;
+
if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) ||
NAPI_GRO_CB(skb)->flush))
return -E2BIG;
@@ -213,10 +216,12 @@ done:
p->data_len += len;
p->truesize += delta_truesize;
p->len += len;
+ skb_shinfo(p)->flags |= skbinfo->flags & SKBFL_SHARED_FRAG;
if (lp != p) {
lp->data_len += len;
lp->truesize += delta_truesize;
lp->len += len;
+ skb_shinfo(lp)->flags |= skbinfo->flags & SKBFL_SHARED_FRAG;
}
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
@@ -244,6 +249,8 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
p->truesize += skb->truesize;
p->len += skb->len;
+ skb_shinfo(p)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 84faace50ac2..3f4a17fa5713 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -319,6 +319,8 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
lockdep_assert_irqs_disabled();
dev = np->dev;
+ /* npinfo->txq belongs to np->dev, so retries must stay bound to it. */
+ skb->dev = dev;
rcu_read_lock();
npinfo = rcu_dereference_bh(dev->npinfo);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index df042da422ef..511c25bf6f2a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -6328,8 +6328,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
0, &filters, &idxattr, &prividx, extack);
if (err < 0) {
- /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
- WARN_ON(err == -EMSGSIZE);
+ /* -EMSGSIZE implies BUG in if_nlmsg_stats_size
+ * or a too big nested attribute.
+ */
kfree_skb(nskb);
} else {
err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7dad68e3b518..44ac121cfccb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2248,6 +2248,7 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
skb_frag_ref(skb, i);
}
skb_shinfo(n)->nr_frags = i;
+ skb_shinfo(n)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
}
if (skb_has_frag_list(skb)) {
@@ -4349,6 +4350,8 @@ onlymerged:
tgt->ip_summed = CHECKSUM_PARTIAL;
skb->ip_summed = CHECKSUM_PARTIAL;
+ skb_shinfo(tgt)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+
skb_len_add(skb, -shiftlen);
skb_len_add(tgt, shiftlen);
@@ -4959,7 +4962,8 @@ normal:
skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
- skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
+ skb_shinfo(nskb)->flags |= (skb_shinfo(head_skb)->flags |
+ skb_shinfo(frag_skb)->flags) &
SKBFL_SHARED_FRAG;
if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
@@ -4976,6 +4980,9 @@ normal:
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb;
+
+ skb_shinfo(nskb)->flags |= skb_shinfo(frag_skb)->flags & SKBFL_SHARED_FRAG;
+
if (!skb_headlen(list_skb)) {
BUG_ON(!nfrags);
} else {
@@ -6200,6 +6207,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
from_shinfo->frags,
from_shinfo->nr_frags * sizeof(skb_frag_t));
to_shinfo->nr_frags += from_shinfo->nr_frags;
+ if (from_shinfo->nr_frags)
+ to_shinfo->flags |= from_shinfo->flags & SKBFL_SHARED_FRAG;
if (!skb_cloned(from))
from_shinfo->nr_frags = 0;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 6187a83bd741..e1850caf1a71 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1268,12 +1268,19 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
const struct proto_ops *ops = NULL;
+ struct sk_psock *psock;
struct socket *sock;
int copied;
trace_sk_data_ready(sk);
rcu_read_lock();
+ psock = sk_psock(sk);
+ if (psock && tls_sw_has_ctx_rx(sk)) {
+ psock->saved_data_ready(sk);
+ rcu_read_unlock();
+ return;
+ }
sock = READ_ONCE(sk->sk_socket);
if (likely(sock))
ops = READ_ONCE(sock->ops);
@@ -1283,8 +1290,6 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
copied = ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) {
- struct sk_psock *psock;
-
rcu_read_lock();
psock = sk_psock(sk);
if (psock)
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 124619920d38..b514e43766ef 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -163,8 +163,8 @@ void hsr_del_nodes(struct list_head *node_db)
struct hsr_node *tmp;
list_for_each_entry_safe(node, tmp, node_db, mac_list) {
- list_del(&node->mac_list);
- hsr_free_node(node);
+ list_del_rcu(&node->mac_list);
+ call_rcu(&node->rcu_head, hsr_free_node_rcu);
}
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7eeff658b467..23e921d313b3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -961,6 +961,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
if (IS_ERR(rt))
goto out_unlock;
+ if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+ goto ende;
+
/* peer icmp_ratelimit */
if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
goto ende;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5aaf9c62c8e1..68e88cb3e55c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -391,7 +391,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
* in, reject the frame as invalid
*/
err = -EINVAL;
- if (iphlen > length)
+ if (iphlen > length || iphlen < sizeof(*iph))
goto error_free;
if (iphlen >= sizeof(*iph)) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bc1296f0ea69..3d62d45d84bd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1272,7 +1272,7 @@ static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
__func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
skb->dev ? skb->dev->name : "?");
kfree_skb(skb);
- WARN_ON(1);
+ WARN_ON_ONCE(1);
return 0;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 432fa28e47d4..389a7cc17110 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -299,9 +299,6 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
-DEFINE_PER_CPU(u32, tcp_tw_isn);
-EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
-
long sysctl_tcp_mem[3] __read_mostly;
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d5c9e65d9760..de9f68a9c0cf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -7589,6 +7589,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct sock *sk, struct sk_buff *skb)
{
struct tcp_fastopen_cookie foc = { .len = -1 };
+ u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
const struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
@@ -7599,20 +7600,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct dst_entry *dst;
struct flowi fl;
u8 syncookies;
- u32 isn;
#ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh;
#endif
- isn = __this_cpu_read(tcp_tw_isn);
- if (isn) {
- /* TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- __this_cpu_write(tcp_tw_isn, 0);
- } else {
+ /* If isn is non-zero, this SYN originally matched a TIME_WAIT socket.
+ * TW sockets are converted to open requests without limitations,
+ * we skip the queue limits and syncookie checks in the block below.
+ */
+ if (!isn) {
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c0526cc03980..fdc81150ff6c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2198,6 +2198,7 @@ lookup:
}
}
+ isn = 0;
process:
if (static_branch_unlikely(&ip4_min_ttl)) {
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
@@ -2227,6 +2228,7 @@ process:
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = isn;
skb->dev = NULL;
@@ -2313,7 +2315,6 @@ do_time_wait:
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
- __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f9d8755705f7..6e4bb411dc04 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2626,6 +2626,7 @@ static int tcp_clone_payload(struct sock *sk, struct sk_buff *to,
todo = min_t(int, skb_frag_size(fragfrom),
probe_size - len);
len += todo;
+ skb_shinfo(to)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
if (lastfrag &&
skb_frag_page(fragfrom) == skb_frag_page(lastfrag) &&
skb_frag_off(fragfrom) == skb_frag_off(lastfrag) +
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index a0813d425b71..29651b1a0bc7 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -482,11 +482,11 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
struct sock *sk = gso_skb->sk;
unsigned int sum_truesize = 0;
struct sk_buff *segs, *seg;
- __be16 newlen, msslen;
struct udphdr *uh;
unsigned int mss;
bool copy_dtor;
__sum16 check;
+ __be16 newlen;
int ret = 0;
mss = skb_shinfo(gso_skb)->gso_size;
@@ -555,15 +555,6 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
return segs;
}
- msslen = htons(sizeof(*uh) + mss);
-
- /* GSO partial and frag_list segmentation only requires splitting
- * the frame into an MSS multiple and possibly a remainder, both
- * cases return a GSO skb. So update the mss now.
- */
- if (skb_is_gso(segs))
- mss *= skb_shinfo(segs)->gso_segs;
-
seg = segs;
uh = udp_hdr(seg);
@@ -586,7 +577,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (!seg->next)
break;
- uh->len = msslen;
+ uh->len = newlen;
uh->check = check;
if (seg->ip_summed == CHECKSUM_PARTIAL)
@@ -599,9 +590,12 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
uh = udp_hdr(seg);
}
- /* last packet can be partial gso_size, account for that in checksum */
- newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) +
- seg->data_len);
+ /* Unless skb fits perfectly as GSO_PARTIAL, the trailing
+ * segment may not be full MSS, account for that in the checksum
+ */
+ if (!skb_is_gso(seg))
+ newlen = htons(skb_tail_pointer(seg) -
+ skb_transport_header(seg) + seg->data_len);
check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
uh->len = newlen;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 03cbce842c1a..cf90f933ca1a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -910,16 +910,27 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
{
+ enum skb_drop_reason drop_reason;
struct ioam6_trace_hdr *trace;
struct ioam6_namespace *ns;
+ struct inet6_dev *idev;
struct ioam6_hdr *hdr;
+ drop_reason = SKB_DROP_REASON_IP_INHDR;
+
/* Bad alignment (must be 4n-aligned) */
if (optoff & 3)
goto drop;
+ /* Does the device still have IPv6 configuration? */
+ idev = __in6_dev_get(skb->dev);
+ if (!idev) {
+ drop_reason = SKB_DROP_REASON_IPV6DISABLED;
+ goto drop;
+ }
+
/* Ignore if IOAM is not enabled on ingress */
- if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled))
+ if (!READ_ONCE(idev->cnf.ioam6_enabled))
goto ignore;
/* Truncated Option header */
@@ -955,9 +966,9 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len))
goto drop;
- /* Trace pointer may have changed */
- trace = (struct ioam6_trace_hdr *)(skb_network_header(skb)
- + optoff + sizeof(*hdr));
+ /* Trace and hdr pointers may have changed */
+ hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
+ trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
ioam6_fill_trace_data(skb, ns, trace, true);
@@ -972,7 +983,7 @@ ignore:
return true;
drop:
- kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
+ kfree_skb_reason(skb, drop_reason);
return false;
}
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index e7a3fb9355ee..450dd53846a2 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -168,6 +168,10 @@ static int hbh_mt6_check(const struct xt_mtchk_param *par)
pr_debug("unknown flags %X\n", optsinfo->invflags);
return -EINVAL;
}
+ if (optsinfo->optsnr > IP6T_OPTS_OPTSNR) {
+ pr_debug("too many supported opts specified\n");
+ return -EINVAL;
+ }
if (optsinfo->flags & IP6T_OPTS_NSTRICT) {
pr_debug("Not strict - not implemented");
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e3d355d1fbd6..b106e5fef9cb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6933,7 +6933,7 @@ int __init ip6_route_init(void)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
ret = bpf_iter_register();
if (ret)
- goto out_register_late_subsys;
+ goto out_register_notifier;
#endif
for_each_possible_cpu(cpu) {
@@ -6946,6 +6946,10 @@ int __init ip6_route_init(void)
out:
return ret;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+out_register_notifier:
+ unregister_netdevice_notifier(&ip6_route_dev_notifier);
+#endif
out_register_late_subsys:
rtnl_unregister_all(PF_INET6);
unregister_pernet_subsys(&ip6_route_net_late_ops);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d13d49bfef19..36d75fb50a70 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1839,6 +1839,7 @@ lookup:
}
}
+ isn = 0;
process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
@@ -1868,6 +1869,7 @@ process:
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = isn;
skb->dev = NULL;
@@ -1956,7 +1958,6 @@ do_time_wait:
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
- __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 157fc23ce4e1..1455f67e01dd 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1360,7 +1360,7 @@ static void l2tp_session_unhash(struct l2tp_session *session)
spin_lock_bh(&pn->l2tp_session_idr_lock);
/* Remove from the per-tunnel list */
- list_del_init(&session->list);
+ list_del_rcu(&session->list);
/* Remove from per-net IDR */
if (tunnel->version == L2TP_HDR_VER_3) {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7b77d57c9f96..f9ee9947a94d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2344,8 +2344,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
sta->sta.max_sp = params->max_sp;
}
- ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab,
- params->ext_capab_len);
+ if (params->ext_capab)
+ ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab,
+ params->ext_capab_len);
/*
* cfg80211 validates this (1-2007) and allows setting the AID
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0a0f27836d57..b98ddfa3003e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8164,6 +8164,7 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata,
"No active links for TID %d", tid);
return -EINVAL;
}
+ pos += map_size;
} else {
map = 0;
}
@@ -8182,7 +8183,6 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata,
default:
return -EINVAL;
}
- pos += map_size;
}
return 0;
}
@@ -11232,6 +11232,9 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata,
control = get_unaligned_le16(pos);
link_id = control & IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID;
+ if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS)
+ continue;
+
link = sdata_dereference(sdata->link[link_id], sdata);
if (!link)
continue;
diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
index 2b3632c6008a..77894d997113 100644
--- a/net/mac80211/parse.c
+++ b/net/mac80211/parse.c
@@ -34,6 +34,22 @@
#include "led.h"
#include "wep.h"
+static const u8 empty_non_inheritance[] = {
+ WLAN_EID_EXTENSION, 1, WLAN_EID_EXT_NON_INHERITANCE,
+ /*
+ * cfg80211_is_element_inherited() hardcodes elements that
+ * cannot be inherited, so we just need an empty one to be
+ * calling it at all.
+ */
+};
+
+struct ieee80211_elem_defrag {
+ const struct element *elem;
+ /* container start/len */
+ const u8 *start;
+ size_t len;
+};
+
struct ieee80211_elems_parse {
/* must be first for kfree to work */
struct ieee802_11_elems elems;
@@ -41,11 +57,7 @@ struct ieee80211_elems_parse {
/* The basic Multi-Link element in the original elements */
const struct element *ml_basic_elem;
- /* The reconfiguration Multi-Link element in the original elements */
- const struct element *ml_reconf_elem;
-
- /* The EPCS Multi-Link element in the original elements */
- const struct element *ml_epcs_elem;
+ struct ieee80211_elem_defrag ml_reconf, ml_epcs;
bool multi_link_inner;
bool skip_vendor;
@@ -162,10 +174,14 @@ ieee80211_parse_extension_element(u32 *crc,
}
break;
case IEEE80211_ML_CONTROL_TYPE_RECONF:
- elems_parse->ml_reconf_elem = elem;
+ elems_parse->ml_reconf.elem = elem;
+ elems_parse->ml_reconf.start = params->start;
+ elems_parse->ml_reconf.len = params->len;
break;
case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS:
- elems_parse->ml_epcs_elem = elem;
+ elems_parse->ml_epcs.elem = elem;
+ elems_parse->ml_epcs.start = params->start;
+ elems_parse->ml_epcs.len = params->len;
break;
default:
break;
@@ -916,7 +932,7 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse,
{
struct ieee802_11_elems *elems = &elems_parse->elems;
struct ieee80211_mle_per_sta_profile *prof;
- const struct element *tmp;
+ const struct element *tmp, *ret;
ssize_t ml_len;
const u8 *end;
@@ -986,50 +1002,40 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse,
sub->from_ap = params->from_ap;
sub->link_id = -1;
- return cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
- sub->start, sub->len);
-}
-
-static void
-ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse)
-{
- struct ieee802_11_elems *elems = &elems_parse->elems;
- ssize_t ml_len;
+ ret = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ sub->start, sub->len);
+ if (ret)
+ return ret;
- ml_len = cfg80211_defragment_element(elems_parse->ml_reconf_elem,
- elems->ie_start,
- elems->total_len,
- elems_parse->scratch_pos,
- elems_parse->scratch +
- elems_parse->scratch_len -
- elems_parse->scratch_pos,
- WLAN_EID_FRAGMENT);
- if (ml_len < 0)
- return;
- elems->ml_reconf = (void *)elems_parse->scratch_pos;
- elems->ml_reconf_len = ml_len;
- elems_parse->scratch_pos += ml_len;
+ /*
+ * Since we know we want and found a profile, apply an empty
+ * non-inheritance if the profile didn't have one, so that any
+ * element that shouldn't be inherited by spec isn't.
+ */
+ return (const void *)empty_non_inheritance;
}
-static void
-ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse)
+static const void *
+ieee80211_mle_defrag(struct ieee80211_elems_parse *elems_parse,
+ struct ieee80211_elem_defrag *defrag,
+ size_t *out_len)
{
- struct ieee802_11_elems *elems = &elems_parse->elems;
+ const void *ret;
ssize_t ml_len;
- ml_len = cfg80211_defragment_element(elems_parse->ml_epcs_elem,
- elems->ie_start,
- elems->total_len,
+ ml_len = cfg80211_defragment_element(defrag->elem,
+ defrag->start, defrag->len,
elems_parse->scratch_pos,
elems_parse->scratch +
elems_parse->scratch_len -
elems_parse->scratch_pos,
WLAN_EID_FRAGMENT);
if (ml_len < 0)
- return;
- elems->ml_epcs = (void *)elems_parse->scratch_pos;
- elems->ml_epcs_len = ml_len;
+ return NULL;
+ ret = elems_parse->scratch_pos;
+ *out_len = ml_len;
elems_parse->scratch_pos += ml_len;
+ return ret;
}
struct ieee802_11_elems *
@@ -1042,6 +1048,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
size_t scratch_len = 3 * params->len;
bool multi_link_inner = false;
+ BUILD_BUG_ON(sizeof(empty_non_inheritance) != empty_non_inheritance[1] + 2);
BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0);
/* cannot parse for both a specific link and non-transmitted BSS */
@@ -1089,6 +1096,17 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
sub.start, nontx_len);
+ /*
+ * If it's a non-transmitted BSS, we shouldn't pick
+ * any elements in the outer parsing that shouldn't
+ * be inherited. If the profile has a non-inheritance
+ * element this automatically happens, but if not then
+ * provide an empty one so that the hard-coded elements
+ * in cfg80211_is_element_inherited() are ignored, but
+ * it must be called.
+ */
+ if (params->bss->transmitted_bss && !non_inherit)
+ non_inherit = (const void *)empty_non_inheritance;
} else {
/* must always parse to get elems_parse->ml_basic_elem */
non_inherit = ieee80211_prep_mle_link_parse(elems_parse, params,
@@ -1109,9 +1127,12 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
_ieee802_11_parse_elems_full(&sub, elems_parse, NULL);
}
- ieee80211_mle_defrag_reconf(elems_parse);
-
- ieee80211_mle_defrag_epcs(elems_parse);
+ elems->ml_reconf = ieee80211_mle_defrag(elems_parse,
+ &elems_parse->ml_reconf,
+ &elems->ml_reconf_len);
+ elems->ml_epcs = ieee80211_mle_defrag(elems_parse,
+ &elems_parse->ml_epcs,
+ &elems->ml_epcs_len);
if (elems->tim && !elems->parse_error) {
const struct ieee80211_tim_ie *tim_ie = elems->tim;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d18e962126ce..3fb40449c6c5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4984,6 +4984,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
u8 sa[ETH_ALEN];
} addrs __aligned(2);
struct ieee80211_sta_rx_stats *stats;
+ u32 encoded_rate;
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
* to a common data structure; drivers can implement that per queue
@@ -5091,11 +5092,14 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
/* push the addresses in front */
memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs));
+ /* capture before mesh forward may memset or free skb->cb */
+ encoded_rate = sta_stats_encode_rate(status);
+
res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb);
switch (res) {
case RX_QUEUED:
stats->last_rx = jiffies;
- stats->last_rate = sta_stats_encode_rate(status);
+ stats->last_rate = encoded_rate;
return true;
case RX_CONTINUE:
break;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 3c152bf66cd5..3e770c7407e1 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -364,7 +364,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
spin_lock_bh(&msk->pm.lock);
- if (!mptcp_pm_should_add_signal_addr(msk)) {
+ /* The cancel path (mptcp_pm_del_add_timer()) can race with this
+ * callback. Once cancel updates retrans_times to MAX, suppress further
+ * retransmissions here. If this callback acquires pm.lock first, one
+ * final transmit attempt is still possible.
+ */
+ if (entry->retrans_times < ADD_ADDR_RETRANS_MAX &&
+ !mptcp_pm_should_add_signal_addr(msk)) {
pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id);
mptcp_pm_announce_addr(msk, &entry->addr, false);
mptcp_pm_add_addr_send_ack(msk);
@@ -414,8 +420,12 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
/* Note: entry might have been removed by another thread.
* We hold rcu_read_lock() to ensure it is not freed under us.
*/
- if (stop_timer)
- sk_stop_timer_sync(sk, &entry->add_timer);
+ if (stop_timer) {
+ if (check_id)
+ sk_stop_timer(sk, &entry->add_timer);
+ else
+ sk_stop_timer_sync(sk, &entry->add_timer);
+ }
rcu_read_unlock();
return entry;
@@ -882,6 +892,7 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
struct mptcp_addr_info *addr, bool *echo,
bool *drop_other_suboptions)
{
+ bool skip_add_addr = false;
int ret = false;
u8 add_addr;
u8 family;
@@ -903,24 +914,49 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
}
*echo = mptcp_pm_should_add_signal_echo(msk);
- port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
-
- family = *echo ? msk->pm.remote.family : msk->pm.local.family;
- if (remaining < mptcp_add_addr_len(family, *echo, port))
- goto out_unlock;
-
if (*echo) {
*addr = msk->pm.remote;
add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
+ port = !!msk->pm.remote.port;
+ family = msk->pm.remote.family;
} else {
*addr = msk->pm.local;
add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
+ port = !!msk->pm.local.port;
+ family = msk->pm.local.family;
}
- WRITE_ONCE(msk->pm.addr_signal, add_addr);
+
+ if (remaining < mptcp_add_addr_len(family, *echo, port)) {
+ struct net *net = sock_net((struct sock *)msk);
+
+ if (!*drop_other_suboptions)
+ goto out_unlock;
+
+ if (*echo) {
+ MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP);
+ } else {
+ skip_add_addr = true;
+ MPTCP_INC_STATS(net, MPTCP_MIB_ADDADDRTXDROP);
+ }
+ goto drop_signal_mark;
+ }
+
ret = true;
+drop_signal_mark:
+ WRITE_ONCE(msk->pm.addr_signal, add_addr);
+
out_unlock:
spin_unlock_bh(&msk->pm.lock);
+
+ /* On pure-ACK option-space exhaustion, stop retrying this ADD_ADDR:
+ * clear the signal bit, cancel the matching retransmission timer, and
+ * let the PM state machine progress.
+ */
+ if (skip_add_addr) {
+ mptcp_pm_del_add_timer(msk, addr, true);
+ mptcp_pm_subflow_established(msk);
+ }
return ret;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4546a8b09884..a72a6ad6ee8b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -397,12 +397,26 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb)
return false;
}
- /* old data, keep it simple and drop the whole pkt, sender
- * will retransmit as needed, if needed.
+ /* Completely old data? */
+ if (!after64(MPTCP_SKB_CB(skb)->end_seq, msk->ack_seq)) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
+ mptcp_drop(sk, skb);
+ return false;
+ }
+
+ /* Partial packet: map_seq < ack_seq < end_seq.
+ * Skip the already-acked bytes and enqueue the new data.
*/
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
- mptcp_drop(sk, skb);
- return false;
+ copy_len = MPTCP_SKB_CB(skb)->end_seq - msk->ack_seq;
+ MPTCP_SKB_CB(skb)->offset += msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
+ MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq -
+ MPTCP_SKB_CB(skb)->map_seq;
+ msk->bytes_received += copy_len;
+ WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
+
+ skb_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ return true;
}
static void mptcp_stop_rtx_timer(struct sock *sk)
@@ -3473,6 +3487,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
/* for fallback's sake */
WRITE_ONCE(msk->ack_seq, 0);
+ atomic64_set(&msk->rcv_wnd_sent, 0);
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 1cf608e7357b..87b5796d0135 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -67,6 +67,12 @@ static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
return 0;
}
+static void __mptcp_subflow_set_rcvbuf(struct sock *ssk, int val)
+{
+ WRITE_ONCE(ssk->sk_rcvbuf, val);
+ tcp_set_rcvbuf(ssk, val);
+}
+
static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
{
struct mptcp_subflow_context *subflow;
@@ -100,7 +106,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
case SO_RCVBUF:
case SO_RCVBUFFORCE:
ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
+ __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf);
break;
case SO_MARK:
if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
@@ -1560,7 +1566,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
}
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
- WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
+ __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf);
}
if (sock_flag(sk, SOCK_LINGER)) {
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index c5a26236a0bb..3706b4a85a0f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1613,6 +1613,7 @@ dump_last:
((dump_type == DUMP_ALL) ==
!!(set->type->features & IPSET_DUMP_LAST))) {
write_unlock_bh(&ip_set_ref_lock);
+ set = NULL;
continue;
}
pr_debug("List set: %s\n", set->name);
@@ -1648,13 +1649,13 @@ dump_last:
if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
goto nla_put_failure;
+ if (set->variant->uref)
+ set->variant->uref(set, cb, true);
ret = set->variant->head(set, skb);
if (ret < 0)
goto release_refcount;
if (dump_flags & IPSET_FLAG_LIST_HEADER)
goto next_set;
- if (set->variant->uref)
- set->variant->uref(set, cb, true);
fallthrough;
default:
ret = set->variant->list(set, skb, cb);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index b79e5dd2af03..04e4627ddfc1 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -386,8 +386,9 @@ static void
mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
{
int i;
+ u8 pos = smp_load_acquire(&n->pos);
- for (i = 0; i < n->pos; i++)
+ for (i = 0; i < pos; i++)
if (test_bit(i, n->used))
ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
}
@@ -490,7 +491,7 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
- u8 htable_bits = t->htable_bits;
+ u8 pos, htable_bits = t->htable_bits;
spin_lock_bh(&t->hregion[r].lock);
for (i = ahash_bucket_start(r, htable_bits);
@@ -498,7 +499,8 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
n = __ipset_dereference(hbucket(t, i));
if (!n)
continue;
- for (j = 0, d = 0; j < n->pos; j++) {
+ pos = smp_load_acquire(&n->pos);
+ for (j = 0, d = 0; j < pos; j++) {
if (!test_bit(j, n->used)) {
d++;
continue;
@@ -534,7 +536,7 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
/* Still try to delete expired elements. */
continue;
tmp->size = n->size - AHASH_INIT_SIZE;
- for (j = 0, d = 0; j < n->pos; j++) {
+ for (j = 0, d = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
@@ -623,7 +625,7 @@ mtype_resize(struct ip_set *set, bool retried)
{
struct htype *h = set->data;
struct htable *t, *orig;
- u8 htable_bits;
+ u8 pos, htable_bits;
size_t hsize, dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
@@ -685,7 +687,8 @@ retry:
n = __ipset_dereference(hbucket(orig, i));
if (!n)
continue;
- for (j = 0; j < n->pos; j++) {
+ pos = smp_load_acquire(&n->pos);
+ for (j = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
@@ -809,9 +812,10 @@ mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
{
struct htype *h = set->data;
const struct htable *t;
- u32 i, j, r;
struct hbucket *n;
struct mtype_elem *data;
+ u32 i, j, r;
+ u8 pos;
t = rcu_dereference_bh(h->table);
for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
@@ -820,7 +824,8 @@ mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
n = rcu_dereference_bh(hbucket(t, i));
if (!n)
continue;
- for (j = 0; j < n->pos; j++) {
+ pos = smp_load_acquire(&n->pos);
+ for (j = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, set->dsize);
@@ -848,6 +853,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
bool flag_exist = flags & IPSET_FLAG_EXIST;
bool deleted = false, forceadd = false, reuse = false;
u32 r, key, multi = 0, elements, maxelem;
+ u8 npos = 0;
rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
@@ -889,7 +895,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ext_size(AHASH_INIT_SIZE, set->dsize);
goto copy_elem;
}
- for (i = 0; i < n->pos; i++) {
+ npos = smp_load_acquire(&n->pos);
+ for (i = 0; i < npos; i++) {
if (!test_bit(i, n->used)) {
/* Reuse first deleted entry */
if (j == -1) {
@@ -933,7 +940,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (elements >= maxelem)
goto set_full;
/* Create a new slot */
- if (n->pos >= n->size) {
+ if (npos >= n->size) {
#ifdef IP_SET_HASH_WITH_MULTI
if (h->bucketsize >= AHASH_MAX_TUNED)
goto set_full;
@@ -962,7 +969,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
copy_elem:
- j = n->pos++;
+ j = npos++;
data = ahash_data(n, j, set->dsize);
copy_data:
t->hregion[r].elements++;
@@ -985,6 +992,8 @@ overwrite_extensions:
if (SET_WITH_TIMEOUT(set))
ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
smp_mb__before_atomic();
+ /* Ensure all data writes are visible before updating position */
+ smp_store_release(&n->pos, npos);
set_bit(j, n->used);
if (old != ERR_PTR(-ENOENT)) {
rcu_assign_pointer(hbucket(t, key), n);
@@ -1043,6 +1052,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
int i, j, k, r, ret = -IPSET_ERR_EXIST;
u32 key, multi = 0;
size_t dsize = set->dsize;
+ u8 pos;
/* Userspace add and resize is excluded by the mutex.
* Kernespace add does not trigger resize.
@@ -1058,7 +1068,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
n = rcu_dereference_bh(hbucket(t, key));
if (!n)
goto out;
- for (i = 0, k = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0, k = 0; i < pos; i++) {
if (!test_bit(i, n->used)) {
k++;
continue;
@@ -1072,8 +1083,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ret = 0;
clear_bit(i, n->used);
smp_mb__after_atomic();
- if (i + 1 == n->pos)
- n->pos--;
+ if (i + 1 == pos)
+ smp_store_release(&n->pos, --pos);
t->hregion[r].elements--;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
@@ -1094,11 +1105,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
x->flags = flags;
}
}
- for (; i < n->pos; i++) {
+ for (; i < pos; i++) {
if (!test_bit(i, n->used))
k++;
}
- if (k == n->pos) {
+ if (k == pos) {
t->hregion[r].ext_size -= ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, key), NULL);
kfree_rcu(n, rcu);
@@ -1109,7 +1120,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (!tmp)
goto out;
tmp->size = n->size - AHASH_INIT_SIZE;
- for (j = 0, k = 0; j < n->pos; j++) {
+ for (j = 0, k = 0; j < pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
@@ -1170,6 +1181,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
int ret, i, j = 0;
#endif
u32 key, multi = 0;
+ u8 pos;
pr_debug("test by nets\n");
for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
@@ -1187,7 +1199,8 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
n = rcu_dereference_bh(hbucket(t, key));
if (!n)
continue;
- for (i = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0; i < pos; i++) {
if (!test_bit(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
@@ -1221,6 +1234,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct mtype_elem *data;
int i, ret = 0;
u32 key, multi = 0;
+ u8 pos;
rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
@@ -1243,7 +1257,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ret = 0;
goto out;
}
- for (i = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0; i < pos; i++) {
if (!test_bit(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
@@ -1360,6 +1375,7 @@ mtype_list(const struct ip_set *set,
/* We assume that one hash bucket fills into one page */
void *incomplete;
int i, ret = 0;
+ u8 pos;
atd = nla_nest_start(skb, IPSET_ATTR_ADT);
if (!atd)
@@ -1378,7 +1394,8 @@ mtype_list(const struct ip_set *set,
cb->args[IPSET_CB_ARG0], t, n);
if (!n)
continue;
- for (i = 0; i < n->pos; i++) {
+ pos = smp_load_acquire(&n->pos);
+ for (i = 0; i < pos; i++) {
if (!test_bit(i, n->used))
continue;
e = ahash_data(n, i, set->dsize);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index a22ec1a6f6ec..e26ca2a370e3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -150,7 +150,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++, i++) {
+ for (; ip <= ip_to; i++) {
e.ip = htonl(ip);
if (i > IPSET_MAX_RANGE) {
hash_ipmark4_data_next(&h->next, &e);
@@ -162,6 +162,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
ret = 0;
+
+ if (ip == ip_to)
+ break;
+ ip++;
}
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index e977b5a9c48d..41ca24a22a02 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -186,7 +186,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to;) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++, i++) {
@@ -203,6 +203,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = 0;
}
+ if (ip == ip_to)
+ break;
+ ip++;
}
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 39a01934b153..b9ac2efaa15c 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -182,7 +182,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to;) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++, i++) {
@@ -199,6 +199,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = 0;
}
+ if (ip == ip_to)
+ break;
+ ip++;
}
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 5c6de605a9fb..2d6652d43199 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -274,7 +274,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
p = port;
ip2 = ip2_from;
}
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to;) {
e.ip = htonl(ip);
for (; p <= port_to; p++) {
e.port = htons(p);
@@ -298,6 +298,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip2 = ip2_from;
}
p = port;
+ if (ip == ip_to)
+ break;
+ ip++;
}
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c7c7f6a7a9f6..bd9cae44d214 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -327,18 +327,22 @@ ip_vs_use_count_dec(void)
/* Service hashing:
* Operation Locking order
* ---------------------------------------------------------------------------
- * add table service_mutex, svc_resize_sem(W)
- * del table service_mutex
- * move between tables svc_resize_sem(W), seqcount_t(W), bit lock
- * add/del service service_mutex, bit lock
+ * add first table service_mutex
+ * attach new table service_mutex
+ * add/del service service_mutex, RCU, bit lock
+ * move between tables (rehash) svc_resize_sem(W), seqcount_t(W), bit lock
+ * replace old with attached svc_resize_sem(W), svc_replace_sem(W)
* find service RCU, seqcount_t(R)
* walk services(blocking) service_mutex, svc_resize_sem(R)
* walk services(non-blocking) RCU, seqcount_t(R)
+ * walk services(non-blocking) svc_resize_sem(R), RCU, seqcount_t(R)
+ * walk services(non-blocking) svc_replace_sem(R), RCU, seqcount_t(R)
+ * del table service_mutex after stopped work
*
- * - new tables are linked/unlinked under service_mutex and svc_resize_sem
- * - new table is linked on resizing and all operations can run in parallel
- * in 2 tables until the new table is registered as current one
- * - two contexts can modify buckets: config and table resize, both in
+ * - new table is attached on resizing under service_mutex and all operations
+ * can run in parallel in 2 tables until the new table is registered as current
+ * one
+ * - two contexts can modify buckets: config and table resize (work), both in
* process context
* - only table resizer can move entries, so we do not protect t->seqc[]
* items with t->lock[]
@@ -346,9 +350,13 @@ ip_vs_use_count_dec(void)
* services are moved to new table
* - move operations may disturb readers: find operation will not miss entries
* but walkers may see same entry twice if they are forced to retry chains
- * - walkers using cond_resched_rcu() on !PREEMPT_RCU may need to hold
- * service_mutex to disallow new tables to be installed or to check
+ * or to walk the newly attached second table
+ * - walkers using cond_resched_rcu() on !PREEMPT_RCU may need to check
* svc_table_changes and repeat the RCU read section if new table is installed
+ * - walkers may serialize with the whole resizing process (svc_resize_sem)
+ * to prevent seeing same service twice or just with the svc_table
+ * replace (svc_replace_sem) when we can see entries twice but we
+ * prefer to run concurrently with the rehashing.
*/
/*
@@ -387,9 +395,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
/* increase its refcnt because it is referenced by the svc table */
atomic_inc(&svc->refcnt);
+ /* We know if new table is attached under service_mutex but rely on
+ * RCU to hold the old table to be freed in resizer
+ */
+ rcu_read_lock();
+
+ /* This can be the old or the new table */
+ t = rcu_dereference(ipvs->svc_table);
+
/* New entries go into recent table */
- t = rcu_dereference_protected(ipvs->svc_table, 1);
- t = rcu_dereference_protected(t->new_tbl, 1);
+ t = rcu_dereference(t->new_tbl);
if (svc->fwmark == 0) {
/*
@@ -410,6 +425,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
hlist_bl_add_head_rcu(&svc->s_list, head);
hlist_bl_unlock(head);
+ rcu_read_unlock();
+
return 1;
}
@@ -432,7 +449,13 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
return 0;
}
- t = rcu_dereference_protected(ipvs->svc_table, 1);
+ /* We know if new table is attached under service_mutex but rely on
+ * RCU to hold the old table to be freed in resizer
+ */
+ rcu_read_lock();
+
+ /* This can be the old or the new table */
+ t = rcu_dereference(ipvs->svc_table);
hash_key = READ_ONCE(svc->hash_key);
/* We need to lock the bucket in the right table */
if (ip_vs_rht_same_table(t, hash_key)) {
@@ -443,13 +466,13 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
/* Moved to new table ? */
if (hash_key != hash_key2) {
hlist_bl_unlock(head);
- t = rcu_dereference_protected(t->new_tbl, 1);
+ t = rcu_dereference(t->new_tbl);
head = t->buckets + (hash_key2 & t->mask);
hlist_bl_lock(head);
}
} else {
/* It is already moved to new table */
- t = rcu_dereference_protected(t->new_tbl, 1);
+ t = rcu_dereference(t->new_tbl);
head = t->buckets + (hash_key & t->mask);
hlist_bl_lock(head);
}
@@ -459,6 +482,8 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
svc->flags &= ~IP_VS_SVC_F_HASHED;
atomic_dec(&svc->refcnt);
hlist_bl_unlock(head);
+
+ rcu_read_unlock();
return 1;
}
@@ -666,15 +691,14 @@ static void svc_resize_work_handler(struct work_struct *work)
goto unlock_sem;
more_work = false;
clear_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags);
- if (!READ_ONCE(ipvs->enable) ||
- test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
+ if (!READ_ONCE(ipvs->enable))
goto unlock_m;
t = rcu_dereference_protected(ipvs->svc_table, 1);
/* Do nothing if table is removed */
if (!t)
goto unlock_m;
- /* New table needs to be registered? BUG! */
- if (t != rcu_dereference_protected(t->new_tbl, 1))
+ /* New table already attached? BUG! */
+ if (t != rcu_access_pointer(t->new_tbl))
goto unlock_m;
lfactor = sysctl_svc_lfactor(ipvs);
@@ -691,6 +715,7 @@ static void svc_resize_work_handler(struct work_struct *work)
/* Flip the table_id */
t_new->table_id = t->table_id ^ IP_VS_RHT_TABLE_ID_MASK;
+ /* Attach new table */
rcu_assign_pointer(t->new_tbl, t_new);
/* Allow add/del to new_tbl while moving from old table */
mutex_unlock(&ipvs->service_mutex);
@@ -698,8 +723,8 @@ static void svc_resize_work_handler(struct work_struct *work)
ip_vs_rht_for_each_bucket(t, bucket, head) {
same_bucket:
if (++limit >= 16) {
- if (!READ_ONCE(ipvs->enable) ||
- test_bit(IP_VS_WORK_SVC_NORESIZE,
+ /* Check if work is stopped */
+ if (test_bit(IP_VS_WORK_SVC_NORESIZE,
&ipvs->work_flags))
goto unlock_sem;
if (resched_score >= 100) {
@@ -764,16 +789,12 @@ same_bucket:
goto same_bucket;
}
- /* Tables can be switched only under service_mutex */
- while (!mutex_trylock(&ipvs->service_mutex)) {
- cond_resched();
- if (!READ_ONCE(ipvs->enable) ||
- test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
- goto unlock_sem;
- }
- if (!READ_ONCE(ipvs->enable) ||
- test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
- goto unlock_m;
+ /* Serialize with readers that don't like svc_table changes */
+ down_write(&ipvs->svc_replace_sem);
+
+ /* Check if work is stopped to avoid synchronize_rcu() */
+ if (test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
+ goto unlock_repl;
rcu_assign_pointer(ipvs->svc_table, t_new);
/* Inform readers that new table is installed */
@@ -781,8 +802,8 @@ same_bucket:
atomic_inc(&ipvs->svc_table_changes);
t_free = t;
-unlock_m:
- mutex_unlock(&ipvs->service_mutex);
+unlock_repl:
+ up_write(&ipvs->svc_replace_sem);
unlock_sem:
up_write(&ipvs->svc_resize_sem);
@@ -801,6 +822,11 @@ out:
test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
return;
queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 1);
+ return;
+
+unlock_m:
+ mutex_unlock(&ipvs->service_mutex);
+ goto unlock_sem;
}
static inline void
@@ -1691,6 +1717,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
struct ip_vs_pe *pe = NULL;
int ret_hooks = -1;
int ret = 0;
+ bool grow;
/* increase the module use count */
if (!ip_vs_use_count_inc())
@@ -1732,16 +1759,25 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
}
#endif
- t = rcu_dereference_protected(ipvs->svc_table, 1);
+ /* The old table can be freed, protect it with RCU */
+ rcu_read_lock();
+ t = rcu_dereference(ipvs->svc_table);
if (!t) {
int lfactor = sysctl_svc_lfactor(ipvs);
int new_size = ip_vs_svc_desired_size(ipvs, NULL, lfactor);
+ rcu_read_unlock();
t_new = ip_vs_svc_table_alloc(ipvs, new_size, lfactor);
if (!t_new) {
ret = -ENOMEM;
goto out_err;
}
+ grow = false;
+ } else {
+ /* Even the currently attached new table may need to grow */
+ t = rcu_dereference(t->new_tbl);
+ grow = ip_vs_get_num_services(ipvs) + 1 > t->u_thresh;
+ rcu_read_unlock();
}
if (!rcu_dereference_protected(ipvs->conn_tab, 1)) {
@@ -1800,6 +1836,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
goto out_err;
if (t_new) {
+ /* Add table for first time */
clear_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags);
rcu_assign_pointer(ipvs->svc_table, t_new);
t_new = NULL;
@@ -1831,8 +1868,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
ip_vs_svc_hash(svc);
/* Schedule resize work */
- if (t && ip_vs_get_num_services(ipvs) > t->u_thresh &&
- !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags))
+ if (grow && !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags))
queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work,
1);
@@ -2054,7 +2090,6 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
return -EEXIST;
ipvs = svc->ipvs;
ip_vs_unlink_service(svc, false);
- t = rcu_dereference_protected(ipvs->svc_table, 1);
/* Drop the table if no more services */
ns = ip_vs_get_num_services(ipvs);
@@ -2062,6 +2097,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
/* Stop the resizer and drop the tables */
set_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags);
cancel_delayed_work_sync(&ipvs->svc_resize_work);
+ t = rcu_dereference_protected(ipvs->svc_table, 1);
if (t) {
rcu_assign_pointer(ipvs->svc_table, NULL);
/* Inform readers that table is removed */
@@ -2075,11 +2111,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
t = p;
}
}
- } else if (ns <= t->l_thresh &&
- !test_and_set_bit(IP_VS_WORK_SVC_RESIZE,
- &ipvs->work_flags)) {
- queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work,
- 1);
+ } else {
+ bool shrink;
+
+ rcu_read_lock();
+ t = rcu_dereference(ipvs->svc_table);
+ /* Even the currently attached new table may need to shrink */
+ t = rcu_dereference(t->new_tbl);
+ shrink = ns <= t->l_thresh;
+ rcu_read_unlock();
+ if (shrink && !test_and_set_bit(IP_VS_WORK_SVC_RESIZE,
+ &ipvs->work_flags))
+ queue_delayed_work(system_unbound_wq,
+ &ipvs->svc_resize_work, 1);
}
return 0;
}
@@ -2184,17 +2228,21 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
struct ip_vs_service *svc;
struct hlist_bl_node *e;
struct ip_vs_dest *dest;
- int old_gen, new_gen;
+ int old_gen;
if (event != NETDEV_DOWN || !ipvs)
return NOTIFY_DONE;
IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
+ /* Allow concurrent rehashing on resize but to avoid loop
+ * serialize with installing the new table.
+ */
+ down_read(&ipvs->svc_replace_sem);
+
old_gen = atomic_read(&ipvs->svc_table_changes);
rcu_read_lock();
-repeat:
smp_rmb(); /* ipvs->svc_table and svc_table_changes */
ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) {
hlist_bl_for_each_entry_rcu(svc, e, head, s_list) {
@@ -2207,17 +2255,17 @@ repeat:
}
resched_score++;
if (resched_score >= 100) {
- resched_score = 0;
cond_resched_rcu();
- new_gen = atomic_read(&ipvs->svc_table_changes);
- /* New table installed ? */
- if (old_gen != new_gen) {
- old_gen = new_gen;
- goto repeat;
- }
+ /* Flushed? So no more dev refs */
+ if (atomic_read(&ipvs->svc_table_changes) != old_gen)
+ goto done;
+ resched_score = 0;
}
}
+
+done:
rcu_read_unlock();
+ up_read(&ipvs->svc_replace_sem);
return NOTIFY_DONE;
}
@@ -2244,6 +2292,10 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
struct ip_vs_service *svc;
struct hlist_bl_node *e;
+ /* svc_table can not be replaced (svc_replace_sem) or
+ * removed (service_mutex)
+ */
+ down_read(&ipvs->svc_replace_sem);
rcu_read_lock();
ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) {
@@ -2259,6 +2311,7 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
}
rcu_read_unlock();
+ up_read(&ipvs->svc_replace_sem);
ip_vs_zero_stats(&ipvs->tot_stats->s);
return 0;
@@ -3062,6 +3115,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v)
u32 sum;
int i;
+ /* Info for conns */
rcu_read_lock();
t = rcu_dereference(ipvs->conn_tab);
@@ -3123,6 +3177,12 @@ repeat_conn:
}
after_conns:
+ rcu_read_unlock();
+
+ /* Info for services */
+ down_read(&ipvs->svc_replace_sem);
+ rcu_read_lock();
+
t = rcu_dereference(ipvs->svc_table);
count = ip_vs_get_num_services(ipvs);
@@ -3133,9 +3193,7 @@ after_conns:
if (!count)
goto after_svc;
old_gen = atomic_read(&ipvs->svc_table_changes);
- loops = 0;
-repeat_svc:
smp_rmb(); /* ipvs->svc_table and svc_table_changes */
memset(counts, 0, sizeof(counts));
ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, pt) {
@@ -3157,15 +3215,10 @@ repeat_svc:
if (resched_score >= 100) {
resched_score = 0;
cond_resched_rcu();
- new_gen = atomic_read(&ipvs->svc_table_changes);
- /* New table installed ? */
- if (old_gen != new_gen) {
- /* Too many changes? */
- if (++loops >= 5)
- goto after_svc;
- old_gen = new_gen;
- goto repeat_svc;
- }
+ /* Flushed? */
+ if (atomic_read(&ipvs->svc_table_changes) !=
+ old_gen)
+ goto after_svc;
}
counts[count]++;
}
@@ -3184,6 +3237,9 @@ repeat_svc:
}
after_svc:
+ rcu_read_unlock();
+ up_read(&ipvs->svc_replace_sem);
+
seq_printf(seq, "Stats thread slots:\t%d (max %lu)\n",
ipvs->est_kt_count, ipvs->est_max_threads);
seq_printf(seq, "Stats chain max len:\t%d\n", ipvs->est_chain_max);
@@ -3191,7 +3247,6 @@ after_svc:
ipvs->est_chain_max * IPVS_EST_CHAIN_FACTOR *
IPVS_EST_NTICKS);
- rcu_read_unlock();
return 0;
}
@@ -3503,7 +3558,7 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs,
int ret = 0;
lockdep_assert_held(&ipvs->svc_resize_sem);
- /* All service modifications are disabled, go ahead */
+ /* All svc_table modifications are disabled, go ahead */
ip_vs_rht_walk_buckets(ipvs->svc_table, head) {
hlist_bl_for_each_entry(svc, e, head, s_list) {
/* Only expose IPv4 entries to old interface */
@@ -3687,7 +3742,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
pr_err("length: %u != %zu\n", *len, size);
return -EINVAL;
}
- /* Protect against table resizer moving the entries.
+ /* Prevent modifications to the list with services.
* Try reverse locking, so that we do not hold the mutex
* while waiting for semaphore.
*/
@@ -4029,6 +4084,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
int start = cb->args[0];
int idx = 0;
+ /* Make sure we do not see same service twice during resize */
down_read(&ipvs->svc_resize_sem);
rcu_read_lock();
ip_vs_rht_walk_buckets_safe_rcu(ipvs->svc_table, head) {
@@ -5072,6 +5128,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
/* Initialize service_mutex, svc_table per netns */
__mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key);
init_rwsem(&ipvs->svc_resize_sem);
+ init_rwsem(&ipvs->svc_replace_sem);
INIT_DELAYED_WORK(&ipvs->svc_resize_work, svc_resize_work_handler);
atomic_set(&ipvs->svc_table_changes, 0);
RCU_INIT_POINTER(ipvs->svc_table, NULL);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index b594cd244fe1..17e971bd4c74 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -321,8 +321,8 @@ __printf(3, 4)
void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
const char *fmt, ...)
{
+ const char *helper_name = "(null)";
const struct nf_conn_help *help;
- const struct nf_conntrack_helper *helper;
struct va_format vaf;
va_list args;
@@ -331,14 +331,17 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
vaf.fmt = fmt;
vaf.va = &args;
- /* Called from the helper function, this call never fails */
help = nfct_help(ct);
+ if (help) {
+ const struct nf_conntrack_helper *helper;
- /* rcu_read_lock()ed by nf_hook_thresh */
- helper = rcu_dereference(help->helper);
+ helper = rcu_dereference(help->helper);
+ if (helper)
+ helper_name = helper->name;
+ }
nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
- "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);
+ "helper %s dropping packet: %pV ", helper_name, &vaf);
va_end(args);
}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index a6c81c04b3a5..57b450024a99 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -61,6 +61,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
struct nf_hook_state *state = &entry->state;
/* Release those devices we held, or Alexey will kill me. */
+ dev_put(entry->skb_dev);
dev_put(state->in);
dev_put(state->out);
if (state->sk)
@@ -102,6 +103,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
return false;
+ dev_hold(entry->skb_dev);
dev_hold(state->in);
dev_hold(state->out);
@@ -202,11 +204,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
*entry = (struct nf_queue_entry) {
.skb = skb,
+ .skb_dev = skb->dev,
.state = *state,
.hook_index = index,
.size = sizeof(*entry) + route_key_size,
};
-
__nf_queue_entry_init_physdevs(entry);
if (!nf_queue_entry_get_refs(entry)) {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 58304fd1f70f..984a0eb9e149 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1212,6 +1212,8 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
if (physinif == ifindex || physoutif == ifindex)
return 1;
#endif
+ if (entry->skb_dev && entry->skb_dev->ifindex == ifindex)
+ return 1;
if (entry->state.in)
if (entry->state.in->ifindex == ifindex)
return 1;
diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c
index 03ffb1159fc1..d14ca157910b 100644
--- a/net/netfilter/nft_inner.c
+++ b/net/netfilter/nft_inner.c
@@ -163,7 +163,6 @@ static int nft_inner_parse_l2l3(const struct nft_inner *priv,
return -1;
if (fragoff == 0) {
- thoff = nhoff + sizeof(_ip6h);
ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
ctx->inner_thoff = thoff;
ctx->l4proto = l4proto;
@@ -247,8 +246,8 @@ static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt,
local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) {
- local_bh_enable();
local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
+ local_bh_enable();
return false;
}
*tun_ctx = *this_cpu_tun_ctx;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index c42642075685..e7e8490a53d8 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -83,6 +83,14 @@ struct vport *ovs_netdev_link(struct vport *vport, bool tunnel)
}
rtnl_lock();
+ /* Do not link devices that are not registered to avoid a potential
+ * race with the NETDEV_UNREGISTER notification in dp_device_event().
+ */
+ if (vport->dev->reg_state != NETREG_REGISTERED) {
+ err = -ENODEV;
+ goto error_put_unlock;
+ }
+
err = netdev_master_upper_dev_link(vport->dev,
get_dpdev(vport->dp),
NULL, NULL, NULL);
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 4dbf0914df7d..706927139393 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -671,8 +671,23 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
/* Look for an existing pipe handle */
sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
- if (sknode)
- return sk_receive_skb(sknode, skb, 1);
+ if (sknode) {
+ int rc;
+
+ /* pep_do_rcv() runs from two contexts: from softirq via
+ * phonet_rcv() -> __sk_receive_skb() with BH disabled,
+ * and from process context via
+ * release_sock() -> __release_sock(), which drops
+ * the listener slock with spin_unlock_bh() before draining
+ * the backlog. The child pipe slock is taken below via
+ * bh_lock_sock_nested(), which does not itself disable BH, so
+ * disable BH here to keep both acquire contexts consistent.
+ */
+ local_bh_disable();
+ rc = sk_receive_skb(sknode, skb, 1);
+ local_bh_enable();
+ return rc;
+ }
switch (hdr->message_id) {
case PNS_PEP_CONNECT_REQ:
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 654e23d13e3d..5830b31a1f37 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -198,8 +198,13 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
write_lock_bh(&sock->sk->sk_callback_lock);
- /* done under the callback_lock to serialize with write_space */
+ /* done under the callback_lock to serialize with write_space.
+ * Set t_sock inside rds_tcp_tc_list_lock so readers walking
+ * rds_tcp_tc_list under the same lock cannot observe an
+ * entry whose t_sock is NULL.
+ */
spin_lock(&rds_tcp_tc_list_lock);
+ tc->t_sock = sock;
list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
#if IS_ENABLED(CONFIG_IPV6)
rds6_tcp_tc_count++;
@@ -211,8 +216,6 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
/* accepted sockets need our listen data ready undone */
if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready)
sock->sk->sk_data_ready = sock->sk->sk_user_data;
-
- tc->t_sock = sock;
if (!tc->t_rtn)
tc->t_rtn = net_generic(sock_net(sock->sk), rds_tcp_netid);
tc->t_cpath = cp;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 27c2aa2dd023..98f2165159d7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -213,8 +213,6 @@ struct rxrpc_skb_priv {
struct {
u16 offset; /* Offset of data */
u16 len; /* Length of data */
- u8 flags;
-#define RXRPC_RX_VERIFIED 0x01
};
struct {
rxrpc_seq_t first_ack; /* First packet in acks table */
@@ -309,15 +307,16 @@ struct rxrpc_security {
struct sk_buff *challenge);
/* verify a response */
- int (*verify_response)(struct rxrpc_connection *,
- struct sk_buff *);
+ int (*verify_response)(struct rxrpc_connection *conn,
+ struct sk_buff *response_skb,
+ void *response, unsigned int len);
/* clear connection security */
void (*clear)(struct rxrpc_connection *);
/* Default ticket -> key decoder */
int (*default_decode_ticket)(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int ticket_offset, unsigned int ticket_len,
+ void *ticket, unsigned int ticket_len,
struct key **_key);
};
@@ -774,6 +773,11 @@ struct rxrpc_call {
struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */
struct sk_buff_head rx_queue; /* Queue of packets for this call to receive */
struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */
+ void *rx_dec_buffer; /* Decryption buffer */
+ unsigned short rx_dec_bsize; /* rx_dec_buffer size */
+ unsigned short rx_dec_offset; /* Decrypted packet data offset */
+ unsigned short rx_dec_len; /* Decrypted packet data len */
+ rxrpc_seq_t rx_dec_seq; /* Packet in decryption buffer */
rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */
rxrpc_seq_t rx_consumed; /* Highest packet consumed */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 2b19b252225e..fec59d9338b9 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -332,27 +332,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK;
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- sp->hdr.securityIndex != 0 &&
- (skb_cloned(skb) ||
- skb_has_frag_list(skb) ||
- skb_has_shared_frag(skb))) {
- /* Unshare the packet so that it can be
- * modified by in-place decryption.
- */
- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
-
- if (nskb) {
- rxrpc_new_skb(nskb, rxrpc_skb_new_unshared);
- rxrpc_input_call_packet(call, nskb);
- rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx);
- } else {
- /* OOM - Drop the packet. */
- rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem);
- }
- } else {
- rxrpc_input_call_packet(call, skb);
- }
+ rxrpc_input_call_packet(call, skb);
rxrpc_free_skb(skb, rxrpc_skb_put_call_rx);
did_receive = true;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f035f486c139..fcb9d38bb521 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -152,6 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
spin_lock_init(&call->notify_lock);
refcount_set(&call->ref, 1);
call->debug_id = debug_id;
+ call->rx_pkt_offset = USHRT_MAX;
call->tx_total_len = -1;
call->tx_jumbo_max = 1;
call->next_rx_timo = 20 * HZ;
@@ -553,6 +554,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call)
rxrpc_purge_queue(&call->recvmsg_queue);
rxrpc_purge_queue(&call->rx_queue);
rxrpc_purge_queue(&call->rx_oos_queue);
+ kfree(call->rx_dec_buffer);
}
/*
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 442414d90ba1..c96ca615b787 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -243,28 +243,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
static int rxrpc_verify_response(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
+ unsigned int len = skb->len - sizeof(struct rxrpc_wire_header);
+ void *buffer;
int ret;
- if (skb_cloned(skb) || skb_has_frag_list(skb) ||
- skb_has_shared_frag(skb)) {
- /* Copy the packet if shared so that we can do in-place
- * decryption.
- */
- struct sk_buff *nskb = skb_copy(skb, GFP_NOFS);
-
- if (nskb) {
- rxrpc_new_skb(nskb, rxrpc_skb_new_unshared);
- ret = conn->security->verify_response(conn, nskb);
- rxrpc_free_skb(nskb, rxrpc_skb_put_response_copy);
- } else {
- /* OOM - Drop the packet. */
- rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem);
- ret = -ENOMEM;
- }
- } else {
- ret = conn->security->verify_response(conn, skb);
- }
+ buffer = kmalloc(len, GFP_NOFS);
+ if (!buffer)
+ return -ENOMEM;
+
+ ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), buffer, len);
+ if (ret < 0)
+ goto out;
+
+ ret = conn->security->verify_response(conn, skb, buffer, len);
+out:
+ kfree(buffer);
return ret;
}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 0a260df45d25..0b39046bdc61 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -32,9 +32,6 @@ static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- sp->flags |= RXRPC_RX_VERIFIED;
return 0;
}
@@ -57,9 +54,10 @@ static int none_sendmsg_respond_to_challenge(struct sk_buff *challenge,
}
static int none_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+ struct sk_buff *response_skb,
+ void *response, unsigned int len)
{
- return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ return rxrpc_abort_conn(conn, response_skb, RX_PROTOCOL_ERROR, -EPROTO,
rxrpc_eproto_rxnull_response);
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index e1f7513a46db..c940600117a4 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -147,15 +147,52 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
}
/*
- * Decrypt and verify a DATA packet.
+ * Decrypt and verify a DATA packet. The content of the packet is pulled out
+ * into a flat buffer rather than decrypting in place in the skbuff. This also
+ * has the advantage of aligning the buffer correctly for the crypto routines.
+ *
+ * We keep track of the sequence number of the packet currently decrypted into
+ * the buffer in ->rx_dec_seq. If MSG_PEEK is used and steps onto a new
+ * packet, subsequent recvmsg() calls will have to go back and re-decrypt the
+ * current packet.
*/
static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ int ret;
- if (sp->flags & RXRPC_RX_VERIFIED)
- return 0;
- return call->security->verify_packet(call, skb);
+ if (sp->len > call->rx_dec_bsize) {
+ /* Make sure we can hold a 1412-byte jumbo subpacket and make
+ * sure that the buffer size is aligned to a crypto blocksize.
+ */
+ size_t size = clamp(round_up(sp->len, 32), 2048, 65535);
+ void *buffer = krealloc(call->rx_dec_buffer, size, GFP_NOFS);
+
+ if (!buffer)
+ return -ENOMEM;
+ call->rx_dec_buffer = buffer;
+ call->rx_dec_bsize = size;
+ }
+
+ ret = -EFAULT;
+ if (skb_copy_bits(skb, sp->offset, call->rx_dec_buffer, sp->len) < 0)
+ goto err;
+
+ call->rx_dec_offset = 0;
+ call->rx_dec_len = sp->len;
+ call->rx_dec_seq = sp->hdr.seq;
+ ret = call->security->verify_packet(call, skb);
+ if (ret < 0)
+ goto err;
+ return 0;
+
+err:
+ kfree(call->rx_dec_buffer);
+ call->rx_dec_buffer = NULL;
+ call->rx_dec_bsize = 0;
+ call->rx_dec_offset = 0;
+ call->rx_dec_len = 0;
+ return ret;
}
/*
@@ -283,16 +320,21 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
if (msg)
sock_recv_timestamp(msg, sock->sk, skb);
- if (rx_pkt_offset == 0) {
+ if (call->rx_dec_seq != sp->hdr.seq ||
+ !call->rx_dec_buffer) {
ret2 = rxrpc_verify_data(call, skb);
trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq,
- sp->offset, sp->len, ret2);
+ call->rx_dec_offset,
+ call->rx_dec_len, ret2);
if (ret2 < 0) {
ret = ret2;
goto out;
}
- rx_pkt_offset = sp->offset;
- rx_pkt_len = sp->len;
+ }
+
+ if (rx_pkt_offset == USHRT_MAX) {
+ rx_pkt_offset = call->rx_dec_offset;
+ rx_pkt_len = call->rx_dec_len;
} else {
trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
rx_pkt_offset, rx_pkt_len, 0);
@@ -304,10 +346,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
if (copy > remain)
copy = remain;
if (copy > 0) {
- ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter,
- copy);
- if (ret2 < 0) {
- ret = ret2;
+ ret2 = copy_to_iter(call->rx_dec_buffer + rx_pkt_offset,
+ copy, iter);
+ if (ret2 != copy) {
+ ret = -EFAULT;
goto out;
}
@@ -328,7 +370,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
/* The whole packet has been transferred. */
if (sp->hdr.flags & RXRPC_LAST_PACKET)
ret = 1;
- rx_pkt_offset = 0;
+ rx_pkt_offset = USHRT_MAX;
rx_pkt_len = 0;
skb = skb_peek_next(skb, &call->recvmsg_queue);
diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c
index 0d5e654da918..a1ee102abae1 100644
--- a/net/rxrpc/rxgk.c
+++ b/net/rxrpc/rxgk.c
@@ -473,15 +473,20 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxgk_header *hdr;
struct krb5_buffer metadata;
- unsigned int offset = sp->offset, len = sp->len;
+ unsigned int len = call->rx_dec_len;
size_t data_offset = 0, data_len = len;
+ void *data = call->rx_dec_buffer, *p = data;
u32 ac = 0;
int ret = -ENOMEM;
_enter("");
- crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE,
- &data_offset, &data_len);
+ if (crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE,
+ &data_offset, &data_len) < 0) {
+ ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
+ rxgk_abort_1_short_header);
+ goto put_gk;
+ }
hdr = kzalloc_obj(*hdr, GFP_NOFS);
if (!hdr)
@@ -496,16 +501,15 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call,
metadata.len = sizeof(*hdr);
metadata.data = hdr;
- ret = rxgk_verify_mic_skb(gk->krb5, gk->rx_Kc, &metadata,
- skb, &offset, &len, &ac);
+ ret = rxgk_verify_mic(gk->krb5, gk->rx_Kc, &metadata, &p, &len, &ac);
kfree(hdr);
if (ret < 0) {
if (ret != -ENOMEM)
rxrpc_abort_eproto(call, skb, ac,
rxgk_abort_1_verify_mic_eproto);
} else {
- sp->offset = offset;
- sp->len = len;
+ call->rx_dec_offset = p - data;
+ call->rx_dec_len = len;
}
put_gk:
@@ -522,49 +526,53 @@ static int rxgk_verify_packet_encrypted(struct rxrpc_call *call,
struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxgk_header hdr;
- unsigned int offset = sp->offset, len = sp->len;
+ struct rxgk_header *hdr;
+ unsigned int offset = 0, len = call->rx_dec_len;
+ void *data = call->rx_dec_buffer, *p = data;
int ret;
u32 ac = 0;
_enter("");
- ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac);
+ if (crypto_krb5_check_data_len(gk->krb5, KRB5_ENCRYPT_MODE,
+ len, sizeof(*hdr)) < 0) {
+ ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
+ rxgk_abort_2_short_header);
+ goto error;
+ }
+
+ ret = rxgk_decrypt(gk->krb5, gk->rx_enc, &p, &len, &ac);
if (ret < 0) {
if (ret != -ENOMEM)
rxrpc_abort_eproto(call, skb, ac, rxgk_abort_2_decrypt_eproto);
goto error;
}
+ offset = p - data;
- if (len < sizeof(hdr)) {
+ if (len < sizeof(*hdr)) {
ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
rxgk_abort_2_short_header);
goto error;
}
/* Extract the header from the skb */
- ret = skb_copy_bits(skb, offset, &hdr, sizeof(hdr));
- if (ret < 0) {
- ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
- rxgk_abort_2_short_encdata);
- goto error;
- }
- offset += sizeof(hdr);
- len -= sizeof(hdr);
-
- if (ntohl(hdr.epoch) != call->conn->proto.epoch ||
- ntohl(hdr.cid) != call->cid ||
- ntohl(hdr.call_number) != call->call_id ||
- ntohl(hdr.seq) != sp->hdr.seq ||
- ntohl(hdr.sec_index) != call->security_ix ||
- ntohl(hdr.data_len) > len) {
+ hdr = data + offset;
+ offset += sizeof(*hdr);
+ len -= sizeof(*hdr);
+
+ if (ntohl(hdr->epoch) != call->conn->proto.epoch ||
+ ntohl(hdr->cid) != call->cid ||
+ ntohl(hdr->call_number) != call->call_id ||
+ ntohl(hdr->seq) != sp->hdr.seq ||
+ ntohl(hdr->sec_index) != call->security_ix ||
+ ntohl(hdr->data_len) > len) {
ret = rxrpc_abort_eproto(call, skb, RXGK_SEALEDINCON,
rxgk_abort_2_short_data);
goto error;
}
- sp->offset = offset;
- sp->len = ntohl(hdr.data_len);
+ call->rx_dec_offset = offset;
+ call->rx_dec_len = ntohl(hdr->data_len);
ret = 0;
error:
rxgk_put(gk);
@@ -1076,11 +1084,12 @@ static int rxgk_sendmsg_respond_to_challenge(struct sk_buff *challenge,
* unsigned int call_numbers<>;
* };
*/
-static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn,
- const struct krb5_enctype *krb5,
- struct sk_buff *skb,
- __be32 *p, __be32 *end)
+static int rxgk_verify_authenticator(struct rxrpc_connection *conn,
+ const struct krb5_enctype *krb5,
+ struct sk_buff *skb,
+ void *auth, unsigned int auth_len)
{
+ __be32 *p = auth, *end = auth + auth_len;
u32 app_len, call_count, level, epoch, cid, i;
_enter("");
@@ -1144,37 +1153,6 @@ static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn,
}
/*
- * Extract the authenticator and verify it.
- */
-static int rxgk_verify_authenticator(struct rxrpc_connection *conn,
- const struct krb5_enctype *krb5,
- struct sk_buff *skb,
- unsigned int auth_offset, unsigned int auth_len)
-{
- void *auth;
- __be32 *p;
- int ret;
-
- auth = kmalloc(auth_len, GFP_NOFS);
- if (!auth)
- return -ENOMEM;
-
- ret = skb_copy_bits(skb, auth_offset, auth, auth_len);
- if (ret < 0) {
- ret = rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
- rxgk_abort_resp_short_auth);
- goto error;
- }
-
- p = auth;
- ret = rxgk_do_verify_authenticator(conn, krb5, skb, p,
- p + auth_len / sizeof(*p));
-error:
- kfree(auth);
- return ret;
-}
-
-/*
* Verify a response.
*
* struct RXGK_Response {
@@ -1184,49 +1162,45 @@ error:
* };
*/
static int rxgk_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ void *buffer, unsigned int len)
{
const struct krb5_enctype *krb5;
struct rxrpc_key_token *token;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxgk_response rhdr;
+ struct rxgk_response *rhdr;
struct rxgk_context *gk;
struct key *key = NULL;
- unsigned int offset = sizeof(struct rxrpc_wire_header);
- unsigned int len = skb->len - sizeof(struct rxrpc_wire_header);
- unsigned int token_offset, token_len;
- unsigned int auth_offset, auth_len;
+ unsigned int resp_token_len, auth_len;
+ void *resp_token, *auth;
__be32 xauth_len;
int ret, ec;
_enter("{%d}", conn->debug_id);
/* Parse the RXGK_Response object */
- if (sizeof(rhdr) + sizeof(__be32) > len)
+ if (len < sizeof(*rhdr) + sizeof(__be32))
goto short_packet;
-
- if (skb_copy_bits(skb, offset, &rhdr, sizeof(rhdr)) < 0)
- goto short_packet;
- offset += sizeof(rhdr);
- len -= sizeof(rhdr);
-
- token_offset = offset;
- token_len = ntohl(rhdr.token_len);
- if (token_len > len ||
- xdr_round_up(token_len) + sizeof(__be32) > len)
+ rhdr = buffer;
+ buffer += sizeof(*rhdr);
+ len -= sizeof(*rhdr);
+
+ resp_token = buffer;
+ resp_token_len = ntohl(rhdr->token_len);
+ if (resp_token_len > len ||
+ xdr_round_up(resp_token_len) + sizeof(__be32) > len)
goto short_packet;
- trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, token_len);
+ trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, resp_token_len);
- offset += xdr_round_up(token_len);
- len -= xdr_round_up(token_len);
+ buffer += xdr_round_up(resp_token_len);
+ len -= xdr_round_up(resp_token_len);
- if (skb_copy_bits(skb, offset, &xauth_len, sizeof(xauth_len)) < 0)
- goto short_packet;
- offset += sizeof(xauth_len);
+ xauth_len = *(__be32 *)buffer;
+ buffer += sizeof(xauth_len);
len -= sizeof(xauth_len);
- auth_offset = offset;
+ auth = buffer;
auth_len = ntohl(xauth_len);
if (auth_len > len)
goto short_packet;
@@ -1241,7 +1215,7 @@ static int rxgk_verify_response(struct rxrpc_connection *conn,
* to the app to deal with - which might mean a round trip to
* userspace.
*/
- ret = rxgk_extract_token(conn, skb, token_offset, token_len, &key);
+ ret = rxgk_extract_token(conn, skb, resp_token, resp_token_len, &key);
if (ret < 0)
goto out;
@@ -1255,7 +1229,7 @@ static int rxgk_verify_response(struct rxrpc_connection *conn,
*/
token = key->payload.data[0];
conn->security_level = token->rxgk->level;
- conn->rxgk.start_time = __be64_to_cpu(rhdr.start_time);
+ conn->rxgk.start_time = __be64_to_cpu(rhdr->start_time);
gk = rxgk_generate_transport_key(conn, token->rxgk, sp->hdr.cksum, GFP_NOFS);
if (IS_ERR(gk)) {
@@ -1265,18 +1239,18 @@ static int rxgk_verify_response(struct rxrpc_connection *conn,
krb5 = gk->krb5;
- trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum, token_len);
+ trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum,
+ resp_token_len);
/* Decrypt, parse and verify the authenticator. */
- ret = rxgk_decrypt_skb(krb5, gk->resp_enc, skb,
- &auth_offset, &auth_len, &ec);
+ ret = rxgk_decrypt(krb5, gk->resp_enc, &auth, &auth_len, &ec);
if (ret < 0) {
rxrpc_abort_conn(conn, skb, RXGK_SEALEDINCON, ret,
rxgk_abort_resp_auth_dec);
goto out_gk;
}
- ret = rxgk_verify_authenticator(conn, krb5, skb, auth_offset, auth_len);
+ ret = rxgk_verify_authenticator(conn, krb5, skb, auth, auth_len);
if (ret < 0)
goto out_gk;
diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c
index 0ef2a29eb695..200a30064fae 100644
--- a/net/rxrpc/rxgk_app.c
+++ b/net/rxrpc/rxgk_app.c
@@ -40,7 +40,7 @@
* };
*/
int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int ticket_offset, unsigned int ticket_len,
+ void *buffer, unsigned int ticket_len,
struct key **_key)
{
struct rxrpc_key_token *token;
@@ -49,7 +49,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
size_t pre_ticket_len, payload_len;
unsigned int klen, enctype;
void *payload, *ticket;
- __be32 *t, *p, *q, tmp[2];
+ __be32 *t, *p, *q, *tmp;
int ret;
_enter("");
@@ -59,10 +59,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
rxgk_abort_resp_short_yfs_tkt);
/* Get the session key length */
- ret = skb_copy_bits(skb, ticket_offset, tmp, sizeof(tmp));
- if (ret < 0)
- return rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO,
- rxgk_abort_resp_short_yfs_klen);
+ tmp = buffer;
enctype = ntohl(tmp[0]);
klen = ntohl(tmp[1]);
@@ -84,12 +81,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
* it.
*/
ticket = payload + pre_ticket_len;
- ret = skb_copy_bits(skb, ticket_offset, ticket, ticket_len);
- if (ret < 0) {
- ret = rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO,
- rxgk_abort_resp_short_yfs_tkt);
- goto error;
- }
+ memcpy(ticket, buffer, ticket_len);
/* Fill out the form header. */
p = payload;
@@ -131,7 +123,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
goto error;
}
- /* Ticket read in with skb_copy_bits above */
+ /* Ticket appended above. */
q += xdr_round_up(ticket_len) / 4;
if (WARN_ON((unsigned long)q - (unsigned long)payload != payload_len)) {
ret = -EIO;
@@ -182,14 +174,15 @@ error:
* [tools.ietf.org/html/draft-wilkinson-afs3-rxgk-afs-08 sec 6.1]
*/
int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int token_offset, unsigned int token_len,
+ void *token, unsigned int token_len,
struct key **_key)
{
const struct krb5_enctype *krb5;
const struct krb5_buffer *server_secret;
struct crypto_aead *token_enc = NULL;
struct key *server_key;
- unsigned int ticket_offset, ticket_len;
+ unsigned int ticket_len;
+ void *ticket;
u32 kvno, enctype;
int ret, ec = 0;
@@ -197,24 +190,23 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
__be32 kvno;
__be32 enctype;
__be32 token_len;
- } container;
+ } *container;
- if (token_len < sizeof(container))
+ if (token_len < sizeof(*container))
goto short_packet;
/* Decode the RXGK_TokenContainer object. This tells us which server
* key we should be using. We can then fetch the key, get the secret
* and set up the crypto to extract the token.
*/
- if (skb_copy_bits(skb, token_offset, &container, sizeof(container)) < 0)
- goto short_packet;
+ container = token;
+ token += sizeof(*container);
- kvno = ntohl(container.kvno);
- enctype = ntohl(container.enctype);
- ticket_len = ntohl(container.token_len);
- ticket_offset = token_offset + sizeof(container);
+ kvno = ntohl(container->kvno);
+ enctype = ntohl(container->enctype);
+ ticket_len = ntohl(container->token_len);
- if (ticket_len > xdr_round_down(token_len - sizeof(container)))
+ if (ticket_len > xdr_round_down(token_len - sizeof(*container)))
goto short_packet;
_debug("KVNO %u", kvno);
@@ -237,8 +229,8 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
* gain access to K0, from which we can derive the transport key and
* thence decode the authenticator.
*/
- ret = rxgk_decrypt_skb(krb5, token_enc, skb,
- &ticket_offset, &ticket_len, &ec);
+ ticket = token;
+ ret = rxgk_decrypt(krb5, token_enc, &ticket, &ticket_len, &ec);
crypto_free_aead(token_enc);
token_enc = NULL;
if (ret < 0) {
@@ -248,7 +240,7 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
return ret;
}
- ret = conn->security->default_decode_ticket(conn, skb, ticket_offset,
+ ret = conn->security->default_decode_ticket(conn, skb, ticket,
ticket_len, _key);
if (ret < 0)
goto cant_get_token;
diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h
index 1e257d7ab8ec..3deed5863f5a 100644
--- a/net/rxrpc/rxgk_common.h
+++ b/net/rxrpc/rxgk_common.h
@@ -41,10 +41,10 @@ struct rxgk_context {
* rxgk_app.c
*/
int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int ticket_offset, unsigned int ticket_len,
+ void *ticket, unsigned int ticket_len,
struct key **_key);
int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
- unsigned int token_offset, unsigned int token_len,
+ void *token, unsigned int token_len,
struct key **_key);
/*
@@ -62,31 +62,30 @@ int rxgk_set_up_token_cipher(const struct krb5_buffer *server_key,
gfp_t gfp);
/*
- * Apply decryption and checksumming functions to part of an skbuff. The
- * offset and length are updated to reflect the actual content of the encrypted
+ * Apply decryption and checksumming functions a flat data buffer. The data
+ * point and length are updated to reflect the actual content of the encrypted
* region.
*/
-static inline
-int rxgk_decrypt_skb(const struct krb5_enctype *krb5,
- struct crypto_aead *aead,
- struct sk_buff *skb,
- unsigned int *_offset, unsigned int *_len,
- int *_error_code)
+static inline int rxgk_decrypt(const struct krb5_enctype *krb5,
+ struct crypto_aead *aead,
+ void **_data, unsigned int *_len,
+ int *_error_code)
{
- struct scatterlist sg[16];
+ struct scatterlist sg[1];
size_t offset = 0, len = *_len;
- int nr_sg, ret;
+ int ret;
- sg_init_table(sg, ARRAY_SIZE(sg));
- nr_sg = skb_to_sgvec(skb, sg, *_offset, len);
- if (unlikely(nr_sg < 0))
- return nr_sg;
+ sg_init_one(sg, *_data, len);
- ret = crypto_krb5_decrypt(krb5, aead, sg, nr_sg,
- &offset, &len);
+ ret = crypto_krb5_decrypt(krb5, aead, sg, 1, &offset, &len);
switch (ret) {
case 0:
- *_offset += offset;
+ if (offset & 3) {
+ *_error_code = RXGK_INCONSISTENCY;
+ ret = -EPROTO;
+ break;
+ }
+ *_data += offset;
*_len = len;
break;
case -EBADMSG: /* Checksum mismatch. */
@@ -106,31 +105,26 @@ int rxgk_decrypt_skb(const struct krb5_enctype *krb5,
}
/*
- * Check the MIC on a region of an skbuff. The offset and length are updated
- * to reflect the actual content of the secure region.
+ * Check the MIC on a flat buffer. The data pointer and length are updated to
+ * reflect the actual content of the secure region.
*/
static inline
-int rxgk_verify_mic_skb(const struct krb5_enctype *krb5,
- struct crypto_shash *shash,
- const struct krb5_buffer *metadata,
- struct sk_buff *skb,
- unsigned int *_offset, unsigned int *_len,
- u32 *_error_code)
+int rxgk_verify_mic(const struct krb5_enctype *krb5,
+ struct crypto_shash *shash,
+ const struct krb5_buffer *metadata,
+ void **_data, unsigned int *_len,
+ u32 *_error_code)
{
- struct scatterlist sg[16];
+ struct scatterlist sg[1];
size_t offset = 0, len = *_len;
- int nr_sg, ret;
+ int ret;
- sg_init_table(sg, ARRAY_SIZE(sg));
- nr_sg = skb_to_sgvec(skb, sg, *_offset, len);
- if (unlikely(nr_sg < 0))
- return nr_sg;
+ sg_init_one(sg, *_data, len);
- ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, nr_sg,
- &offset, &len);
+ ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, 1, &offset, &len);
switch (ret) {
case 0:
- *_offset += offset;
+ *_data += offset;
*_len = len;
break;
case -EBADMSG: /* Checksum mismatch */
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index cba7935977f0..6fbd883401ac 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -430,27 +430,25 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
rxrpc_seq_t seq,
struct skcipher_request *req)
{
- struct rxkad_level1_hdr sechdr;
+ struct rxkad_level1_hdr *sechdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
- struct scatterlist sg[16];
- u32 data_size, buf;
+ struct scatterlist sg[1];
+ void *data = call->rx_dec_buffer;
+ u32 len = sp->len, data_size, buf;
u16 check;
int ret;
_enter("");
- if (sp->len < 8)
+ if (len < 8)
return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
rxkad_abort_1_short_header);
/* Decrypt the skbuff in-place. TODO: We really want to decrypt
* directly into the target buffer.
*/
- sg_init_table(sg, ARRAY_SIZE(sg));
- ret = skb_to_sgvec(skb, sg, sp->offset, 8);
- if (unlikely(ret < 0))
- return ret;
+ sg_init_one(sg, data, len);
/* start the decryption afresh */
memset(&iv, 0, sizeof(iv));
@@ -464,13 +462,11 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
return ret;
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
- return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
- rxkad_abort_1_short_encdata);
- sp->offset += sizeof(sechdr);
- sp->len -= sizeof(sechdr);
+ sechdr = data;
+ call->rx_dec_offset = sizeof(*sechdr);
+ len -= sizeof(*sechdr);
- buf = ntohl(sechdr.data_size);
+ buf = ntohl(sechdr->data_size);
data_size = buf & 0xffff;
check = buf >> 16;
@@ -479,10 +475,10 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
if (check != 0)
return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
rxkad_abort_1_short_check);
- if (data_size > sp->len)
+ if (data_size > len)
return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
rxkad_abort_1_short_data);
- sp->len = data_size;
+ call->rx_dec_len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
@@ -496,43 +492,28 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
struct skcipher_request *req)
{
const struct rxrpc_key_token *token;
- struct rxkad_level2_hdr sechdr;
+ struct rxkad_level2_hdr *sechdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
- struct scatterlist _sg[4], *sg;
- u32 data_size, buf;
+ struct scatterlist sg[1];
+ void *data = call->rx_dec_buffer;
+ u32 len = sp->len, data_size, buf;
u16 check;
- int nsg, ret;
+ int ret;
- _enter(",{%d}", sp->len);
+ _enter(",{%d}", len);
- if (sp->len < 8)
+ if (len < 8)
return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
rxkad_abort_2_short_header);
/* Don't let the crypto algo see a misaligned length. */
- sp->len = round_down(sp->len, 8);
+ len = round_down(len, 8);
- /* Decrypt the skbuff in-place. TODO: We really want to decrypt
- * directly into the target buffer.
+ /* Decrypt in place in the call's decryption buffer. TODO: We really
+ * want to decrypt directly into the target buffer.
*/
- sg = _sg;
- nsg = skb_shinfo(skb)->nr_frags + 1;
- if (nsg <= 4) {
- nsg = 4;
- } else {
- sg = kmalloc_objs(*sg, nsg, GFP_NOIO);
- if (!sg)
- return -ENOMEM;
- }
-
- sg_init_table(sg, nsg);
- ret = skb_to_sgvec(skb, sg, sp->offset, sp->len);
- if (unlikely(ret < 0)) {
- if (sg != _sg)
- kfree(sg);
- return ret;
- }
+ sg_init_one(sg, data, len);
/* decrypt from the session key */
token = call->conn->key->payload.data[0];
@@ -540,11 +521,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x);
+ skcipher_request_set_crypt(req, sg, sg, len, iv.x);
ret = crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
- if (sg != _sg)
- kfree(sg);
if (ret < 0) {
if (ret == -ENOMEM)
return ret;
@@ -553,13 +532,11 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
}
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
- return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
- rxkad_abort_2_short_len);
- sp->offset += sizeof(sechdr);
- sp->len -= sizeof(sechdr);
+ sechdr = data;
+ call->rx_dec_offset = sizeof(*sechdr);
+ len -= sizeof(*sechdr);
- buf = ntohl(sechdr.data_size);
+ buf = ntohl(sechdr->data_size);
data_size = buf & 0xffff;
check = buf >> 16;
@@ -569,17 +546,18 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
rxkad_abort_2_short_check);
- if (data_size > sp->len)
+ if (data_size > len)
return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
rxkad_abort_2_short_data);
- sp->len = data_size;
+ call->rx_dec_len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
}
/*
- * Verify the security on a received packet and the subpackets therein.
+ * Verify the security on a received (sub)packet. If the packet needs
+ * modifying (e.g. decrypting), it must be copied.
*/
static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
@@ -985,7 +963,6 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
*_expiry = 0;
ASSERT(server_key->payload.data[0] != NULL);
- ASSERTCMP((unsigned long) ticket & 7UL, ==, 0);
memcpy(&iv, &server_key->payload.data[2], sizeof(iv));
@@ -1134,14 +1111,15 @@ unlock:
* verify a response
*/
static int rxkad_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ void *buffer, unsigned int len)
{
struct rxkad_response *response;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
struct key *server_key;
time64_t expiry;
- void *ticket = NULL;
+ void *ticket;
u32 version, kvno, ticket_len, level;
__be32 csum;
int ret, i;
@@ -1164,13 +1142,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
}
}
- ret = -ENOMEM;
- response = kzalloc_obj(struct rxkad_response, GFP_NOFS);
- if (!response)
- goto error;
-
- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- response, sizeof(*response)) < 0) {
+ response = buffer;
+ if (len < sizeof(*response)) {
ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
rxkad_abort_resp_short);
goto error;
@@ -1182,6 +1155,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len);
+ buffer += sizeof(*response);
+ len -= sizeof(*response);
+
if (version != RXKAD_VERSION) {
ret = rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
rxkad_abort_resp_version);
@@ -1201,13 +1177,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
}
/* extract the kerberos ticket and decrypt and decode it */
- ret = -ENOMEM;
- ticket = kmalloc(ticket_len, GFP_NOFS);
- if (!ticket)
- goto error;
-
- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
- ticket, ticket_len) < 0) {
+ ticket = buffer;
+ if (ticket_len > len) {
ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
rxkad_abort_resp_short_tkt);
goto error;
@@ -1287,8 +1258,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
error:
- kfree(ticket);
- kfree(response);
key_put(server_key);
_leave(" = %d", ret);
return ret;
diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c
index b1c65110f04d..dea9270f3e57 100644
--- a/net/shaper/shaper.c
+++ b/net/shaper/shaper.c
@@ -138,35 +138,58 @@ handle_nest_cancel:
return -EMSGSIZE;
}
+static void net_shaper_copy(struct net_shaper *dst,
+ const struct net_shaper *src)
+{
+ WRITE_ONCE(dst->parent.scope, READ_ONCE(src->parent.scope));
+ WRITE_ONCE(dst->parent.id, READ_ONCE(src->parent.id));
+ WRITE_ONCE(dst->handle.scope, READ_ONCE(src->handle.scope));
+ WRITE_ONCE(dst->handle.id, READ_ONCE(src->handle.id));
+
+ WRITE_ONCE(dst->metric, READ_ONCE(src->metric));
+ WRITE_ONCE(dst->bw_min, READ_ONCE(src->bw_min));
+ WRITE_ONCE(dst->bw_max, READ_ONCE(src->bw_max));
+ WRITE_ONCE(dst->burst, READ_ONCE(src->burst));
+ WRITE_ONCE(dst->priority, READ_ONCE(src->priority));
+ WRITE_ONCE(dst->weight, READ_ONCE(src->weight));
+
+ /* private fields are only used on the write path under the lock */
+ data_race(dst->leaves = src->leaves);
+}
+
static int
net_shaper_fill_one(struct sk_buff *msg,
const struct net_shaper_binding *binding,
const struct net_shaper *shaper,
const struct genl_info *info)
{
+ struct net_shaper cur;
void *hdr;
hdr = genlmsg_iput(msg, info);
if (!hdr)
return -EMSGSIZE;
+ /* Make a copy to avoid data races */
+ net_shaper_copy(&cur, shaper);
+
if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) ||
- net_shaper_fill_handle(msg, &shaper->parent,
+ net_shaper_fill_handle(msg, &cur.parent,
NET_SHAPER_A_PARENT) ||
- net_shaper_fill_handle(msg, &shaper->handle,
+ net_shaper_fill_handle(msg, &cur.handle,
NET_SHAPER_A_HANDLE) ||
- ((shaper->bw_min || shaper->bw_max || shaper->burst) &&
- nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) ||
- (shaper->bw_min &&
- nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) ||
- (shaper->bw_max &&
- nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) ||
- (shaper->burst &&
- nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) ||
- (shaper->priority &&
- nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) ||
- (shaper->weight &&
- nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight)))
+ ((cur.bw_min || cur.bw_max || cur.burst) &&
+ nla_put_u32(msg, NET_SHAPER_A_METRIC, cur.metric)) ||
+ (cur.bw_min &&
+ nla_put_uint(msg, NET_SHAPER_A_BW_MIN, cur.bw_min)) ||
+ (cur.bw_max &&
+ nla_put_uint(msg, NET_SHAPER_A_BW_MAX, cur.bw_max)) ||
+ (cur.burst &&
+ nla_put_uint(msg, NET_SHAPER_A_BURST, cur.burst)) ||
+ (cur.priority &&
+ nla_put_u32(msg, NET_SHAPER_A_PRIORITY, cur.priority)) ||
+ (cur.weight &&
+ nla_put_u32(msg, NET_SHAPER_A_WEIGHT, cur.weight)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -283,31 +306,24 @@ static void net_shaper_default_parent(const struct net_shaper_handle *handle,
parent->id = 0;
}
-/* MARK_0 is already in use due to XA_FLAGS_ALLOC. The VALID mark is set on
- * an entry only after the device-side configuration has completed
- * successfully (see net_shaper_commit()). Lookups and dumps must filter on
- * this mark to avoid exposing tentative entries inserted by
- * net_shaper_pre_insert() while the driver call is still in flight.
- */
-#define NET_SHAPER_VALID XA_MARK_1
-
static struct net_shaper *
net_shaper_lookup(struct net_shaper_binding *binding,
const struct net_shaper_handle *handle)
{
u32 index = net_shaper_handle_to_index(handle);
struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper *cur;
hierarchy = net_shaper_hierarchy_rcu(binding);
- if (!hierarchy || !xa_get_mark(&hierarchy->shapers, index,
- NET_SHAPER_VALID))
+ if (!hierarchy)
return NULL;
- /* Pairs with smp_wmb() in net_shaper_commit(): if the entry is
- * valid, its contents must be visible too.
- */
- smp_rmb();
- return xa_load(&hierarchy->shapers, index);
+ cur = xa_load(&hierarchy->shapers, index);
+ /* Check valid before reading fields */
+ if (!cur || !smp_load_acquire(&cur->valid))
+ return NULL;
+
+ return cur;
}
/* Allocate on demand the per device shaper's hierarchy container.
@@ -421,12 +437,10 @@ static void net_shaper_commit(struct net_shaper_binding *binding,
if (WARN_ON_ONCE(!cur))
continue;
- /* Successful update: drop the tentative mark
- * and update the hierarchy container.
- */
- *cur = shapers[i];
- smp_wmb();
- __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_VALID);
+ /* Successful update: update the hierarchy container... */
+ net_shaper_copy(cur, &shapers[i]);
+ /* ... publish to lockless readers. */
+ smp_store_release(&cur->valid, true);
}
xa_unlock(&hierarchy->shapers);
}
@@ -443,10 +457,10 @@ static void net_shaper_rollback(struct net_shaper_binding *binding)
xa_lock(&hierarchy->shapers);
xa_for_each(&hierarchy->shapers, index, cur) {
- if (xa_get_mark(&hierarchy->shapers, index, NET_SHAPER_VALID))
+ if (cur->valid)
continue;
__xa_erase(&hierarchy->shapers, index);
- kfree(cur);
+ kfree_rcu(cur, rcu);
}
xa_unlock(&hierarchy->shapers);
}
@@ -859,12 +873,12 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb,
goto out_unlock;
for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index,
- U32_MAX, NET_SHAPER_VALID));
+ U32_MAX, XA_PRESENT));
ctx->start_index++) {
- /* Pairs with smp_wmb() in net_shaper_commit(): the entry
- * is marked VALID, so its contents must be visible too.
- */
- smp_rmb();
+ /* Check valid before reading fields */
+ if (!smp_load_acquire(&shaper->valid))
+ continue;
+
ret = net_shaper_fill_one(skb, binding, shaper, info);
if (ret)
break;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 3bfdaf5e64f5..964ebc268ee4 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1366,9 +1366,14 @@ unlock:
mutex_unlock(&tls_ctx->tx_lock);
}
+/* When has_copied is true the caller has already moved bytes to
+ * userspace. Report sk_err but leave it set so the next read
+ * surfaces it instead of a spurious EOF, otherwise sk_err is
+ * consumed via sock_error().
+ */
static int
tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
- bool released)
+ bool released, bool has_copied)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1386,8 +1391,11 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
if (!sk_psock_queue_empty(psock))
return 0;
- if (sk->sk_err)
+ if (sk->sk_err) {
+ if (has_copied)
+ return -READ_ONCE(sk->sk_err);
return sock_error(sk);
+ }
if (ret < 0)
return ret;
@@ -1423,7 +1431,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
}
if (unlikely(!tls_strp_msg_load(&ctx->strp, released)))
- return tls_rx_rec_wait(sk, psock, nonblock, false);
+ return tls_rx_rec_wait(sk, psock, nonblock, false, has_copied);
return 1;
}
@@ -2110,7 +2118,7 @@ int tls_sw_recvmsg(struct sock *sk,
int to_decrypt, chunk;
err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT,
- released);
+ released, !!(decrypted + copied));
if (err <= 0) {
if (psock) {
chunk = sk_msg_recvmsg(sk, psock, msg, len,
@@ -2297,7 +2305,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct tls_decrypt_arg darg;
err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK,
- true);
+ true, false);
if (err <= 0)
goto splice_read_end;
@@ -2383,7 +2391,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
} else {
struct tls_decrypt_arg darg;
- err = tls_rx_rec_wait(sk, NULL, true, released);
+ err = tls_rx_rec_wait(sk, NULL, true, released, !!copied);
if (err <= 0)
goto read_sock_end;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1cbf36ea043b..dc71ed79be4a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2711,8 +2711,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
* Sleep until more data has arrived. But check for races..
*/
static long unix_stream_data_wait(struct sock *sk, long timeo,
- struct sk_buff *last, unsigned int last_len,
- bool freezable)
+ struct sk_buff *last, bool freezable)
{
unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
struct sk_buff *tail;
@@ -2725,7 +2724,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail != last ||
- (tail && tail->len != last_len) ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -2921,7 +2919,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
int flags = state->flags;
bool check_creds = false;
struct scm_cookie scm;
- unsigned int last_len;
struct unix_sock *u;
int copied = 0;
int err = 0;
@@ -2967,7 +2964,6 @@ redo:
goto unlock;
}
last = skb = skb_peek(&sk->sk_receive_queue);
- last_len = last ? last->len : 0;
again:
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
@@ -3001,8 +2997,7 @@ again:
mutex_unlock(&u->iolock);
- timeo = unix_stream_data_wait(sk, timeo, last,
- last_len, freezable);
+ timeo = unix_stream_data_wait(sk, timeo, last, freezable);
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
@@ -3019,7 +3014,6 @@ unlock:
while (skip >= unix_skb_len(skb)) {
skip -= unix_skb_len(skb);
last = skb;
- last_len = skb->len;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (!skb)
goto again;
@@ -3094,7 +3088,6 @@ unlock:
skip = 0;
last = skb;
- last_len = skb->len;
unix_state_lock(sk);
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (skb)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 989cc252d3d3..df3b418e0392 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -70,34 +70,6 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
return true;
}
-static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
- struct sk_buff *skb,
- struct msghdr *msg,
- size_t pkt_len,
- bool zerocopy)
-{
- struct ubuf_info *uarg;
-
- if (msg->msg_ubuf) {
- uarg = msg->msg_ubuf;
- net_zcopy_get(uarg);
- } else {
- struct ubuf_info_msgzc *uarg_zc;
-
- uarg = msg_zerocopy_realloc(sk_vsock(vsk),
- pkt_len, NULL, false);
- if (!uarg)
- return -1;
-
- uarg_zc = uarg_to_msgzc(uarg);
- uarg_zc->zerocopy = zerocopy ? 1 : 0;
- }
-
- skb_zcopy_init(skb, uarg);
-
- return 0;
-}
-
static int virtio_transport_fill_skb(struct sk_buff *skb,
struct virtio_vsock_pkt_info *info,
size_t len,
@@ -317,8 +289,10 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
u32 src_cid, src_port, dst_cid, dst_port;
const struct virtio_transport *t_ops;
struct virtio_vsock_sock *vvs;
+ struct ubuf_info *uarg = NULL;
u32 pkt_len = info->pkt_len;
bool can_zcopy = false;
+ bool have_uref = false;
u32 rest_len;
int ret;
@@ -360,6 +334,25 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
if (can_zcopy)
max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
(MAX_SKB_FRAGS * PAGE_SIZE));
+
+ if (info->msg->msg_flags & MSG_ZEROCOPY &&
+ info->op == VIRTIO_VSOCK_OP_RW) {
+ uarg = info->msg->msg_ubuf;
+
+ if (!uarg) {
+ uarg = msg_zerocopy_realloc(sk_vsock(vsk),
+ pkt_len, NULL, false);
+ if (!uarg) {
+ virtio_transport_put_credit(vvs, pkt_len);
+ return -ENOMEM;
+ }
+
+ if (!can_zcopy)
+ uarg_to_msgzc(uarg)->zerocopy = 0;
+
+ have_uref = true;
+ }
+ }
}
rest_len = pkt_len;
@@ -378,27 +371,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
break;
}
- /* We process buffer part by part, allocating skb on
- * each iteration. If this is last skb for this buffer
- * and MSG_ZEROCOPY mode is in use - we must allocate
- * completion for the current syscall.
- *
- * Pass pkt_len because msg iter is already consumed
- * by virtio_transport_fill_skb(), so iter->count
- * can not be used for RLIMIT_MEMLOCK pinned-pages
- * accounting done by msg_zerocopy_realloc().
- */
- if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY &&
- skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) {
- if (virtio_transport_init_zcopy_skb(vsk, skb,
- info->msg,
- pkt_len,
- can_zcopy)) {
- kfree_skb(skb);
- ret = -ENOMEM;
- break;
- }
- }
+ skb_zcopy_set(skb, uarg, NULL);
virtio_transport_inc_tx_pkt(vvs, skb);
@@ -422,6 +395,18 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
virtio_transport_put_credit(vvs, rest_len);
+ /* msg_zerocopy_realloc() initializes the ubuf_info refcnt to 1.
+ * skb_zcopy_set() increases it for each skb, so we can drop that
+ * initial reference to keep it balanced.
+ */
+ if (have_uref) {
+ if (rest_len == pkt_len)
+ /* No data sent, abort the notification. */
+ net_zcopy_put_abort(uarg, true);
+ else
+ net_zcopy_put(uarg);
+ }
+
/* Return number of bytes, if any data has been sent. */
if (rest_len != pkt_len)
ret = pkt_len - rest_len;
@@ -434,7 +419,14 @@ static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
{
u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0);
- if (skb_overhead + vvs->buf_used + len > vvs->buf_alloc)
+ /* Allow at most buf_alloc * 2 total budget (payload + overhead),
+ * similar to how SO_RCVBUF is doubled to reserve space for sk_buff
+ * metadata. Check payload against buf_alloc to be sure the other
+ * peer is respecting the credit, and sk_buff overhead to bound
+ * queue growth.
+ */
+ if ((u64)vvs->buf_used + len > vvs->buf_alloc ||
+ skb_overhead > vvs->buf_alloc)
return false;
vvs->rx_bytes += len;
@@ -1350,7 +1342,7 @@ destroy:
return err;
}
-static void
+static bool
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
struct sk_buff *skb)
{
@@ -1365,10 +1357,8 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
spin_lock_bh(&vvs->rx_lock);
can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
- if (!can_enqueue) {
- free_pkt = true;
+ if (!can_enqueue)
goto out;
- }
if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count++;
@@ -1408,6 +1398,8 @@ out:
spin_unlock_bh(&vvs->rx_lock);
if (free_pkt)
kfree_skb(skb);
+
+ return can_enqueue;
}
static int
@@ -1420,7 +1412,17 @@ virtio_transport_recv_connected(struct sock *sk,
switch (le16_to_cpu(hdr->op)) {
case VIRTIO_VSOCK_OP_RW:
- virtio_transport_recv_enqueue(vsk, skb);
+ if (!virtio_transport_recv_enqueue(vsk, skb)) {
+ /* There is no more space to queue the packet, so let's
+ * close the connection; otherwise, we'll lose data.
+ */
+ (void)virtio_transport_reset(vsk, skb);
+ virtio_transport_do_close(vsk, true);
+ sk->sk_err = ENOBUFS;
+ sk_error_report(sk);
+ vsock_remove_sock(vsk);
+ break;
+ }
vsock_data_ready(sk);
return err;
case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 4296ca1183f1..d2579380f51e 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1164,7 +1164,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
/* Close and cleanup the connection. */
vmci_transport_send_reset(pending, pkt);
skerr = EPROTO;
- err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
+ err = -EINVAL;
goto destroy;
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 328af43ef832..358cbc9e43d8 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -2462,6 +2462,9 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen,
memcpy(merged_ie + copied_len, next_sub->data,
next_sub->datalen);
copied_len += next_sub->datalen;
+
+ mbssid_elem = next_mbssid;
+ sub_elem = next_sub;
}
return copied_len;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 22d9d9bae8f5..63d145b524c9 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -789,6 +789,8 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq);
if (!chandef.chan)
return -EINVAL;
+ if (!cfg80211_chandef_valid(&chandef))
+ return -EINVAL;
return cfg80211_set_monitor_channel(rdev, dev, &chandef);
case NL80211_IFTYPE_MESH_POINT:
freq = cfg80211_wext_freq(wextfreq);
diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py
index d78908f6664d..dffadccbb01d 100644
--- a/scripts/gdb/linux/mm.py
+++ b/scripts/gdb/linux/mm.py
@@ -40,11 +40,11 @@ class x86_page_ops():
self.PAGE_OFFSET = int(gdb.parse_and_eval("page_offset_base"))
self.VMEMMAP_START = int(gdb.parse_and_eval("vmemmap_base"))
- self.PHYS_BASE = int(gdb.parse_and_eval("phys_base"))
+ self.PHYS_BASE = int(gdb.parse_and_eval("(unsigned long) phys_base"))
self.START_KERNEL_map = 0xffffffff80000000
- self.KERNEL_START = gdb.parse_and_eval("_text")
- self.KERNEL_END = gdb.parse_and_eval("_end")
+ self.KERNEL_START = gdb.parse_and_eval("(unsigned long) &_text")
+ self.KERNEL_END = gdb.parse_and_eval("(unsigned long) &_end")
self.VMALLOC_START = int(gdb.parse_and_eval("vmalloc_base"))
if self.VMALLOC_START == 0xffffc90000000000:
diff --git a/scripts/gdb/linux/slab.py b/scripts/gdb/linux/slab.py
index 0e2d93867fe2..ddde25aeca8d 100644
--- a/scripts/gdb/linux/slab.py
+++ b/scripts/gdb/linux/slab.py
@@ -196,7 +196,7 @@ def slabtrace(alloc, cache_name):
if target_cache['flags'] & SLAB_STORE_USER:
for i in range(0, nr_node_ids):
- cache_node = target_cache['node'][i]
+ cache_node = target_cache['per_node']['node'][i]
if cache_node['nr_slabs']['counter'] == 0:
continue
process_slab(loc_track, cache_node['partial'], alloc, target_cache)
@@ -300,7 +300,7 @@ def slabinfo():
nr_free = 0
nr_slabs = 0
for i in range(0, nr_node_ids):
- cache_node = cache['node'][i]
+ cache_node = cache['per_node']['node'][i]
try:
nr_slabs += cache_node['nr_slabs']['counter']
nr_objs = int(cache_node['total_objects']['counter'])
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 4e99393a35f1..2ad87a74bb03 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -651,7 +651,26 @@ static void do_vio_entry(struct module *mod, void *symval)
module_alias_printf(mod, true, "%s", alias);
}
-static void do_input(char *alias,
+static void __attribute__((format(printf, 3, 4)))
+alias_append(char *alias, size_t size, const char *fmt, ...)
+{
+ size_t len = strlen(alias);
+ va_list args;
+ int n;
+
+ if (len >= size)
+ fatal("alias buffer (%zu) overflow before append\n", size);
+
+ va_start(args, fmt);
+ n = vsnprintf(alias + len, size - len, fmt, args);
+ va_end(args);
+
+ if (n < 0 || (size_t)n >= size - len)
+ fatal("alias buffer (%zu) overflow on append (need %d, have %zu)\n",
+ size, n, size - len);
+}
+
+static void do_input(char *alias, size_t size,
kernel_ulong_t *arr, unsigned int min, unsigned int max)
{
unsigned int i;
@@ -659,13 +678,14 @@ static void do_input(char *alias,
for (i = min; i <= max; i++)
if (get_unaligned_native(arr + i / BITS_PER_LONG) &
(1ULL << (i % BITS_PER_LONG)))
- sprintf(alias + strlen(alias), "%X,*", i);
+ alias_append(alias, size, "%X,*", i);
}
/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. */
static void do_input_entry(struct module *mod, void *symval)
{
char alias[256] = {};
+ const size_t sizeof_alias = sizeof(alias);
DEF_FIELD(symval, input_device_id, flags);
DEF_FIELD(symval, input_device_id, bustype);
@@ -687,35 +707,35 @@ static void do_input_entry(struct module *mod, void *symval)
ADD(alias, "p", flags & INPUT_DEVICE_ID_MATCH_PRODUCT, product);
ADD(alias, "e", flags & INPUT_DEVICE_ID_MATCH_VERSION, version);
- sprintf(alias + strlen(alias), "-e*");
+ alias_append(alias, sizeof_alias, "-e*");
if (flags & INPUT_DEVICE_ID_MATCH_EVBIT)
- do_input(alias, *evbit, 0, INPUT_DEVICE_ID_EV_MAX);
- sprintf(alias + strlen(alias), "k*");
+ do_input(alias, sizeof_alias, *evbit, 0, INPUT_DEVICE_ID_EV_MAX);
+ alias_append(alias, sizeof_alias, "k*");
if (flags & INPUT_DEVICE_ID_MATCH_KEYBIT)
- do_input(alias, *keybit,
+ do_input(alias, sizeof_alias, *keybit,
INPUT_DEVICE_ID_KEY_MIN_INTERESTING,
INPUT_DEVICE_ID_KEY_MAX);
- sprintf(alias + strlen(alias), "r*");
+ alias_append(alias, sizeof_alias, "r*");
if (flags & INPUT_DEVICE_ID_MATCH_RELBIT)
- do_input(alias, *relbit, 0, INPUT_DEVICE_ID_REL_MAX);
- sprintf(alias + strlen(alias), "a*");
+ do_input(alias, sizeof_alias, *relbit, 0, INPUT_DEVICE_ID_REL_MAX);
+ alias_append(alias, sizeof_alias, "a*");
if (flags & INPUT_DEVICE_ID_MATCH_ABSBIT)
- do_input(alias, *absbit, 0, INPUT_DEVICE_ID_ABS_MAX);
- sprintf(alias + strlen(alias), "m*");
+ do_input(alias, sizeof_alias, *absbit, 0, INPUT_DEVICE_ID_ABS_MAX);
+ alias_append(alias, sizeof_alias, "m*");
if (flags & INPUT_DEVICE_ID_MATCH_MSCIT)
- do_input(alias, *mscbit, 0, INPUT_DEVICE_ID_MSC_MAX);
- sprintf(alias + strlen(alias), "l*");
+ do_input(alias, sizeof_alias, *mscbit, 0, INPUT_DEVICE_ID_MSC_MAX);
+ alias_append(alias, sizeof_alias, "l*");
if (flags & INPUT_DEVICE_ID_MATCH_LEDBIT)
- do_input(alias, *ledbit, 0, INPUT_DEVICE_ID_LED_MAX);
- sprintf(alias + strlen(alias), "s*");
+ do_input(alias, sizeof_alias, *ledbit, 0, INPUT_DEVICE_ID_LED_MAX);
+ alias_append(alias, sizeof_alias, "s*");
if (flags & INPUT_DEVICE_ID_MATCH_SNDBIT)
- do_input(alias, *sndbit, 0, INPUT_DEVICE_ID_SND_MAX);
- sprintf(alias + strlen(alias), "f*");
+ do_input(alias, sizeof_alias, *sndbit, 0, INPUT_DEVICE_ID_SND_MAX);
+ alias_append(alias, sizeof_alias, "f*");
if (flags & INPUT_DEVICE_ID_MATCH_FFBIT)
- do_input(alias, *ffbit, 0, INPUT_DEVICE_ID_FF_MAX);
- sprintf(alias + strlen(alias), "w*");
+ do_input(alias, sizeof_alias, *ffbit, 0, INPUT_DEVICE_ID_FF_MAX);
+ alias_append(alias, sizeof_alias, "w*");
if (flags & INPUT_DEVICE_ID_MATCH_SWBIT)
- do_input(alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX);
+ do_input(alias, sizeof_alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX);
module_alias_printf(mod, false, "input:%s", alias);
}
@@ -895,12 +915,16 @@ static const struct dmifield {
{ NULL, DMI_NONE }
};
-static void dmi_ascii_filter(char *d, const char *s)
+static void dmi_ascii_filter(char *d, size_t avail, const char *s)
{
/* Filter out characters we don't want to see in the modalias string */
for (; *s; s++)
- if (*s > ' ' && *s < 127 && *s != ':')
+ if (*s > ' ' && *s < 127 && *s != ':') {
+ if (avail <= 1)
+ fatal("%s: alias buffer overflow\n", __func__);
*(d++) = *s;
+ avail--;
+ }
*d = 0;
}
@@ -909,6 +933,8 @@ static void dmi_ascii_filter(char *d, const char *s)
static void do_dmi_entry(struct module *mod, void *symval)
{
char alias[256] = {};
+ const size_t sizeof_alias = sizeof(alias);
+ size_t len;
int i, j;
DEF_FIELD_ADDR(symval, dmi_system_id, matches);
@@ -916,11 +942,12 @@ static void do_dmi_entry(struct module *mod, void *symval)
for (j = 0; j < 4; j++) {
if ((*matches)[j].slot &&
(*matches)[j].slot == dmi_fields[i].field) {
- sprintf(alias + strlen(alias), ":%s*",
- dmi_fields[i].prefix);
- dmi_ascii_filter(alias + strlen(alias),
+ alias_append(alias, sizeof_alias, ":%s*",
+ dmi_fields[i].prefix);
+ len = strlen(alias);
+ dmi_ascii_filter(alias + len, sizeof_alias - len,
(*matches)[j].substr);
- strcat(alias, "*");
+ alias_append(alias, sizeof_alias, "*");
}
}
}
diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD
index 452374d63c24..1213c8e04671 100644
--- a/scripts/package/PKGBUILD
+++ b/scripts/package/PKGBUILD
@@ -10,7 +10,7 @@ for pkg in $_extrapackages; do
pkgname+=("${pkgbase}-${pkg}")
done
-pkgver="${KERNELRELEASE//-/_}"
+pkgver="$(echo "${KERNELRELEASE}" | sed 's/-\(rc[0-9]\+\)/\1/;s/-/_/g')"
# The PKGBUILD is evaluated multiple times.
# Running scripts/build-version from here would introduce inconsistencies.
pkgrel="${KBUILD_REVISION}"
diff --git a/security/lsm_syscalls.c b/security/lsm_syscalls.c
index 5648b1f0ce9c..08a017669c02 100644
--- a/security/lsm_syscalls.c
+++ b/security/lsm_syscalls.c
@@ -57,7 +57,14 @@ u64 lsm_name_to_attr(const char *name)
SYSCALL_DEFINE4(lsm_set_self_attr, unsigned int, attr, struct lsm_ctx __user *,
ctx, u32, size, u32, flags)
{
- return security_setselfattr(attr, ctx, size, flags);
+ int rc;
+
+ rc = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
+ if (rc < 0)
+ return rc;
+ rc = security_setselfattr(attr, ctx, size, flags);
+ mutex_unlock(&current->signal->cred_guard_mutex);
+ return rc;
}
/**
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index f63c6f828735..010aac0c6c67 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -42,6 +42,7 @@ class Netlink:
SOL_NETLINK = 270
NETLINK_ADD_MEMBERSHIP = 1
+ NETLINK_LISTEN_ALL_NSID = 8
NETLINK_CAP_ACK = 10
NETLINK_EXT_ACK = 11
NETLINK_GET_STRICT_CHK = 12
@@ -680,6 +681,7 @@ class YnlFamily(SpecFamily):
Notification API:
ynl.ntf_subscribe(mcast_name) -- join a multicast group
+ ynl.ntf_listen_all_nsid() -- listen on all netns
ynl.check_ntf() -- drain pending notifications
ynl.poll_ntf(duration=None) -- yield notifications
@@ -748,6 +750,23 @@ class YnlFamily(SpecFamily):
self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP,
mcast_id)
+ def ntf_listen_all_nsid(self):
+ """Enable NETLINK_LISTEN_ALL_NSID to receive notifications from all
+ namespaces that have an nsid mapped in the current one."""
+ self.sock.setsockopt(Netlink.SOL_NETLINK,
+ Netlink.NETLINK_LISTEN_ALL_NSID, 1)
+
+ @staticmethod
+ def _decode_nsid(ancdata):
+ for cmsg_level, cmsg_type, cmsg_data in ancdata:
+ if (cmsg_level == Netlink.SOL_NETLINK and
+ cmsg_type == Netlink.NETLINK_LISTEN_ALL_NSID):
+ nsid = struct.unpack('i', cmsg_data)[0]
+ if nsid >= 0:
+ return nsid
+ return None
+ return None
+
def set_recv_dbg(self, enabled):
self._recv_dbg = enabled
@@ -1235,7 +1254,7 @@ class YnlFamily(SpecFamily):
f" when parsing '{attr_spec['name']}'")
return raw
- def handle_ntf(self, decoded):
+ def handle_ntf(self, decoded, nsid=None):
msg = {}
if self.include_raw:
msg['raw'] = decoded
@@ -1246,15 +1265,22 @@ class YnlFamily(SpecFamily):
msg['name'] = op['name']
msg['msg'] = attrs
+ if nsid is not None:
+ msg['nsid'] = nsid
self.async_msg_queue.put(msg)
+ def _recvmsg(self, flags=0):
+ reply, ancdata, _, _ = self.sock.recvmsg(self._recv_size, 4096, flags)
+ return reply, ancdata
+
def check_ntf(self):
while True:
try:
- reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT)
+ reply, ancdata = self._recvmsg(socket.MSG_DONTWAIT)
except BlockingIOError:
return
+ nsid = self._decode_nsid(ancdata)
nms = NlMsgs(reply)
self._recv_dbg_print(reply, nms)
for nl_msg in nms:
@@ -1271,7 +1297,7 @@ class YnlFamily(SpecFamily):
print("Unexpected msg id while checking for ntf", decoded)
continue
- self.handle_ntf(decoded)
+ self.handle_ntf(decoded, nsid)
def poll_ntf(self, duration=None):
start_time = time.time()
@@ -1335,7 +1361,8 @@ class YnlFamily(SpecFamily):
rsp = []
op_rsp = []
while not done:
- reply = self.sock.recv(self._recv_size)
+ reply, ancdata = self._recvmsg()
+ nsid = self._decode_nsid(ancdata)
nms = NlMsgs(reply)
self._recv_dbg_print(reply, nms)
for nl_msg in nms:
@@ -1374,7 +1401,7 @@ class YnlFamily(SpecFamily):
# Check if this is a reply to our request
if nl_msg.nl_seq not in reqs_by_seq or decoded.cmd() != op.rsp_value:
if decoded.cmd() in self.async_msg_ids:
- self.handle_ntf(decoded)
+ self.handle_ntf(decoded, nsid)
continue
print('Unexpected message: ' + repr(decoded))
continue
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 816d5d84816b..5b713837eede 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -111,6 +111,9 @@ build-test:
build-test-tarball:
@$(MAKE) -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory out
+check-headers:
+ @./check-headers.sh
+
#
# All other targets get passed through:
#
@@ -118,4 +121,4 @@ build-test-tarball:
$(print_msg)
$(make)
-.PHONY: tags TAGS FORCE Makefile
+.PHONY: tags TAGS FORCE Makefile build-test build-test-tarball check-headers
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index cee19c923c06..76b35ac19acb 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -285,7 +285,6 @@ goals := $(filter-out all sub-make, $(MAKECMDGOALS))
$(goals) all: sub-make
sub-make: fixdep
- @./check-headers.sh
$(Q)$(MAKE) FIXDEP_BUILT=1 -f Makefile.perf $(goals)
else # force_fixdep
@@ -565,6 +564,12 @@ fsmount_tbls := $(srctree)/tools/perf/trace/beauty/fsmount.sh
$(fsmount_arrays): $(beauty_uapi_linux_dir)/mount.h $(fsmount_tbls)
$(Q)$(SHELL) '$(fsmount_tbls)' $(beauty_uapi_linux_dir) > $@
+fsmount_attr_arrays := $(beauty_outdir)/fsmount_attr_arrays.c
+fsmount_attr_tbls := $(srctree)/tools/perf/trace/beauty/fsmount_attr.sh
+
+$(fsmount_attr_arrays): $(beauty_uapi_linux_dir)/mount.h $(fsmount_attr_tbls)
+ $(Q)$(SHELL) '$(fsmount_attr_tbls)' $(beauty_uapi_linux_dir) > $@
+
fspick_arrays := $(beauty_outdir)/fspick_arrays.c
fspick_tbls := $(srctree)/tools/perf/trace/beauty/fspick.sh
@@ -855,6 +860,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE archheaders \
$(fadvise_advice_array) \
$(fsconfig_arrays) \
$(fsmount_arrays) \
+ $(fsmount_attr_arrays) \
$(fspick_arrays) \
$(pkey_alloc_access_rights_array) \
$(sndrv_pcm_ioctl_array) \
@@ -1302,6 +1308,7 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(
$(OUTPUT)$(fadvise_advice_array) \
$(OUTPUT)$(fsconfig_arrays) \
$(OUTPUT)$(fsmount_arrays) \
+ $(OUTPUT)$(fsmount_attr_arrays) \
$(OUTPUT)$(fspick_arrays) \
$(OUTPUT)$(madvise_behavior_array) \
$(OUTPUT)$(mmap_flags_array) \
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index e58c49d047a2..48615ddccd93 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -771,11 +771,6 @@ static const char *bpf_cmd[] = {
};
static DEFINE_STRARRAY(bpf_cmd, "BPF_");
-static const char *fsmount_flags[] = {
- [1] = "CLOEXEC",
-};
-static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
-
#include "trace/beauty/generated/fsconfig_arrays.c"
static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
@@ -1202,7 +1197,9 @@ static const struct syscall_fmt syscall_fmts[] = {
{ .name = "fsconfig",
.arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
{ .name = "fsmount",
- .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
+ .arg = { [1] = { .scnprintf = SCA_FSMOUNT_FLAGS, /* fsmount_flags */
+ .strtoul = STUL_STRARRAYS,
+ .show_zero = true, },
[2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
{ .name = "fspick",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 0a07ad158f87..a90c35fa5c12 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -179,6 +179,9 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar
size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_FLOCK syscall_arg__scnprintf_flock
+size_t syscall_arg__scnprintf_fsmount_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FSMOUNT_FLAGS syscall_arg__scnprintf_fsmount_flags
+
size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_FSMOUNT_ATTR_FLAGS syscall_arg__scnprintf_fsmount_attr_flags
diff --git a/tools/perf/trace/beauty/clone.sh b/tools/perf/trace/beauty/clone.sh
index 18b6c0d75693..98cb1f8d4a6f 100755
--- a/tools/perf/trace/beauty/clone.sh
+++ b/tools/perf/trace/beauty/clone.sh
@@ -14,4 +14,8 @@ regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+CLONE_([^_]+[[:alnum:]_]+)[[:
grep -E $regex ${linux_sched} | \
sed -r "s/$regex/\2 \1/g" | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+CLONE_([^_]+[[:alnum:]_]+)[[:space:]]+\(1ULL[[:space:]]*<<[[:space:]]*([[:digit:]]+)\)[[:space:]]*.*'
+grep -E $regex ${linux_sched} | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[%s + 1] = \"%s\",\n"
printf "};\n"
diff --git a/tools/perf/trace/beauty/fsmount.c b/tools/perf/trace/beauty/fsmount.c
index 28c2c16fc1a8..179e649fc72a 100644
--- a/tools/perf/trace/beauty/fsmount.c
+++ b/tools/perf/trace/beauty/fsmount.c
@@ -16,9 +16,25 @@
#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */
#endif
-static size_t fsmount__scnprintf_attr_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+
+static size_t fsmount__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
{
#include "trace/beauty/generated/fsmount_arrays.c"
+ static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
+
+ return strarray__scnprintf_flags(&strarray__fsmount_flags, bf, size, show_prefix, flags);
+}
+
+size_t syscall_arg__scnprintf_fsmount_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long flags = arg->val;
+
+ return fsmount__scnprintf_flags(flags, bf, size, arg->show_string_prefix);
+}
+
+static size_t fsmount__scnprintf_attr_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+{
+#include "trace/beauty/generated/fsmount_attr_arrays.c"
static DEFINE_STRARRAY(fsmount_attr_flags, "MOUNT_ATTR_");
size_t printed = 0;
diff --git a/tools/perf/trace/beauty/fsmount.sh b/tools/perf/trace/beauty/fsmount.sh
index 6b67a54cdeee..6d1e80bc15e4 100755
--- a/tools/perf/trace/beauty/fsmount.sh
+++ b/tools/perf/trace/beauty/fsmount.sh
@@ -9,14 +9,9 @@ fi
linux_mount=${beauty_uapi_linux_dir}/mount.h
-# Remove MOUNT_ATTR_RELATIME as it is zeros, handle it a special way in the beautifier
-# Only handle MOUNT_ATTR_ followed by a capital letter/num as __ is special case
-# for things like MOUNT_ATTR__ATIME that is a mask for the possible ATIME handling
-# bits. Special case it as well in the beautifier
-
-printf "static const char *fsmount_attr_flags[] = {\n"
-regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOUNT_ATTR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
-grep -E $regex ${linux_mount} | grep -v MOUNT_ATTR_RELATIME | \
+printf "static const char *fsmount_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+FSMOUNT_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+grep -E $regex ${linux_mount} | \
sed -r "s/$regex/\2 \1/g" | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
printf "};\n"
diff --git a/tools/perf/trace/beauty/fsmount_attr.sh b/tools/perf/trace/beauty/fsmount_attr.sh
new file mode 100644
index 000000000000..6b67a54cdeee
--- /dev/null
+++ b/tools/perf/trace/beauty/fsmount_attr.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 1 ] ; then
+ beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
+else
+ beauty_uapi_linux_dir=$1
+fi
+
+linux_mount=${beauty_uapi_linux_dir}/mount.h
+
+# Remove MOUNT_ATTR_RELATIME as it is zeros, handle it a special way in the beautifier
+# Only handle MOUNT_ATTR_ followed by a capital letter/num as __ is special case
+# for things like MOUNT_ATTR__ATIME that is a mask for the possible ATIME handling
+# bits. Special case it as well in the beautifier
+
+printf "static const char *fsmount_attr_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOUNT_ATTR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+grep -E $regex ${linux_mount} | grep -v MOUNT_ATTR_RELATIME | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index ec715ad4bf25..ec4a0a025793 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -415,7 +415,7 @@ struct __kernel_timespec;
struct old_timespec32;
struct scm_timestamping_internal {
- struct timespec64 ts[3];
+ ktime_t ts[3];
};
extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss);
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 70b2b661f42c..13f71202845e 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -657,4 +657,16 @@ struct procmap_query {
__u64 build_id_addr; /* in */
};
+/*
+ * Shutdown the filesystem.
+ */
+#define FS_IOC_SHUTDOWN _IOR('X', 125, __u32)
+
+/*
+ * Flags for FS_IOC_SHUTDOWN
+ */
+#define FS_SHUTDOWN_FLAGS_DEFAULT 0x0
+#define FS_SHUTDOWN_FLAGS_LOGFLUSH 0x1 /* flush log but not data*/
+#define FS_SHUTDOWN_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
+
#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h
index d9d86598d100..2204708dbf7a 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/mount.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h
@@ -110,6 +110,7 @@ enum fsconfig_command {
* fsmount() flags.
*/
#define FSMOUNT_CLOEXEC 0x00000001
+#define FSMOUNT_NAMESPACE 0x00000002 /* Create the mount in a new mount namespace */
/*
* Mount attributes.
diff --git a/tools/perf/trace/beauty/include/uapi/linux/sched.h b/tools/perf/trace/beauty/include/uapi/linux/sched.h
index 359a14cc76a4..33a4624285cd 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/sched.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/sched.h
@@ -34,8 +34,12 @@
#define CLONE_IO 0x80000000 /* Clone io context */
/* Flags for the clone3() syscall. */
-#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
-#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
+#define CLONE_CLEAR_SIGHAND (1ULL << 32) /* Clear any signal handler and reset to SIG_DFL. */
+#define CLONE_INTO_CGROUP (1ULL << 33) /* Clone into a specific cgroup given the right permissions. */
+#define CLONE_AUTOREAP (1ULL << 34) /* Auto-reap child on exit. */
+#define CLONE_NNP (1ULL << 35) /* Set no_new_privs on child. */
+#define CLONE_PIDFD_AUTOKILL (1ULL << 36) /* Kill child when clone pidfd closes. */
+#define CLONE_EMPTY_MNTNS (1ULL << 37) /* Create an empty mount namespace. */
/*
* cloning flags intersect with CSIGNAL so can be used with unshare and clone3
@@ -43,6 +47,12 @@
*/
#define CLONE_NEWTIME 0x00000080 /* New time namespace */
+/*
+ * unshare flags share the bit space with clone flags but only apply to the
+ * unshare syscall:
+ */
+#define UNSHARE_EMPTY_MNTNS 0x00100000 /* Unshare an empty mount namespace. */
+
#ifndef __ASSEMBLY__
/**
* struct clone_args - arguments for the clone3 syscall
@@ -146,4 +156,7 @@ struct clone_args {
SCHED_FLAG_KEEP_ALL | \
SCHED_FLAG_UTIL_CLAMP)
+/* Only for sched_getattr() own flag param, if task is SCHED_DEADLINE */
+#define SCHED_GETATTR_FLAG_DL_DYNAMIC 0x01
+
#endif /* _UAPI_LINUX_SCHED_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index b87e7f39e15a..6ed8e149e3d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -417,6 +417,107 @@ static void run_tests(int family, enum bpf_map_type map_type)
close(map);
}
+/*
+ * Regression test for the KTLS + sockmap (verdict) reverse-order UAF.
+ *
+ * Vulnerable sequence:
+ * 1. Insert receiver socket into sockmap with BPF_SK_SKB_VERDICT program.
+ * sk->sk_data_ready becomes sk_psock_verdict_data_ready.
+ * 2. Configure TLS RX: tls_sw_strparser_arm() saves
+ * sk_psock_verdict_data_ready as rx_ctx->saved_data_ready.
+ *
+ * When data arrives, tls_rx_msg_ready() calls saved_data_ready() =
+ * sk_psock_verdict_data_ready(), which calls tcp_read_skb() and drains
+ * sk_receive_queue via __skb_unlink() without advancing copied_seq.
+ * tls_strp_msg_load() then finds the queue empty while tcp_inq() is still
+ * non-zero, hits WARN_ON_ONCE(!first), and leaves a dangling frag_list
+ * pointer that tls_decrypt_sg() walks — a use-after-free.
+ *
+ * The fix adds a tls_sw_has_ctx_rx() check to sk_psock_verdict_data_ready(),
+ * mirroring what sk_psock_strp_data_ready() already does: when a TLS RX
+ * context is present, defer to psock->saved_data_ready (sock_def_readable)
+ * instead of calling tcp_read_skb(), so TLS retains sole ownership of the
+ * receive queue. Data is then decrypted and returned correctly by
+ * tls_sw_recvmsg().
+ */
+static void test_sockmap_ktls_verdict_with_tls_rx(int family, int sotype)
+{
+ struct tls12_crypto_info_aes_gcm_128 crypto_info = {};
+ char send_buf[] = "hello ktls sockmap reverse order";
+ char recv_buf[sizeof(send_buf)] = {};
+ struct test_sockmap_ktls *skel;
+ int c = -1, p = -1, zero = 0;
+ int prog_fd, map_fd;
+ ssize_t n;
+ int err;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open_and_load"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_skb_verdict_pass);
+ map_fd = bpf_map__fd(skel->maps.sock_map_verdict);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_SKB_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk_skb verdict"))
+ goto out;
+
+ /* Step 1: configure TLS TX on sender (no sockmap involvement) */
+ err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP) client"))
+ goto out;
+
+ crypto_info.info.version = TLS_1_2_VERSION;
+ crypto_info.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ memset(crypto_info.key, 0x01, sizeof(crypto_info.key));
+ memset(crypto_info.salt, 0x02, sizeof(crypto_info.salt));
+
+ err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info));
+ if (!ASSERT_OK(err, "setsockopt(TLS_TX)"))
+ goto out;
+
+ /* Step 2: insert receiver into sockmap BEFORE TLS RX */
+ err = bpf_map_update_elem(map_fd, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto out;
+
+ /* Step 3: configure TLS RX AFTER sockmap insertion */
+ err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP) server"))
+ goto out;
+
+ err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_info, sizeof(crypto_info));
+ if (!ASSERT_OK(err, "setsockopt(TLS_RX)"))
+ goto out;
+
+ /*
+ * A buggy kernel hits WARN_ON_ONCE in tls_strp_load_anchor_with_queue
+ * and may UAF in tls_decrypt_sg here. With the fix,
+ * sk_psock_verdict_data_ready defers to sock_def_readable and TLS
+ * decrypts the record normally.
+ */
+ n = send(c, send_buf, sizeof(send_buf), 0);
+ if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "send"))
+ goto out;
+
+ n = recv_timeout(p, recv_buf, sizeof(recv_buf), 0, 5);
+ if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "recv"))
+ goto out;
+
+ ASSERT_OK(memcmp(send_buf, recv_buf, sizeof(send_buf)), "data integrity");
+
+out:
+ if (c != -1)
+ close(c);
+ if (p != -1)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
static void run_ktls_test(int family, int sotype)
{
if (test__start_subtest("tls simple offload"))
@@ -429,6 +530,8 @@ static void run_ktls_test(int family, int sotype)
test_sockmap_ktls_tx_no_buf(family, sotype, true);
if (test__start_subtest("tls tx with pop"))
test_sockmap_ktls_tx_pop(family, sotype);
+ if (test__start_subtest("tls verdict with tls rx"))
+ test_sockmap_ktls_verdict_with_tls_rx(family, sotype);
}
void test_sockmap_ktls(void)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
index 83df4919c224..facafeaf4620 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -17,6 +17,13 @@ struct {
__type(value, int);
} sock_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} sock_map_verdict SEC(".maps");
+
SEC("sk_msg")
int prog_sk_policy(struct sk_msg_md *msg)
{
@@ -38,3 +45,17 @@ int prog_sk_policy_redir(struct sk_msg_md *msg)
bpf_msg_apply_bytes(msg, apply_bytes);
return bpf_msg_redirect_map(msg, &sock_map, two, 0);
}
+
+/*
+ * Verdict program for the reverse-order TLS/sockmap regression test.
+ * Returns SK_PASS so tcp_read_skb() drains the receive queue via
+ * sk_psock_verdict_recv() without calling tcp_eat_skb(), which is
+ * the precondition for the KTLS strparser frag_list UAF.
+ */
+SEC("sk_skb/verdict")
+int prog_skb_verdict_pass(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 788689497e92..77fb4c5d871b 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -986,6 +986,56 @@ TEST_F(hmm, migrate)
}
/*
+ * Migrate private file memory to device private memory.
+ */
+TEST_F(hmm, migrate_file_private)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int fd;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ fd = hmm_create_file(size);
+ ASSERT_GE(fd, 0);
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = fd;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
* Migrate anonymous memory to device private memory and fault some of it back
* to system memory, then try migrating the resulting mix of system and device
* private memory to the device.
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index d8468451b3a3..c17b133a81d2 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -103,7 +103,7 @@ RUN_ALL=false
RUN_DESTRUCTIVE=false
TAP_PREFIX="# "
-while getopts "aht:n" OPT; do
+while getopts "aht:nd" OPT; do
case ${OPT} in
"a") RUN_ALL=true ;;
"h") usage ;;
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
index e8031f68200a..ebdb4c790a5d 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
@@ -4,7 +4,7 @@
ALL_TESTS="vlmc_control_test vlmc_querier_test vlmc_igmp_mld_version_test \
vlmc_last_member_test vlmc_startup_query_test vlmc_membership_test \
vlmc_querier_intvl_test vlmc_query_intvl_test vlmc_query_response_intvl_test \
- vlmc_router_port_test vlmc_filtering_test"
+ vlmc_router_port_test vlmc_filtering_test vlmc_mcast_toggle_test"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="239.10.10.10"
@@ -537,6 +537,34 @@ vlmc_filtering_test()
log_test "Disable multicast vlan snooping when vlan filtering is disabled"
}
+vlmc_mcast_toggle_test()
+{
+ RET=0
+
+ ip link add name br1-mcast up type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1
+ ip link add name dummy1-mcast up master br1-mcast type dummy
+
+ # Enabling per-VLAN multicast snooping should disable the per-port
+ # multicast context on "dummy1-mcast".
+ ip link set dev br1-mcast type bridge mcast_vlan_snooping 1
+
+ # Toggling multicast snooping on the bridge should not affect the
+ # per-port multicast context on "dummy1-mcast" given that per-VLAN
+ # multicast snooping is enabled.
+ ip link set dev br1-mcast type bridge mcast_snooping 0
+ ip link set dev br1-mcast type bridge mcast_snooping 1
+
+ # If both the per-port and per-{port, VLAN} multicast contexts are
+ # enabled on "dummy1-mcast", removing it from the bridge will result
+ # in a splat.
+ ip link set dev dummy1-mcast nomaster
+
+ log_test "Toggling mcast snooping with per-VLAN mcast snooping enabled"
+
+ ip link del dev dummy1-mcast
+ ip link del dev br1-mcast
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
index 64f05229ab24..ded3f896e622 100644
--- a/tools/testing/selftests/net/lib/xdp_native.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -268,6 +268,17 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_PASS;
}
+static __always_inline __u16 csum_fold_helper(__u32 csum)
+{
+ csum = (csum & 0xffff) + (csum >> 16);
+ return ~((csum & 0xffff) + (csum >> 16));
+}
+
+static __always_inline __u16 csum_fold_udp_helper(__u32 csum)
+{
+ return csum_fold_helper(csum) ? : 0xffff;
+}
+
static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
{
void *data_end = (void *)(long)ctx->data_end;
@@ -281,21 +292,22 @@ static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
if (eth->h_proto == bpf_htons(ETH_P_IP)) {
struct iphdr *iph = data + sizeof(*eth);
- __u16 total_len;
if (iph + 1 > (struct iphdr *)data_end)
return NULL;
- iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset);
-
udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
if (!udph || udph + 1 > (struct udphdr *)data_end)
return NULL;
- len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ len = iph->tot_len;
+ len_new = bpf_htons(bpf_ntohs(len) + offset);
+ iph->tot_len = len_new;
+ iph->check = csum_fold_helper(
+ bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), ~((__u32)iph->check)));
} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
struct ipv6hdr *ipv6h = data + sizeof(*eth);
- __u16 payload_len;
if (ipv6h + 1 > (struct ipv6hdr *)data_end)
return NULL;
@@ -304,33 +316,27 @@ static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
if (!udph || udph + 1 > (struct udphdr *)data_end)
return NULL;
- *udp_csum = ~((__u32)udph->check);
-
len = ipv6h->payload_len;
len_new = bpf_htons(bpf_ntohs(len) + offset);
ipv6h->payload_len = len_new;
-
- *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
- sizeof(len_new), *udp_csum);
-
- len = udph->len;
- len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
- *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
- sizeof(len_new), *udp_csum);
} else {
return NULL;
}
+ len = udph->len;
+ len_new = bpf_htons(bpf_ntohs(len) + offset);
+
+ *udp_csum = ~((__u32)udph->check);
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+
udph->len = len_new;
return udph;
}
-static __u16 csum_fold_helper(__u32 csum)
-{
- return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff;
-}
-
static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
unsigned long hdr_len)
{
@@ -359,7 +365,7 @@ static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
return -1;
udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
- udph->check = (__u16)csum_fold_helper(udp_csum);
+ udph->check = (__u16)csum_fold_udp_helper(udp_csum);
if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0)
return -1;
@@ -403,7 +409,7 @@ static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset)
return -1;
udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum);
- udph->check = (__u16)csum_fold_helper(udp_csum);
+ udph->check = (__u16)csum_fold_udp_helper(udp_csum);
buff_len = bpf_xdp_get_buff_len(ctx);
@@ -484,8 +490,7 @@ static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len,
return -1;
udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
-
- udph->check = (__u16)csum_fold_helper(udp_csum);
+ udph->check = (__u16)csum_fold_udp_helper(udp_csum);
if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0)
return -1;
@@ -542,7 +547,7 @@ static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len,
return -1;
udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum);
- udph->check = (__u16)csum_fold_helper(udp_csum);
+ udph->check = (__u16)csum_fold_udp_helper(udp_csum);
if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
return -1;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index a6447f7a31fe..d158678fa6ab 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -401,7 +401,7 @@ do_transfer()
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
local start
- start=$(date +%s%3N)
+ start=$(date +%s%N)
ip netns exec ${connector_ns} \
./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$extra_args $connect_addr < "$cin" > "$cout" &
@@ -423,7 +423,7 @@ do_transfer()
fi
local stop
- stop=$(date +%s%3N)
+ stop=$(date +%s%N)
if $capture; then
sleep 1
@@ -439,7 +439,7 @@ do_transfer()
fi
local duration
- duration=$((stop-start))
+ duration=$(((stop-start) / 1000000))
printf "(duration %05sms) " "${duration}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index beec41f6662a..5acd12021e6e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1828,6 +1828,22 @@ chk_add_tx_nr()
fi
}
+chk_add_drop_tx_nr()
+{
+ local drop_tx_nr=$1
+ local count
+
+ print_check "add addr tx drop"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTxDrop")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$drop_tx_nr" ]; then
+ fail_test "got $count ADD_ADDR drop[s] TX, expected $drop_tx_nr"
+ else
+ print_ok
+ fi
+}
+
chk_rm_nr()
{
local rm_addr_nr=$1
@@ -3278,6 +3294,21 @@ add_addr_ports_tests()
chk_mpc_endp_attempt ${retl} 1
fi
+
+ # first signal address drops, second one still progresses
+ if reset "signal addr list progresses after tx drop"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 0
+ ip netns exec $ns1 sysctl -q net.ipv4.tcp_timestamps=1
+ ip netns exec $ns2 sysctl -q net.ipv4.tcp_timestamps=1
+
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal port 10100
+ pm_nl_add_endpoint $ns1 dead:beef:3::1 flags signal
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_add_drop_tx_nr 1
+ chk_add_tx_nr 1 1
+ chk_add_nr 1 1 0
+ fi
}
bind_tests()
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 989a5975dcea..5ef6033775c8 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -28,7 +28,7 @@ declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
MPTCP_LIB_SUBTEST_FLAKY=0
-MPTCP_LIB_SUBTESTS_LAST_TS_MS=
+MPTCP_LIB_SUBTESTS_LAST_TS_NS=
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
MPTCP_LIB_IP_MPTCP=0
@@ -236,7 +236,7 @@ mptcp_lib_kversion_ge() {
}
mptcp_lib_subtests_last_ts_reset() {
- MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)"
+ MPTCP_LIB_SUBTESTS_LAST_TS_NS="$(date +%s%N)"
}
mptcp_lib_subtests_last_ts_reset
@@ -255,7 +255,7 @@ __mptcp_lib_result_check_duplicated() {
__mptcp_lib_result_add() {
local result="${1}"
local time="time="
- local ts_prev_ms
+ local ts_prev_ns
shift
local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
@@ -265,9 +265,9 @@ __mptcp_lib_result_add() {
# not to add two '#'
[[ "${*}" != *"#"* ]] && time="# ${time}"
- ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}"
+ ts_prev_ns="${MPTCP_LIB_SUBTESTS_LAST_TS_NS}"
mptcp_lib_subtests_last_ts_reset
- time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms"
+ time+="$(((MPTCP_LIB_SUBTESTS_LAST_TS_NS - ts_prev_ns) / 1000000))ms"
MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}")
}
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh
index af1532b4d2da..ec9a51bbf3c9 100755
--- a/tools/testing/selftests/net/ovpn/test-close-socket.sh
+++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh
@@ -53,7 +53,7 @@ ovpn_run_ping_traffic() {
for p in $(seq 1 ${OVPN_NUM_PEERS}); do
ovpn_cmd_ok "send ping traffic to peer ${p}" \
- ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \
+ ip netns exec ovpn_peer0 ping -qfc 100 -w 3 \
5.5.5.$((p + 1))
done
}
diff --git a/tools/testing/selftests/net/ovpn/test-mark.sh b/tools/testing/selftests/net/ovpn/test-mark.sh
index 5a8f47554286..7c1d56e9c525 100755
--- a/tools/testing/selftests/net/ovpn/test-mark.sh
+++ b/tools/testing/selftests/net/ovpn/test-mark.sh
@@ -66,7 +66,7 @@ ovpn_mark_run_baseline_traffic() {
for p in $(seq 1 3); do
ovpn_cmd_ok "send baseline traffic to peer ${p}" \
- ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \
+ ip netns exec ovpn_peer0 ping -qfc 100 -w 3 \
5.5.5.$((p + 1))
done
}
@@ -101,7 +101,7 @@ ovpn_mark_verify_drop_traffic() {
local total_count
for p in $(seq 1 3); do
- if ping_output=$(ip netns exec ovpn_peer0 ping -qfc 500 -w 1 \
+ if ping_output=$(ip netns exec ovpn_peer0 ping -qfc 100 -w 1 \
5.5.5.$((p + 1)) 2>&1); then
printf '%s\n' "expected ping to peer ${p} to fail \
after nft drop rule"
@@ -144,7 +144,7 @@ ovpn_mark_verify_traffic_recovery() {
sleep 1
for p in $(seq 1 3); do
ovpn_cmd_ok "send recovery traffic to peer ${p}" \
- ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \
+ ip netns exec ovpn_peer0 ping -qfc 100 -w 3 \
5.5.5.$((p + 1))
done
}
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
index c06e3135fbef..9b5610837032 100755
--- a/tools/testing/selftests/net/ovpn/test.sh
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -110,7 +110,7 @@ ovpn_run_basic_traffic() {
ovpn_run_lan_traffic() {
ovpn_cmd_ok "ping LAN behind peer1" \
- ip netns exec ovpn_peer0 ping -qfc 500 -w 3 "${OVPN_LAN_IP}"
+ ip netns exec ovpn_peer0 ping -qfc 100 -w 3 "${OVPN_LAN_IP}"
}
ovpn_run_float_mode() {
@@ -127,7 +127,7 @@ ovpn_run_float_mode() {
for p in $(seq 1 ${OVPN_NUM_PEERS}); do
peer_ns="ovpn_peer${p}"
ovpn_cmd_ok "ping tunnel after float peer ${p}" \
- ip netns exec "${peer_ns}" ping -qfc 500 -w 3 5.5.5.1
+ ip netns exec "${peer_ns}" ping -qfc 100 -w 3 5.5.5.1
done
}
diff --git a/tools/testing/selftests/net/rds/config b/tools/testing/selftests/net/rds/config
index 97db7ecb892a..3d62d0c750a8 100644
--- a/tools/testing/selftests/net/rds/config
+++ b/tools/testing/selftests/net/rds/config
@@ -1,3 +1,4 @@
+CONFIG_MODULES=n
CONFIG_NET_NS=y
CONFIG_NET_SCH_NETEM=y
CONFIG_RDS=y