From 8e98b87f515d8c4bae521048a037b2cc431c3fd5 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 14:10:49 +0100 Subject: iio: imu: adis: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: ccd2b52f4ac6 ("staging:iio: Add common ADIS library") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-adis-improv-v1-1-7f90e9fad200@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index dc9ea299e088..8898966bc0f0 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include #include +#include #include #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; -- cgit v1.2.3 From 59598510be1d49e1cff7fd7593293bb8e1b2398b Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 13:41:03 +0100 Subject: iio: adc: ad_sigma_delta: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: 0fb6ee8d0b5e ("iio: ad_sigma_delta: Don't put SPI transfer buffer on the stack") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-dev_sigma_delta_no_irq_flags-v1-1-db39261592cf@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714..719cf9cc6e1a 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; -- cgit v1.2.3 From 6f6c72acddf4357fcc83593c20ef9064fb42db92 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sat, 27 Jan 2024 21:02:08 +0100 Subject: iio: move LIGHT_UVA and LIGHT_UVB to the end of iio_modifier The new modifiers should have added to the end of the enum, so they do not affect the existing entries. No modifiers were added since then, so they can be moved safely to the end of the list. Move IIO_MOD_LIGHT_UVA and IIO_MOD_LIGHT_UVB to the end of iio_modifier. Fixes: b89710bd215e ("iio: add modifiers for A and B ultraviolet light") Suggested-by: Paul Cercueil Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20240127200208.185815-1-javier.carrasco.cruz@gmail.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 5060963707b1..f2e0b2d50e6b 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -91,8 +91,6 @@ enum iio_modifier { IIO_MOD_CO2, IIO_MOD_VOC, IIO_MOD_LIGHT_UV, - IIO_MOD_LIGHT_UVA, - IIO_MOD_LIGHT_UVB, IIO_MOD_LIGHT_DUV, IIO_MOD_PM1, IIO_MOD_PM2P5, @@ -107,6 +105,8 @@ enum iio_modifier { IIO_MOD_PITCH, IIO_MOD_YAW, IIO_MOD_ROLL, + IIO_MOD_LIGHT_UVA, + IIO_MOD_LIGHT_UVB, }; enum iio_event_type { -- cgit v1.2.3 From 4d5e86a56615cc387d21c629f9af8fb0e958d350 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 28 Jan 2024 11:29:11 +0200 Subject: RDMA/mlx5: Fix fortify source warning while accessing Eth segment ------------[ cut here ]------------ memcpy: detected field-spanning write (size 56) of single field "eseg->inline_hdr.start" at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 (size 2) WARNING: CPU: 0 PID: 293779 at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Modules linked in: 8021q garp mrp stp llc rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) ib_uverbs(OE) ib_core(OE) mlx5_core(OE) pci_hyperv_intf mlxdevm(OE) mlx_compat(OE) tls mlxfw(OE) psample nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink mst_pciconf(OE) knem(OE) vfio_pci vfio_pci_core vfio_iommu_type1 vfio iommufd irqbypass cuse nfsv3 nfs fscache netfs xfrm_user xfrm_algo ipmi_devintf ipmi_msghandler binfmt_misc crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic ghash_clmulni_intel sha512_ssse3 snd_pcsp aesni_intel crypto_simd cryptd snd_pcm snd_timer joydev snd soundcore input_leds serio_raw evbug nfsd auth_rpcgss nfs_acl lockd grace sch_fq_codel sunrpc drm efi_pstore ip_tables x_tables autofs4 psmouse virtio_net net_failover failover floppy [last unloaded: mlx_compat(OE)] CPU: 0 PID: 293779 Comm: ssh Tainted: G OE 6.2.0-32-generic #32~22.04.1-Ubuntu Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Code: 0c 01 00 a8 01 75 25 48 8b 75 a0 b9 02 00 00 00 48 c7 c2 10 5b fd c0 48 c7 c7 80 5b fd c0 c6 05 57 0c 03 00 01 e8 95 4d 93 da <0f> 0b 44 8b 4d b0 4c 8b 45 c8 48 8b 4d c0 e9 49 fb ff ff 41 0f b7 RSP: 0018:ffffb5b48478b570 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb5b48478b628 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffb5b48478b5e8 R13: ffff963a3c609b5e R14: ffff9639c3fbd800 R15: ffffb5b480475a80 FS: 00007fc03b444c80(0000) GS:ffff963a3dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000556f46bdf000 CR3: 0000000006ac6003 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_regs+0x72/0x90 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? __warn+0x8d/0x160 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? report_bug+0x1bb/0x1d0 ? handle_bug+0x46/0x90 ? exc_invalid_op+0x19/0x80 ? asm_exc_invalid_op+0x1b/0x20 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] mlx5_ib_post_send_nodrain+0xb/0x20 [mlx5_ib] ipoib_send+0x2ec/0x770 [ib_ipoib] ipoib_start_xmit+0x5a0/0x770 [ib_ipoib] dev_hard_start_xmit+0x8e/0x1e0 ? validate_xmit_skb_list+0x4d/0x80 sch_direct_xmit+0x116/0x3a0 __dev_xmit_skb+0x1fd/0x580 __dev_queue_xmit+0x284/0x6b0 ? _raw_spin_unlock_irq+0xe/0x50 ? __flush_work.isra.0+0x20d/0x370 ? push_pseudo_header+0x17/0x40 [ib_ipoib] neigh_connected_output+0xcd/0x110 ip_finish_output2+0x179/0x480 ? __smp_call_single_queue+0x61/0xa0 __ip_finish_output+0xc3/0x190 ip_finish_output+0x2e/0xf0 ip_output+0x78/0x110 ? __pfx_ip_finish_output+0x10/0x10 ip_local_out+0x64/0x70 __ip_queue_xmit+0x18a/0x460 ip_queue_xmit+0x15/0x30 __tcp_transmit_skb+0x914/0x9c0 tcp_write_xmit+0x334/0x8d0 tcp_push_one+0x3c/0x60 tcp_sendmsg_locked+0x2e1/0xac0 tcp_sendmsg+0x2d/0x50 inet_sendmsg+0x43/0x90 sock_sendmsg+0x68/0x80 sock_write_iter+0x93/0x100 vfs_write+0x326/0x3c0 ksys_write+0xbd/0xf0 ? do_syscall_64+0x69/0x90 __x64_sys_write+0x19/0x30 do_syscall_64+0x59/0x90 ? do_user_addr_fault+0x1d0/0x640 ? exit_to_user_mode_prepare+0x3b/0xd0 ? irqentry_exit_to_user_mode+0x9/0x20 ? irqentry_exit+0x43/0x50 ? exc_page_fault+0x92/0x1b0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fc03ad14a37 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffdf8697fe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000008024 RCX: 00007fc03ad14a37 RDX: 0000000000008024 RSI: 0000556f46bd8270 RDI: 0000000000000003 RBP: 0000556f46bb1800 R08: 0000000000007fe3 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 R13: 0000556f46bc66b0 R14: 000000000000000a R15: 0000556f46bb2f50 ---[ end trace 0000000000000000 ]--- Link: https://lore.kernel.org/r/8228ad34bd1a25047586270f7b1fb4ddcd046282.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/qp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index bd53cf4be7bd..f0e55bf3ec8b 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; -- cgit v1.2.3 From 43fdbd140238d44e7e847232719fef7d20f9d326 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 28 Jan 2024 11:29:12 +0200 Subject: IB/mlx5: Don't expose debugfs entries for RRoCE general parameters if not supported debugfs entries for RRoCE general CC parameters must be exposed only when they are supported, otherwise when accessing them there may be a syndrome error in kernel log, for example: $ cat /sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp cat: '/sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp': Invalid argument $ dmesg mlx5_core 0000:08:00.1: mlx5_cmd_out_err:805:(pid 1253): QUERY_CONG_PARAMS(0x824) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x325a82), err(-22) Fixes: 66fb1d5df6ac ("IB/mlx5: Extend debug control for CC parameters") Reviewed-by: Edward Srouji Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/e7ade70bad52b7468bdb1de4d41d5fad70c8b71c.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bf5320b28b8b..2c10350bd422 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1103,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits { u8 sw_r_roce_src_udp_port[0x1]; u8 fl_rc_qp_when_roce_disabled[0x1]; u8 fl_rc_qp_when_roce_enabled[0x1]; - u8 reserved_at_7[0x1]; + u8 roce_cc_general[0x1]; u8 qp_ooo_transmit_default[0x1]; u8 reserved_at_9[0x15]; u8 qp_ts_format[0x2]; -- cgit v1.2.3 From 177fbbcb4ed6b306c1626a277fac3fb1c495a4c7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 13:14:13 +0100 Subject: wifi: cfg80211: detect stuck ECSA element in probe resp We recently added some validation that we don't try to connect to an AP that is currently in a channel switch process, since that might want the channel to be quiet or we might not be able to connect in time to hear the switching in a beacon. This was in commit c09c4f31998b ("wifi: mac80211: don't connect to an AP while it's in a CSA process"). However, we promptly got a report that this caused new connection failures, and it turns out that the AP that we now cannot connect to is permanently advertising an extended channel switch announcement, even with quiet. The AP in question was an Asus RT-AC53, with firmware 3.0.0.4.380_10760-g21a5898. As a first step, attempt to detect that we're dealing with such a situation, so mac80211 can use this later. Reported-by: coldolt Closes: https://lore.kernel.org/linux-wireless/CAJvGw+DQhBk_mHXeu6RTOds5iramMW2FbMB01VbKRA4YbHHDTA@mail.gmail.com/ Fixes: c09c4f31998b ("wifi: mac80211: don't connect to an AP while it's in a CSA process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240129131413.246972c8775e.Ibf834d7f52f9951a353b6872383da710a7358338@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cf79656ce09c..2b54fdd8ca15 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2910,6 +2910,8 @@ struct cfg80211_bss_ies { * own the beacon_ies, but they're just pointers to the ones from the * @hidden_beacon_bss struct) * @proberesp_ies: the information elements from the last Probe Response frame + * @proberesp_ecsa_stuck: ECSA element is stuck in the Probe Response frame, + * cannot rely on it having valid data * @hidden_beacon_bss: in case this BSS struct represents a probe response from * a BSS that hides the SSID in its beacon, this points to the BSS struct * that holds the beacon data. @beacon_ies is still valid, of course, and @@ -2950,6 +2952,8 @@ struct cfg80211_bss { u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; + u8 proberesp_ecsa_stuck:1; + u8 bssid_index; u8 max_bssid_indicator; -- cgit v1.2.3 From 54ce1927eb787f7bbb7ee664841c8f5932703f39 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 31 Jan 2024 15:55:38 -0800 Subject: cxl/cper: Fix errant CPER prints for CXL events Jonathan reports that CXL CPER events dump an extra generic error message. {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 {1}[Hardware Error]: event severity: recoverable {1}[Hardware Error]: Error 0, type: recoverable {1}[Hardware Error]: section type: unknown, fbcd0a77-c260-417f-85a9-088b1621eba6 {1}[Hardware Error]: section length: 0x90 {1}[Hardware Error]: 00000000: 00000090 00000007 00000000 0d938086 ................ {1}[Hardware Error]: 00000010: 00100000 00000000 00040000 00000000 ................ ... CXL events were rerouted though the CXL subsystem for additional processing. However, when that work was done it was missed that cper_estatus_print_section() continued with a generic error message which is confusing. Teach CPER print code to ignore printing details of some section types. Assign the CXL event GUIDs to this set to prevent confusing unknown prints. Reported-by: Jonathan Cameron Suggested-by: Jonathan Cameron Signed-off-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Signed-off-by: Ard Biesheuvel --- include/linux/cper.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/cper.h b/include/linux/cper.h index c1a7dc325121..265b0f8fc0b3 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -90,6 +90,29 @@ enum { GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CPER_SEC_CXL_GEN_MEDIA_GUID \ + GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CPER_SEC_CXL_DRAM_GUID \ + GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CPER_SEC_CXL_MEM_MODULE_GUID \ + GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) + /* * Flags bits definitions for flags in struct cper_record_header * If set, the error has been recovered -- cgit v1.2.3 From 862cf85fef85becc55a173387527adb4f076fab0 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 31 Jan 2024 10:16:47 +0100 Subject: iio: commom: st_sensors: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for st_sensors common buffer. While at it, moved the odr_lock before buffer_data as we definitely don't want any other data to share a cacheline with the buffer. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: e031d5f558f1 ("iio:st_sensors: remove buffer allocation at each buffer enable") Signed-off-by: Nuno Sa Cc: Link: https://lore.kernel.org/r/20240131-dev_dma_safety_stm-v2-1-580c07fae51b@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 607c3a89a647..f9ae5cdd884f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -258,9 +258,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER -- cgit v1.2.3 From 41b7fa157ea1c8c3a575ca7f5f32034de9bee3ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Feb 2024 15:19:16 +0000 Subject: rxrpc: Fix counting of new acks and nacks Fix the counting of new acks and nacks when parsing a packet - something that is used in congestion control. As the code stands, it merely notes if there are any nacks whereas what we really should do is compare the previous SACK table to the new one, assuming we get two successive ACK packets with nacks in them. However, we really don't want to do that if we can avoid it as the tables might not correspond directly as one may be shifted from the other - something that will only get harder to deal with once extended ACK tables come into full use (with a capacity of up to 8192). Instead, count the number of nacks shifted out of the old SACK, the number of nacks retained in the portion still active and the number of new acks and nacks in the new table then calculate what we need. Note this ends up a bit of an estimate as the Rx protocol allows acks to be withdrawn by the receiver and packets requested to be retransmitted. Fixes: d57a3a151660 ("rxrpc: Save last ACK's SACK table rather than marking txbufs") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 4c1ef7b3705c..87b8de9b6c1c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -128,6 +128,7 @@ EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ + EM(rxrpc_skb_get_last_nack, "GET last-nack") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ @@ -141,6 +142,7 @@ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ + EM(rxrpc_skb_put_last_nack, "PUT last-nack") \ EM(rxrpc_skb_put_purge, "PUT purge ") \ EM(rxrpc_skb_put_rotate, "PUT rotate ") \ EM(rxrpc_skb_put_unknown, "PUT unknown ") \ @@ -1552,7 +1554,7 @@ TRACE_EVENT(rxrpc_congest, memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), @@ -1560,9 +1562,9 @@ TRACE_EVENT(rxrpc_congest, __print_symbolic(__entry->sum.mode, rxrpc_congest_modes), __entry->sum.cwnd, __entry->sum.ssthresh, - __entry->sum.nr_acks, __entry->sum.saw_nacks, + __entry->sum.nr_acks, __entry->sum.nr_retained_nacks, __entry->sum.nr_new_acks, - __entry->sum.nr_rot_new_acks, + __entry->sum.nr_new_nacks, __entry->top - __entry->hard_ack, __entry->sum.cumulative_acks, __entry->sum.dup_acks, -- cgit v1.2.3 From 34a1066981a967eab619938e7b35a9be6b4c34e1 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Sun, 4 Feb 2024 21:01:17 +0100 Subject: ASoC: tas2781: add module parameter to tascodec_init() The tascodec_init() of the snd-soc-tas2781-comlib module is called from snd-soc-tas2781-i2c and snd-hda-scodec-tas2781-i2c modules. It calls request_firmware_nowait() with parameter THIS_MODULE and a cont/callback from the latter modules. The latter modules can be removed while their callbacks are running, resulting in a general protection failure. Add module parameter to tascodec_init() so request_firmware_nowait() can be called with the module of the callback. Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/118dad922cef50525e5aab09badef2fa0eb796e5.1707076603.git.soyer@irl.hu Signed-off-by: Mark Brown --- include/sound/tas2781.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index b00d65417c31..9aff384941de 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -142,6 +142,7 @@ struct tasdevice_priv { void tas2781_reset(struct tasdevice_priv *tas_dev); int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, + struct module *module, void (*cont)(const struct firmware *fw, void *context)); struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c); int tasdevice_init(struct tasdevice_priv *tas_priv); -- cgit v1.2.3 From dad6a09f3148257ac1773cd90934d721d68ab595 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Jan 2024 15:56:36 -0800 Subject: hrtimer: Report offline hrtimer enqueue The hrtimers migration on CPU-down hotplug process has been moved earlier, before the CPU actually goes to die. This leaves a small window of opportunity to queue an hrtimer in a blind spot, leaving it ignored. For example a practical case has been reported with RCU waking up a SCHED_FIFO task right before the CPUHP_AP_IDLE_DEAD stage, queuing that way a sched/rt timer to the local offline CPU. Make sure such situations never go unnoticed and warn when that happens. Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Reported-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240129235646.3171983-4-boqun.feng@gmail.com --- include/linux/hrtimer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 87e3bedf8eb0..641c4567cfa7 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -157,6 +157,7 @@ enum hrtimer_base_type { * @max_hang_time: Maximum time spent in hrtimer_interrupt * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are * expired + * @online: CPU is online from an hrtimers point of view * @timer_waiters: A hrtimer_cancel() invocation waits for the timer * callback to finish. * @expires_next: absolute time of the next event, is required for remote @@ -179,7 +180,8 @@ struct hrtimer_cpu_base { unsigned int hres_active : 1, in_hrtirq : 1, hang_detected : 1, - softirq_activated : 1; + softirq_activated : 1, + online : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; -- cgit v1.2.3 From 5f8408aca66772d3aa9b4831577b2ac5ec41bcd9 Mon Sep 17 00:00:00 2001 From: Grzegorz Trzebiatowski Date: Fri, 26 Jan 2024 13:28:04 +0100 Subject: accel/ivpu: Add job status for jobs aborted by the driver Add DRM_IVPU_JOB_STATUS_ABORTED to indicate that the job was aborted by the driver due to e.g. TDR or user context MMU faults. This will help UMD and tests distinguish if job was aborted by the FW or the driver. Signed-off-by: Grzegorz Trzebiatowski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-8-jacek.lawrynowicz@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 63c49318a863..19a13468eca5 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -305,6 +305,7 @@ struct drm_ivpu_submit { /* drm_ivpu_bo_wait job status codes */ #define DRM_IVPU_JOB_STATUS_SUCCESS 0 +#define DRM_IVPU_JOB_STATUS_ABORTED 256 /** * struct drm_ivpu_bo_wait - Wait for BO to become inactive -- cgit v1.2.3 From 3ee07964d407411fd578a3bc998de44fd64d266a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 1 Feb 2024 11:55:56 +0100 Subject: serial: core: introduce uart_port_tx_flags() And an enum with a flag: UART_TX_NOSTOP. To NOT call __port->ops->stop_tx() when the circular buffer is empty. mxs-uart needs this (see the next patch). Signed-off-by: "Jiri Slaby (SUSE)" Cc: stable Tested-by: Emil Kronborg Link: https://lore.kernel.org/r/20240201105557.28043-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 536b2581d3e2..55b1f3ba48ac 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -748,8 +748,17 @@ struct uart_driver { void uart_write_wakeup(struct uart_port *port); -#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \ - for_post) \ +/** + * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() + * + * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer + */ +enum UART_TX_FLAGS { + UART_TX_NOSTOP = BIT(0), +}; + +#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ + for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct circ_buf *xmit = &__port->state->xmit; \ @@ -777,7 +786,7 @@ void uart_write_wakeup(struct uart_port *port); if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (pending == 0) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ @@ -812,7 +821,7 @@ void uart_write_wakeup(struct uart_port *port); */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ - __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \ + __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) @@ -826,8 +835,21 @@ void uart_write_wakeup(struct uart_port *port); * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ - __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({})) + __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) + +/** + * uart_port_tx_flags -- transmit helper for uart_port with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ -- cgit v1.2.3 From 853b8d7597eea4ccaaefbcf0942cd42fc86d542a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 2 Feb 2024 12:22:58 +0200 Subject: remap_range: merge do_clone_file_range() into vfs_clone_file_range() commit dfad37051ade ("remap_range: move permission hooks out of do_clone_file_range()") moved the permission hooks from do_clone_file_range() out to its caller vfs_clone_file_range(), but left all the fast sanity checks in do_clone_file_range(). This makes the expensive security hooks be called in situations that they would not have been called before (e.g. fs does not support clone). The only reason for the do_clone_file_range() helper was that overlayfs did not use to be able to call vfs_clone_file_range() from copy up context with sb_writers lock held. However, since commit c63e56a4a652 ("ovl: do not open/llseek lower file with upper sb_writers held"), overlayfs just uses an open coded version of vfs_clone_file_range(). Merge_clone_file_range() into vfs_clone_file_range(), restoring the original order of checks as it was before the regressing commit and adapt the overlayfs code to call vfs_clone_file_range() before the permission hooks that were added by commit ca7ab482401c ("ovl: add permission hooks outside of do_splice_direct()"). Note that in the merge of do_clone_file_range(), the file_start_write() context was reduced to cover ->remap_file_range() without holding it over the permission hooks, which was the reason for doing the regressing commit in the first place. Reported-and-tested-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401312229.eddeb9a6-oliver.sang@intel.com Fixes: dfad37051ade ("remap_range: move permission hooks out of do_clone_file_range()") Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20240202102258.1582671-1-amir73il@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a70495..023f37c60709 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2101,9 +2101,6 @@ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); -extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); -- cgit v1.2.3 From f814bdda774c183b0cc15ec8f3b6e7c6f4527ba5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 23 Jan 2024 18:58:26 +0100 Subject: blk-wbt: Fix detection of dirty-throttled tasks The detection of dirty-throttled tasks in blk-wbt has been subtly broken since its beginning in 2016. Namely if we are doing cgroup writeback and the throttled task is not in the root cgroup, balance_dirty_pages() will set dirty_sleep for the non-root bdi_writeback structure. However blk-wbt checks dirty_sleep only in the root cgroup bdi_writeback structure. Thus detection of recently throttled tasks is not working in this case (we noticed this when we switched to cgroup v2 and suddently writeback was slow). Since blk-wbt has no easy way to get to proper bdi_writeback and furthermore its intention has always been to work on the whole device rather than on individual cgroups, just move the dirty_sleep timestamp from bdi_writeback to backing_dev_info. That fixes the checking for recently throttled task and saves memory for everybody as a bonus. CC: stable@vger.kernel.org Fixes: b57d74aff9ab ("writeback: track if we're sleeping on progress in balance_dirty_pages()") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20240123175826.21452-1-jack@suse.cz [axboe: fixup indentation errors] Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ae12696ec492..2ad261082bba 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -141,8 +141,6 @@ struct bdi_writeback { struct delayed_work dwork; /* work item used for writeback */ struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ - unsigned long dirty_sleep; /* last wait */ - struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK @@ -179,6 +177,11 @@ struct backing_dev_info { * any dirty wbs, which is depended upon by bdi_has_dirty(). */ atomic_long_t tot_write_bandwidth; + /* + * Jiffies when last process was dirty throttled on this bdi. Used by + * blk-wbt. + */ + unsigned long last_bdp_sleep; struct bdi_writeback wb; /* the root writeback info for this bdi */ struct list_head wb_list; /* list of all wbs */ -- cgit v1.2.3 From cd7d469c25704d414d71bf3644f163fb74e7996b Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 13 Oct 2023 13:55:44 +0800 Subject: libceph: fail sparse-read if the data length doesn't match Once this happens that means there have bugs. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fa018d5864e7..f66f6aac74f6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -45,6 +45,7 @@ enum ceph_sparse_read_state { CEPH_SPARSE_READ_HDR = 0, CEPH_SPARSE_READ_EXTENTS, CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA_PRE, CEPH_SPARSE_READ_DATA, }; @@ -64,7 +65,7 @@ struct ceph_sparse_read { u64 sr_req_len; /* orig request length */ u64 sr_pos; /* current pos in buffer */ int sr_index; /* current extent index */ - __le32 sr_datalen; /* length of actual data */ + u32 sr_datalen; /* length of actual data */ u32 sr_count; /* extent count in reply */ int sr_ext_len; /* length of extent array */ struct ceph_sparse_extent *sr_extent; /* extent array */ -- cgit v1.2.3 From 8e46a2d068c92a905d01cbb018b00d66991585ab Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 14 Dec 2023 16:01:03 +0800 Subject: libceph: just wait for more data to be available on the socket A short read may occur while reading the message footer from the socket. Later, when the socket is ready for another read, the messenger invokes all read_partial_*() handlers, including read_partial_sparse_msg_data(). The expectation is that read_partial_sparse_msg_data() would bail, allowing the messenger to invoke read_partial() for the footer and pick up where it left off. However read_partial_sparse_msg_data() violates that and ends up calling into the state machine in the OSD client. The sparse-read state machine assumes that it's a new op and interprets some piece of the footer as the sparse-read header and returns bogus extents/data length, etc. To determine whether read_partial_sparse_msg_data() should bail, let's reuse cursor->total_resid. Because once it reaches to zero that means all the extents and data have been successfully received in last read, else it could break out when partially reading any of the extents and data. And then osd_sparse_read() could continue where it left off. [ idryomov: changelog ] Link: https://tracker.ceph.com/issues/63586 Fixes: d396f89db39a ("libceph: add sparse read support to msgr1") Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2eaaabbe98cb..1717cc57cdac 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -283,7 +283,7 @@ struct ceph_msg { struct kref kref; bool more_to_follow; bool needs_out_seq; - bool sparse_read; + u64 sparse_read_total; int front_alloc_len; struct ceph_msgpool *pool; -- cgit v1.2.3 From 292781c3c5485ce33bd22b2ef1b2bed709b4d672 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 1 Feb 2024 23:33:29 +0100 Subject: netfilter: nft_compat: reject unused compat flag Flag (1 << 0) is ignored is set, never used, reject it it with EINVAL instead. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ca30232b7bc8..117c6a9b845b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -285,9 +285,11 @@ enum nft_rule_attributes { /** * enum nft_rule_compat_flags - nf_tables rule compat flags * + * @NFT_RULE_COMPAT_F_UNUSED: unused * @NFT_RULE_COMPAT_F_INV: invert the check result */ enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_UNUSED = (1 << 0), NFT_RULE_COMPAT_F_INV = (1 << 1), NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, }; -- cgit v1.2.3 From 7395dfacfff65e9938ac0889dafa1ab01e987d15 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 6 Feb 2024 00:11:40 +0100 Subject: netfilter: nf_tables: use timestamp to check for set element timeout Add a timestamp field at the beginning of the transaction, store it in the nftables per-netns area. Update set backend .insert, .deactivate and sync gc path to use the timestamp, this avoids that an element expires while control plane transaction is still unfinished. .lookup and .update, which are used from packet path, still use the current time to check if the element has expired. And .get path and dump also since this runs lockless under rcu read size lock. Then, there is async gc which also needs to check the current time since it runs asynchronously from a workqueue. Fixes: c3e1b005ed1c ("netfilter: nf_tables: add set element timeout support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 001226c34621..510244cc0f8f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -808,10 +808,16 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS); } -static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, + u64 tstamp) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext)); + time_after_eq64(tstamp, *nft_set_ext_expiration(ext)); +} + +static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +{ + return __nft_set_elem_expired(ext, get_jiffies_64()); } static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, @@ -1779,6 +1785,7 @@ struct nftables_pernet { struct list_head notify_list; struct mutex commit_mutex; u64 table_handle; + u64 tstamp; unsigned int base_seq; unsigned int gc_seq; u8 validate_state; @@ -1791,6 +1798,11 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net) return net_generic(net, nf_tables_net_id); } +static inline u64 nft_net_tstamp(const struct net *net) +{ + return nft_pernet(net)->tstamp; +} + #define __NFT_REDUCE_READONLY 1UL #define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY -- cgit v1.2.3 From 7d708c145b2631941b8b0b4a740dc2990818c39c Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 9 Feb 2024 01:24:54 +0000 Subject: Revert "usb: dwc3: Support EBC feature of DWC_usb31" This reverts commit 398aa9a7e77cf23c2a6f882ddd3dcd96f21771dc. The update to the gadget API to support EBC feature is incomplete. It's missing at least the following: * New usage documentation * Gadget capability check * Condition for the user to check how many and which endpoints can be used as "fifo_mode" * Description of how it can affect completed request (e.g. dwc3 won't update TRB on completion -- ie. how it can affect request's actual length report) Let's revert this until it's ready. Fixes: 398aa9a7e77c ("usb: dwc3: Support EBC feature of DWC_usb31") Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/3042f847ff904b4dd4e4cf66a1b9df470e63439e.1707441690.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index a771ccc038ac..6532beb587b1 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -236,7 +236,6 @@ struct usb_ep { unsigned max_streams:16; unsigned mult:2; unsigned maxburst:5; - unsigned fifo_mode:1; u8 address; const struct usb_endpoint_descriptor *desc; const struct usb_ss_ep_comp_descriptor *comp_desc; -- cgit v1.2.3 From 4356e9f841f7fbb945521cef3577ba394c65f3fc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 Feb 2024 12:39:31 -0800 Subject: work around gcc bugs with 'asm goto' with outputs We've had issues with gcc and 'asm goto' before, and we created a 'asm_volatile_goto()' macro for that in the past: see commits 3f0116c3238a ("compiler/gcc4: Add quirk for 'asm goto' miscompilation bug") and a9f180345f53 ("compiler/gcc4: Make quirk for asm_volatile_goto() unconditional"). Then, much later, we ended up removing the workaround in commit 43c249ea0b1e ("compiler-gcc.h: remove ancient workaround for gcc PR 58670") because we no longer supported building the kernel with the affected gcc versions, but we left the macro uses around. Now, Sean Christopherson reports a new version of a very similar problem, which is fixed by re-applying that ancient workaround. But the problem in question is limited to only the 'asm goto with outputs' cases, so instead of re-introducing the old workaround as-is, let's rename and limit the workaround to just that much less common case. It looks like there are at least two separate issues that all hit in this area: (a) some versions of gcc don't mark the asm goto as 'volatile' when it has outputs: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 which is easy to work around by just adding the 'volatile' by hand. (b) Internal compiler errors: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 which are worked around by adding the extra empty 'asm' as a barrier, as in the original workaround. but the problem Sean sees may be a third thing since it involves bad code generation (not an ICE) even with the manually added 'volatile'. but the same old workaround works for this case, even if this feels a bit like voodoo programming and may only be hiding the issue. Reported-and-tested-by: Sean Christopherson Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Cc: Nick Desaulniers Cc: Uros Bizjak Cc: Jakub Jelinek Cc: Andrew Pinski Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 19 +++++++++++++++++++ include/linux/compiler_types.h | 4 ++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index aebb65bf95a7..c1a963be7d28 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -64,6 +64,25 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * + * Work it around via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6f1ca49306d2..663d8791c871 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -362,8 +362,8 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE -- cgit v1.2.3 From 7e4a205fe56b9092f0143dad6aa5fee081139b09 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Feb 2024 23:53:05 -0500 Subject: Revert "get rid of DCACHE_GENOCIDE" This reverts commit 57851607326a2beef21e67f83f4f53a90df8445a. Unfortunately, while we only call that thing once, the callback *can* be called more than once for the same dentry - all it takes is rename_lock being touched while we are in d_walk(). For now let's revert it. Signed-off-by: Al Viro --- include/linux/dcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1666c387861f..d07cf2f1bb7d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -173,6 +173,7 @@ struct dentry_operations { #define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ #define DCACHE_CANT_MOUNT BIT(8) +#define DCACHE_GENOCIDE BIT(9) #define DCACHE_SHRINK_LIST BIT(10) #define DCACHE_OP_WEAK_REVALIDATE BIT(11) -- cgit v1.2.3 From aec7961916f3f9e88766e2688992da6980f11b8d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:19 -0800 Subject: tls: fix race between async notify and socket close The submitting thread (one which called recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete() so any code past that point risks touching already freed data. Try to avoid the locking and extra flags altogether. Have the main thread hold an extra reference, this way we can depend solely on the atomic ref counter for synchronization. Don't futz with reiniting the completion, either, we are now tightly controlling when completion fires. Reported-by: valis Fixes: 0cada33241d9 ("net/tls: fix race condition causing kernel panic") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/tls.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 962f0c501111..340ad43971e4 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -97,9 +97,6 @@ struct tls_sw_context_tx { struct tls_rec *open_rec; struct list_head tx_list; atomic_t encrypt_pending; - /* protect crypto_wait with encrypt_pending */ - spinlock_t encrypt_compl_lock; - int async_notify; u8 async_capable:1; #define BIT_TX_SCHEDULED 0 @@ -136,8 +133,6 @@ struct tls_sw_context_rx { struct tls_strparser strp; atomic_t decrypt_pending; - /* protect crypto_wait with decrypt_pending*/ - spinlock_t decrypt_compl_lock; struct sk_buff_head async_hold; struct wait_queue_head wq; }; -- cgit v1.2.3 From 119ff04864a24470b1e531bb53e5c141aa8fefb0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:21 +0000 Subject: tcp: move tp->scaling_ratio to tcp_sock_read_txrx group tp->scaling_ratio is a read mostly field, used in rx and tx fast paths. Fixes: d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Wei Wang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 89b290d8c8dc..168f5dca6609 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -221,6 +221,7 @@ struct tcp_sock { u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ u16 tcp_header_len; /* Bytes of tcp header to send */ + u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ @@ -352,7 +353,6 @@ struct tcp_sock { u32 compressed_ack_rcv_nxt; struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - u8 scaling_ratio; /* see tcp_win_from_space() */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ -- cgit v1.2.3 From 666a877deab2bcf8fd11c962d69e687e18168a6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:22 +0000 Subject: tcp: move tp->tcp_usec_ts to tcp_sock_read_txrx group tp->tcp_usec_ts is a read mostly field, used in rx and tx fast paths. Fixes: d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Wei Wang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 168f5dca6609..a1c47a6d69b0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -224,6 +224,7 @@ struct tcp_sock { u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, + tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ __cacheline_group_end(tcp_sock_read_txrx); @@ -368,8 +369,7 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - tcp_usec_ts:1, /* TSval values in usec */ - unused:4; + unused:5; u8 thin_lto : 1,/* Use linear timeouts for thin streams */ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ -- cgit v1.2.3 From c353c7b7ffb7ae6ed8f3339906fe33c8be6cf344 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:23 +0000 Subject: net-device: move lstats in net_device_read_txrx dev->lstats is notably used from loopback ndo_start_xmit() and other virtual drivers. Per cpu stats updates are dirtying per-cpu data, but the pointer itself is read-only. Fixes: 43a71cd66b9c ("net-device: reorganize net_device fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Simon Horman Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 118c40258d07..ef7bfbb98497 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2141,6 +2141,11 @@ struct net_device { /* TXRX read-mostly hotpath */ __cacheline_group_begin(net_device_read_txrx); + union { + struct pcpu_lstats __percpu *lstats; + struct pcpu_sw_netstats __percpu *tstats; + struct pcpu_dstats __percpu *dstats; + }; unsigned int flags; unsigned short hard_header_len; netdev_features_t features; @@ -2395,11 +2400,6 @@ struct net_device { enum netdev_ml_priv_type ml_priv_type; enum netdev_stat_type pcpu_stat_type:8; - union { - struct pcpu_lstats __percpu *lstats; - struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; - }; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; -- cgit v1.2.3 From 11ba1728be3edb6928791f4c622f154ebe228ae6 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 2 Feb 2024 12:30:26 +0000 Subject: ptrace: Introduce exception_ip arch hook On architectures with delay slot, architecture level instruction pointer (or program counter) in pt_regs may differ from where exception was triggered. Introduce exception_ip hook to invoke architecture code and determine actual instruction pointer to the exception. Link: https://lore.kernel.org/lkml/00d1b813-c55f-4365-8d81-d70258e10b16@app.fastmail.com/ Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- include/linux/ptrace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index eaaef3ffec22..90507d4afcd6 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif +#ifndef exception_ip +#define exception_ip(x) instruction_pointer(x) +#endif + extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); -- cgit v1.2.3 From bf5802238dc181b1f7375d358af1d01cd72d1c11 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Feb 2024 09:03:24 -0800 Subject: xen/gntalloc: Replace UAPI 1-element array Without changing the structure size (since it is UAPI), add a proper flexible array member, and reference it in the kernel so that it will not be trip the array-bounds sanitizer[1]. Link: https://github.com/KSPP/linux/issues/113 [1] Cc: Juergen Gross Cc: Stefano Stabellini Cc: Oleksandr Tyshchenko Cc: Gustavo A. R. Silva Cc: xen-devel@lists.xenproject.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240206170320.work.437-kees@kernel.org Signed-off-by: Juergen Gross --- include/uapi/xen/gntalloc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/xen/gntalloc.h b/include/uapi/xen/gntalloc.h index 48d2790ef928..3109282672f3 100644 --- a/include/uapi/xen/gntalloc.h +++ b/include/uapi/xen/gntalloc.h @@ -31,7 +31,10 @@ struct ioctl_gntalloc_alloc_gref { __u64 index; /* The grant references of the newly created grant, one per page */ /* Variable size, depending on count */ - __u32 gref_ids[1]; + union { + __u32 gref_ids[1]; + __DECLARE_FLEX_ARRAY(__u32, gref_ids_flex); + }; }; #define GNTALLOC_FLAG_WRITABLE 1 -- cgit v1.2.3 From 6ac86372102b477083db99a9af8246fb916271b5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 25 Jan 2024 09:15:59 +0100 Subject: gpiolib: add gpiod_to_gpio_device() stub for !GPIOLIB Add empty stub of gpiod_to_gpio_device() when GPIOLIB is not enabled. Cc: Fixes: 370232d096e3 ("gpiolib: provide gpiod_to_gpio_device()") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 9a5c6c76e653..012797e7106d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -819,6 +819,12 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) +{ + WARN_ON(1); + return ERR_PTR(-ENODEV); +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- cgit v1.2.3 From ebe0c15b135b1e4092c25b95d89e9a5899467499 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 25 Jan 2024 09:16:00 +0100 Subject: gpiolib: add gpio_device_get_base() stub for !GPIOLIB Add empty stub of gpio_device_get_base() when GPIOLIB is not enabled. Cc: Fixes: 8c85a102fc4e ("gpiolib: provide gpio_device_get_base()") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 012797e7106d..c1df7698edb0 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -825,6 +825,12 @@ static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline int gpio_device_get_base(struct gpio_device *gdev) +{ + WARN_ON(1); + return -ENODEV; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- cgit v1.2.3 From 2df8aa3cad407044f2febdbbdf220c6dae839c79 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 25 Jan 2024 09:16:01 +0100 Subject: gpiolib: add gpio_device_get_label() stub for !GPIOLIB Add empty stub of gpio_device_get_label() when GPIOLIB is not enabled. Cc: Fixes: d1f7728259ef ("gpiolib: provide gpio_device_get_label()") Suggested-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c1df7698edb0..7f75c9a51874 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -831,6 +831,12 @@ static inline int gpio_device_get_base(struct gpio_device *gdev) return -ENODEV; } +static inline const char *gpio_device_get_label(struct gpio_device *gdev) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- cgit v1.2.3 From 29f6975332479f92233594901c649ff4d71f8cb6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 5 Feb 2024 11:10:25 -0800 Subject: nvme: implement support for relaxed effects NVM Express TP4167 provides a way for controllers to report a relaxed execution constraint. Specifically, it notifies of exclusivity for IO vs. admin commands instead of grouping these together. If set, then we don't need to freeze IO in order to execute that admin command. The freezing distrupts IO processes, so it's nice to avoid that if the controller tells us it's not necessary. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bc605ec4a3fd..3ef4053ea950 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -646,6 +646,7 @@ enum { NVME_CMD_EFFECTS_NCC = 1 << 2, NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSER_MASK = GENMASK(15, 14), NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20), -- cgit v1.2.3 From 321da3dc1f3c92a12e3c5da934090d2992a8814c Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 13 Feb 2024 09:33:06 -0500 Subject: scsi: sd: usb_storage: uas: Access media prior to querying device properties It has been observed that some USB/UAS devices return generic properties hardcoded in firmware for mode pages for a period of time after a device has been discovered. The reported properties are either garbage or they do not accurately reflect the characteristics of the physical storage device attached in the case of a bridge. Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") we would call revalidate several times during device discovery. As a result, incorrect values would eventually get replaced with ones accurately describing the attached storage. When we did away with the redundant revalidate pass, several cases were reported where devices reported nonsensical values or would end up in write-protected state. An initial attempt at addressing this issue involved introducing a delayed second revalidate invocation. However, this approach still left some devices reporting incorrect characteristics. Tasos Sahanidis debugged the problem further and identified that introducing a READ operation prior to MODE SENSE fixed the problem and that it wasn't a timing issue. Issuing a READ appears to cause the devices to update their state to reflect the actual properties of the storage media. Device properties like vendor, model, and storage capacity appear to be correctly reported from the get-go. It is unclear why these devices defer populating the remaining characteristics. Match the behavior of a well known commercial operating system and trigger a READ operation prior to querying device characteristics to force the device to populate the mode pages. The additional READ is triggered by a flag set in the USB storage and UAS drivers. We avoid issuing the READ for other transport classes since some storage devices identify Linux through our particular discovery command sequence. Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.com Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") Cc: stable@vger.kernel.org Reported-by: Tasos Sahanidis Reviewed-by: Ewan D. Milne Reviewed-by: Bart Van Assche Tested-by: Tasos Sahanidis Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 5ec1e71a09de..01c02cb76ea6 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -208,6 +208,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ -- cgit v1.2.3 From 8a566f94104df87a067458351675129bb4e1ece2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Feb 2024 16:22:55 +0200 Subject: seq_buf: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Link: https://lkml.kernel.org/r/20240215142255.400264-1-andriy.shevchenko@linux.intel.com Cc: "Matthew Wilcox (Oracle)" Cc: Andrew Morton Signed-off-by: Andy Shevchenko Signed-off-by: Steven Rostedt (Google) --- include/linux/seq_buf.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index c44f4b47b945..07b26e751060 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -2,7 +2,10 @@ #ifndef _LINUX_SEQ_BUF_H #define _LINUX_SEQ_BUF_H -#include +#include +#include +#include +#include /* * Trace sequences are used to allow a function to call several other functions -- cgit v1.2.3 From 6efe4d18796934b8ada66c1c446510e7f2d9b972 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Feb 2024 17:25:06 +0200 Subject: seq_buf: Fix kernel documentation There are plenty of issues with the kernel documentation here: - misspelled word "sequence" - different style of returned value descriptions - missed Return sections - unaligned style of ASCII / NUL-terminated / etc - wrong function references Fix all these. Link: https://lkml.kernel.org/r/20240215152506.598340-1-andriy.shevchenko@linux.intel.com Cc: Andrew Morton Signed-off-by: Andy Shevchenko Reviewed-by: Randy Dunlap Signed-off-by: Steven Rostedt (Google) --- include/linux/seq_buf.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 07b26e751060..fe41da005970 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -13,7 +13,7 @@ */ /** - * seq_buf - seq buffer structure + * struct seq_buf - seq buffer structure * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer @@ -80,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** - * seq_buf_str - get %NUL-terminated C string from seq_buf + * seq_buf_str - get NUL-terminated C string from seq_buf * @s: the seq_buf handle * - * This makes sure that the buffer in @s is nul terminated and + * This makes sure that the buffer in @s is NUL-terminated and * safe to read as a string. * * Note, if this is called when the buffer has overflowed, then @@ -93,7 +93,7 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) * After this function is called, s->buffer is safe to use * in string operations. * - * Returns @s->buf after making sure it is terminated. + * Returns: @s->buf after making sure it is terminated. */ static inline const char *seq_buf_str(struct seq_buf *s) { @@ -113,7 +113,7 @@ static inline const char *seq_buf_str(struct seq_buf *s) * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here * - * Return the number of bytes available in the buffer, or zero if + * Returns: the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) @@ -135,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired - * by seq_buf_get. To signal an error condition, or that the data + * by seq_buf_get_buf(). To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_buf_commit(struct seq_buf *s, int num) -- cgit v1.2.3 From 68fb3ca0e408e00db1c3f8fccdfa19e274c033be Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Feb 2024 11:14:33 -0800 Subject: update workarounds for gcc "asm goto" issue In commit 4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs") I did the gcc workaround unconditionally, because the cause of the bad code generation wasn't entirely clear. In the meantime, Jakub Jelinek debugged the issue, and has come up with a fix in gcc [2], which also got backported to the still maintained branches of gcc-11, gcc-12 and gcc-13. Note that while the fix technically wasn't in the original gcc-14 branch, Jakub says: "while it is true that no GCC 14 snapshots until today (or whenever the fix will be committed) have the fix, for GCC trunk it is up to the distros to use the latest snapshot if they use it at all and would allow better testing of the kernel code without the workaround, so that if there are other issues they won't be discovered years later. Most userland code doesn't actually use asm goto with outputs..." so we will consider gcc-14 to be fixed - if somebody is using gcc snapshots of the gcc-14 before the fix, they should upgrade. Note that while the bug goes back to gcc-11, in practice other gcc changes seem to have effectively hidden it since gcc-12.1 as per a bisect by Jakub. So even a gcc-14 snapshot without the fix likely doesn't show actual problems. Also, make the default 'asm_goto_output()' macro mark the asm as volatile by hand, because of an unrelated gcc issue [1] where it doesn't match the documented behavior ("asm goto is always volatile"). Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 [1] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 [2] Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Requested-by: Jakub Jelinek Cc: Uros Bizjak Cc: Nick Desaulniers Cc: Sean Christopherson Cc: Andrew Pinski Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 7 ++++--- include/linux/compiler_types.h | 9 ++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c1a963be7d28..75bd1692d2e3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -67,10 +67,9 @@ /* * GCC 'asm goto' with outputs miscompiles certain code sequences: * - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 * - * Work it around via the same compiler barrier quirk that we used + * Work around it via the same compiler barrier quirk that we used * to use for the old 'asm goto' workaround. * * Also, always mark such 'asm goto' statements as volatile: all @@ -80,8 +79,10 @@ * * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND #define asm_goto_output(x...) \ do { asm volatile goto(x); asm (""); } while (0) +#endif #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 663d8791c871..0caf354cb94b 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -362,8 +362,15 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ #ifndef asm_goto_output -#define asm_goto_output(x...) asm goto(x) +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE -- cgit v1.2.3 From b5fc07a5fb56216a49e6c1d0b172d5464d99a89b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 14 Feb 2024 17:14:11 -0500 Subject: scsi: core: Consult supported VPD page list prior to fetching page Commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") removed the logic which checks whether a VPD page is present on the supported pages list before asking for the page itself. That was done because SPC helpfully states "The Supported VPD Pages VPD page list may or may not include all the VPD pages that are able to be returned by the device server". Testing had revealed a few devices that supported some of the 0xBn pages but didn't actually list them in page 0. Julian Sikorski bisected a problem with his drive resetting during discovery to the commit above. As it turns out, this particular drive firmware will crash if we attempt to fetch page 0xB9. Various approaches were attempted to work around this. In the end, reinstating the logic that consults VPD page 0 before fetching any other page was the path of least resistance. A firmware update for the devices which originally compelled us to remove the check has since been released. Link: https://lore.kernel.org/r/20240214221411.2888112-1-martin.petersen@oracle.com Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") Cc: stable@vger.kernel.org Cc: Bart Van Assche Reported-by: Julian Sikorski Tested-by: Julian Sikorski Reviewed-by: Lee Duncan Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 01c02cb76ea6..c38f4fe5e64c 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -100,10 +100,6 @@ struct scsi_vpd { unsigned char data[]; }; -enum scsi_vpd_parameters { - SCSI_VPD_HEADER_SIZE = 4, -}; - struct scsi_device { struct Scsi_Host *host; struct request_queue *request_queue; -- cgit v1.2.3 From dc489f86257cab5056e747344f17a164f63bff4b Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 14 Feb 2024 22:40:03 +0100 Subject: net: bridge: switchdev: Skip MDB replays of deferred events on offload Before this change, generation of the list of MDB events to replay would race against the creation of new group memberships, either from the IGMP/MLD snooping logic or from user configuration. While new memberships are immediately visible to walkers of br->mdb_list, the notification of their existence to switchdev event subscribers is deferred until a later point in time. So if a replay list was generated during a time that overlapped with such a window, it would also contain a replay of the not-yet-delivered event. The driver would thus receive two copies of what the bridge internally considered to be one single event. On destruction of the bridge, only a single membership deletion event was therefore sent. As a consequence of this, drivers which reference count memberships (at least DSA), would be left with orphan groups in their hardware database when the bridge was destroyed. This is only an issue when replaying additions. While deletion events may still be pending on the deferred queue, they will already have been removed from br->mdb_list, so no duplicates can be generated in that scenario. To a user this meant that old group memberships, from a bridge in which a port was previously attached, could be reanimated (in hardware) when the port joined a new bridge, without the new bridge's knowledge. For example, on an mv88e6xxx system, create a snooping bridge and immediately add a port to it: root@infix-06-0b-00:~$ ip link add dev br0 up type bridge mcast_snooping 1 && \ > ip link set dev x3 up master br0 And then destroy the bridge: root@infix-06-0b-00:~$ ip link del dev br0 root@infix-06-0b-00:~$ mvls atu ADDRESS FID STATE Q F 0 1 2 3 4 5 6 7 8 9 a DEV:0 Marvell 88E6393X 33:33:00:00:00:6a 1 static - - 0 . . . . . . . . . . 33:33:ff:87:e4:3f 1 static - - 0 . . . . . . . . . . ff:ff:ff:ff:ff:ff 1 static - - 0 1 2 3 4 5 6 7 8 9 a root@infix-06-0b-00:~$ The two IPv6 groups remain in the hardware database because the port (x3) is notified of the host's membership twice: once via the original event and once via a replay. Since only a single delete notification is sent, the count remains at 1 when the bridge is destroyed. Then add the same port (or another port belonging to the same hardware domain) to a new bridge, this time with snooping disabled: root@infix-06-0b-00:~$ ip link add dev br1 up type bridge mcast_snooping 0 && \ > ip link set dev x3 up master br1 All multicast, including the two IPv6 groups from br0, should now be flooded, according to the policy of br1. But instead the old memberships are still active in the hardware database, causing the switch to only forward traffic to those groups towards the CPU (port 0). Eliminate the race in two steps: 1. Grab the write-side lock of the MDB while generating the replay list. This prevents new memberships from showing up while we are generating the replay list. But it leaves the scenario in which a deferred event was already generated, but not delivered, before we grabbed the lock. Therefore: 2. Make sure that no deferred version of a replay event is already enqueued to the switchdev deferred queue, before adding it to the replay list, when replaying additions. Fixes: 4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined mdb entries") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/switchdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index a43062d4c734..8346b0d29542 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -308,6 +308,9 @@ void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct netlink_ext_ack *extack); +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack); -- cgit v1.2.3 From 9b99c17f7510bed2adbe17751fb8abddba5620bc Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:50 -0800 Subject: x86/numa: Fix the address overlap check in numa_fill_memblks() numa_fill_memblks() fills in the gaps in numa_meminfo memblks over a physical address range. To do so, it first creates a list of existing memblks that overlap that address range. The issue is that it is off by one when comparing to the end of the address range, so memblks that do not overlap are selected. The impact of selecting a memblk that does not actually overlap is that an existing memblk may be filled when the expected action is to do nothing and return NUMA_NO_MEMBLK to the caller. The caller can then add a new NUMA node and memblk. Replace the broken open-coded search for address overlap with the memblock helper memblock_addrs_overlap(). Update the kernel doc and in code comments. Suggested by: "Huang, Ying" Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Mike Rapoport (IBM) Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/10a3e6109c34c21a8dd4c513cf63df63481a2b07.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- include/linux/memblock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b695f9e946da..e2082240586d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -121,6 +121,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); bool memblock_validate_numa_coverage(unsigned long threshold_bytes); -- cgit v1.2.3 From b8adb69a7d29c2d33eb327bca66476fb6066516b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:30 +0100 Subject: mptcp: fix lockless access in subflow ULP diag Since the introduction of the subflow ULP diag interface, the dump callback accessed all the subflow data with lockless. We need either to annotate all the read and write operation accordingly, or acquire the subflow socket lock. Let's do latter, even if slower, to avoid a diffstat havoc. Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index dd78a1181031..f6eba9652d01 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2506,7 +2506,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(const struct sock *sk, struct sk_buff *skb); + int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, -- cgit v1.2.3 From 13ddaf26be324a7f951891ecd9ccd04466d27458 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 7 Feb 2024 02:25:59 +0800 Subject: mm/swap: fix race when skipping swapcache When skipping swapcache for SWP_SYNCHRONOUS_IO, if two or more threads swapin the same entry at the same time, they get different pages (A, B). Before one thread (T0) finishes the swapin and installs page (A) to the PTE, another thread (T1) could finish swapin of page (B), swap_free the entry, then swap out the possibly modified page reusing the same entry. It breaks the pte_same check in (T0) because PTE value is unchanged, causing ABA problem. Thread (T0) will install a stalled page (A) into the PTE and cause data corruption. One possible callstack is like this: CPU0 CPU1 ---- ---- do_swap_page() do_swap_page() with same entry swap_read_folio() <- read to page A swap_read_folio() <- read to page B ... set_pte_at() swap_free() <- entry is free pte_same() <- Check pass, PTE seems unchanged, but page A is stalled! swap_free() <- page B content lost! set_pte_at() <- staled page A installed! And besides, for ZRAM, swap_free() allows the swap device to discard the entry content, so even if page (B) is not modified, if swap_read_folio() on CPU0 happens later than swap_free() on CPU1, it may also cause data loss. To fix this, reuse swapcache_prepare which will pin the swap entry using the cache flag, and allow only one thread to swap it in, also prevent any parallel code from putting the entry in the cache. Release the pin after PT unlocked. Racers just loop and wait since it's a rare and very short event. A schedule_timeout_uninterruptible(1) call is added to avoid repeated page faults wasting too much CPU, causing livelock or adding too much noise to perf statistics. A similar livelock issue was described in commit 029c4628b2eb ("mm: swap: get rid of livelock in swapin readahead") Reproducer: This race issue can be triggered easily using a well constructed reproducer and patched brd (with a delay in read path) [1]: With latest 6.8 mainline, race caused data loss can be observed easily: $ gcc -g -lpthread test-thread-swap-race.c && ./a.out Polulating 32MB of memory region... Keep swapping out... Starting round 0... Spawning 65536 workers... 32746 workers spawned, wait for done... Round 0: Error on 0x5aa00, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x395200, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x3fd000, expected 32746, got 32737, 9 data loss! Round 0 Failed, 15 data loss! This reproducer spawns multiple threads sharing the same memory region using a small swap device. Every two threads updates mapped pages one by one in opposite direction trying to create a race, with one dedicated thread keep swapping out the data out using madvise. The reproducer created a reproduce rate of about once every 5 minutes, so the race should be totally possible in production. After this patch, I ran the reproducer for over a few hundred rounds and no data loss observed. Performance overhead is minimal, microbenchmark swapin 10G from 32G zram: Before: 10934698 us After: 11157121 us Cached: 13155355 us (Dropping SWP_SYNCHRONOUS_IO flag) [kasong@tencent.com: v4] Link: https://lkml.kernel.org/r/20240219082040.7495-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240206182559.32264-1-ryncsn@gmail.com Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/lkml/87bk92gqpx.fsf_-_@yhuang6-desk2.ccr.corp.intel.com/ Link: https://github.com/ryncsn/emm-test-project/tree/master/swap-stress-race [1] Signed-off-by: Kairui Song Reviewed-by: "Huang, Ying" Acked-by: Yu Zhao Acked-by: David Hildenbrand Acked-by: Chris Li Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Yosry Ahmed Cc: Yu Zhao Cc: Barry Song <21cnbao@gmail.com> Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton --- include/linux/swap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 4db00ddad261..8d28f6091a32 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -549,6 +549,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } -- cgit v1.2.3 From f3e6b3ae9cfc128af11b665c6ef4022ba2683778 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 18 Feb 2024 14:28:57 -0800 Subject: acpi/ghes: Remove CXL CPER notifications Initial tests with the CXL CPER implementation identified that error reports were being duplicated in the log and the trace event [1]. Then it was discovered that the notification handler took sleeping locks while the GHES event handling runs in spin_lock_irqsave() context [2] While the duplicate reporting was fixed in v6.8-rc4, the fix for the sleeping-lock-vs-atomic collision would enjoy more time to settle and gain some test cycles. Given how late it is in the development cycle, remove the CXL hookup for now and try again during the next merge window. Note that end result is that v6.8 does not emit CXL CPER payloads to the kernel log, but this is in line with the CXL trend to move error reporting to trace events instead of the kernel log. Cc: Ard Biesheuvel Cc: Rafael J. Wysocki Cc: Jonathan Cameron Reviewed-by: Ira Weiny Link: http://lore.kernel.org/r/20240108165855.00002f5a@Huawei.com [1] Closes: http://lore.kernel.org/r/b963c490-2c13-4b79-bbe7-34c6568423c7@moroto.mountain [2] Signed-off-by: Dan Williams --- include/linux/cxl-event.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 91125eca4c8a..03fa6d50d46f 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -140,22 +140,4 @@ struct cxl_cper_event_rec { union cxl_event event; } __packed; -typedef void (*cxl_cper_callback)(enum cxl_event_type type, - struct cxl_cper_event_rec *rec); - -#ifdef CONFIG_ACPI_APEI_GHES -int cxl_cper_register_callback(cxl_cper_callback callback); -int cxl_cper_unregister_callback(cxl_cper_callback callback); -#else -static inline int cxl_cper_register_callback(cxl_cper_callback callback) -{ - return 0; -} - -static inline int cxl_cper_unregister_callback(cxl_cper_callback callback) -{ - return 0; -} -#endif - #endif /* _LINUX_CXL_EVENT_H */ -- cgit v1.2.3 From 77aebae1ea12de6eae5ce70d05b3d4724eec4023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 9 Feb 2024 12:34:44 +0100 Subject: drm/xe/uapi: Remove support for persistent exec_queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Persistent exec_queues delays explicit destruction of exec_queues until they are done executing, but destruction on process exit is still immediate. It turns out no UMD is relying on this functionality, so remove it. If there turns out to be a use-case in the future, let's re-add. Persistent exec_queues were never used for LR VMs v2: - Don't add an "UNUSED" define for the missing property (Lucas, Rodrigo) v3: - Remove the remaining struct xe_exec_queue::persistent state (Niranjana, Lucas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi Cc: Matthew Brost Cc: David Airlie Cc: Daniel Vetter Cc: Lucas De Marchi Cc: Francois Dugast Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Acked-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240209113444.8396-1-thomas.hellstrom@linux.intel.com (cherry picked from commit f1a9abc0cf311375695bede1590364864c05976d) Signed-off-by: Thomas Hellström --- include/uapi/drm/xe_drm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 9fa3ae324731..6d11ee9e571a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1046,7 +1046,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -- cgit v1.2.3 From b820de741ae48ccf50dd95e297889c286ff4f760 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 15 Feb 2024 12:47:38 -0800 Subject: fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio If kiocb_set_cancel_fn() is called for I/O submitted via io_uring, the following kernel warning appears: WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8 Call trace: kiocb_set_cancel_fn+0x9c/0xa8 ffs_epfile_read_iter+0x144/0x1d0 io_read+0x19c/0x498 io_issue_sqe+0x118/0x27c io_submit_sqes+0x25c/0x5fc __arm64_sys_io_uring_enter+0x104/0xab0 invoke_syscall+0x58/0x11c el0_svc_common+0xb4/0xf4 do_el0_svc+0x2c/0xb0 el0_svc+0x2c/0xa4 el0t_64_sync_handler+0x68/0xb4 el0t_64_sync+0x1a4/0x1a8 Fix this by setting the IOCB_AIO_RW flag for read and write I/O that is submitted by libaio. Suggested-by: Jens Axboe Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240215204739.2677806-2-bvanassche@acm.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a70495..c2dcc98cb4c8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -352,6 +352,8 @@ enum rw_hint { * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ -- cgit v1.2.3 From 9e0f0430389be7696396c62f037be4bf72cf93e3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 21 Feb 2024 12:32:58 +0100 Subject: netfilter: nft_flow_offload: reset dst in route object after setting up flow dst is transferred to the flow object, route object does not own it anymore. Reset dst in route object, otherwise if flow_offload_add() fails, error path releases dst twice, leading to a refcount underflow. Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 956c752ceb31..a763dd327c6e 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -276,7 +276,7 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); + struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, -- cgit v1.2.3 From 3f4d8aac6e768c2215ce68275256971c2f54f0c8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Jan 2024 13:50:58 +1000 Subject: nouveau: add an ioctl to return vram bar size. This returns the BAR resources size so userspace can make decisions based on rebar support. userspace using this has been proposed for nvk, but it's a rather trivial uapi addition. Reviewed-by: Faith Ekstrand Signed-off-by: Dave Airlie --- include/uapi/drm/nouveau_drm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 0bade1592f34..10a917639d8d 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -54,6 +54,13 @@ extern "C" { */ #define NOUVEAU_GETPARAM_EXEC_PUSH_MAX 17 +/* + * NOUVEAU_GETPARAM_VRAM_BAR_SIZE - query bar size + * + * Query the VRAM BAR size. + */ +#define NOUVEAU_GETPARAM_VRAM_BAR_SIZE 18 + struct drm_nouveau_getparam { __u64 param; __u64 value; -- cgit v1.2.3 From 72fa02fdf83306c52bc1eede28359e3fa32a151a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Jan 2024 14:24:25 +1000 Subject: nouveau: add an ioctl to report vram usage This reports the currently used vram allocations. userspace using this has been proposed for nvk, but it's a rather trivial uapi addition. Reviewed-by: Faith Ekstrand Signed-off-by: Dave Airlie --- include/uapi/drm/nouveau_drm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 10a917639d8d..77d7ff0d5b11 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -61,6 +61,13 @@ extern "C" { */ #define NOUVEAU_GETPARAM_VRAM_BAR_SIZE 18 +/* + * NOUVEAU_GETPARAM_VRAM_USED + * + * Get remaining VRAM size. + */ +#define NOUVEAU_GETPARAM_VRAM_USED 19 + struct drm_nouveau_getparam { __u64 param; __u64 value; -- cgit v1.2.3 From 65d4418c5002ec5b0e529455bf4152fd43459079 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Feb 2024 10:07:41 -0400 Subject: iommu/sva: Restore SVA handle sharing Prior to commit 092edaddb660 ("iommu: Support mm PASID 1:n with sva domains") the code allowed a SVA handle to be bound multiple times to the same (mm, device) pair. This was alluded to in the kdoc comment, but we had understood this to be more a remark about allowing multiple devices, not a literal same-driver re-opening the same SVA. It turns out uacce and idxd were both relying on the core code to handle reference counting for same-device same-mm scenarios. As this looks hard to resolve in the drivers bring it back to the core code. The new design has changed the meaning of the domain->users refcount to refer to the number of devices that are sharing that domain for the same mm. This is part of the design to lift the SVA domain de-duplication out of the drivers. Return the old behavior by explicitly de-duplicating the struct iommu_sva handle. The same (mm, device) will return the same handle pointer and the core code will handle tracking this. The last unbind of the handle will destroy it. Fixes: 092edaddb660 ("iommu: Support mm PASID 1:n with sva domains") Reported-by: Zhangfei Gao Closes: https://lore.kernel.org/all/20240221110658.529-1-zhangfei.gao@linaro.org/ Tested-by: Zhangfei Gao Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-9455fc497a6f+3b4-iommu_sva_sharing_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1ea2a820e1eb..5e27cb3a3be9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -892,11 +892,14 @@ struct iommu_fwspec { struct iommu_sva { struct device *dev; struct iommu_domain *domain; + struct list_head handle_item; + refcount_t users; }; struct iommu_mm_data { u32 pasid; struct list_head sva_domains; + struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, -- cgit v1.2.3 From c1b967d03c5d570ed7b90a88031fa2af34bf5b20 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2023 22:11:26 -0400 Subject: nfs: fix UAF on pathwalk running into umount NFS ->d_revalidate(), ->permission() and ->get_link() need to access some parts of nfs_server when called in RCU mode: server->flags server->caps *(server->io_stats) and, worst of all, call server->nfs_client->rpc_ops->have_delegation (the last one - as NFS_PROTO(inode)->have_delegation()). We really don't want to RCU-delay the entire nfs_free_server() (it would have to be done with schedule_work() from RCU callback, since it can't be made to run from interrupt context), but actual freeing of nfs_server and ->io_stats can be done via call_rcu() just fine. nfs_client part is handled simply by making nfs_free_client() use kfree_rcu(). Acked-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/nfs_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index cd797e00fe35..92de074e63b9 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -124,6 +124,7 @@ struct nfs_client { char cl_ipaddr[48]; struct net *cl_net; struct list_head pending_cb_stateids; + struct rcu_head rcu; }; /* @@ -265,6 +266,7 @@ struct nfs_server { const struct cred *cred; bool has_sec_mnt_opts; struct kobject kobj; + struct rcu_head rcu; }; /* Server capabilities */ -- cgit v1.2.3 From e31f0a57ae1ab2f6e17adb8e602bc120ad722232 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Sep 2023 00:12:00 -0400 Subject: procfs: make freeing proc_fs_info rcu-delayed makes proc_pid_ns() safe from rcu pathwalk (put_pid_ns() is still synchronous, but that's not a problem - it does rcu-delay everything that needs to be) Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/proc_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index de407e7c3b55..0b2a89854440 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -65,6 +65,7 @@ struct proc_fs_info { kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; + struct rcu_head rcu; }; static inline struct proc_fs_info *proc_sb_info(struct super_block *sb) -- cgit v1.2.3